t4_sge.c revision 221464
1/*-
2 * Copyright (c) 2011 Chelsio Communications, Inc.
3 * All rights reserved.
4 * Written by: Navdeep Parhar <np@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_sge.c 221464 2011-05-04 23:07:30Z np $");
30
31#include "opt_inet.h"
32
33#include <sys/types.h>
34#include <sys/mbuf.h>
35#include <sys/socket.h>
36#include <sys/kernel.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/taskqueue.h>
40#include <sys/sysctl.h>
41#include <net/bpf.h>
42#include <net/ethernet.h>
43#include <net/if.h>
44#include <net/if_vlan_var.h>
45#include <netinet/in.h>
46#include <netinet/ip.h>
47#include <netinet/tcp.h>
48
49#include "common/common.h"
50#include "common/t4_regs.h"
51#include "common/t4_regs_values.h"
52#include "common/t4_msg.h"
53#include "common/t4fw_interface.h"
54
55struct fl_buf_info {
56	int size;
57	int type;
58	uma_zone_t zone;
59};
60
61/* Filled up by t4_sge_modload */
62static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];
63
64#define FL_BUF_SIZE(x)	(fl_buf_info[x].size)
65#define FL_BUF_TYPE(x)	(fl_buf_info[x].type)
66#define FL_BUF_ZONE(x)	(fl_buf_info[x].zone)
67
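/*
 * The SGE writes each received frame this many bytes into its buffer, which
 * leaves the IP header 32-bit aligned after the 14-byte Ethernet header.
 */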
68enum {
69	FL_PKTSHIFT = 2
70};
71
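/*
 * FL_ALIGN is the boundary the SGE pads ingress payloads to (programmed as the
 * ingress padding boundary below); SPG_LEN is the size of a queue's status
 * page and depends on the cache line size.
 */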
72#define FL_ALIGN	min(CACHE_LINE_SIZE, 32)
73#if CACHE_LINE_SIZE > 64
74#define SPG_LEN		128
75#else
76#define SPG_LEN		64
77#endif
78
79/* Used to track a coalesced tx work request */
80struct txpkts {
81	uint64_t *flitp;	/* ptr to flit where next pkt should start */
82	uint8_t npkt;		/* # of packets in this work request */
83	uint8_t nflits;		/* # of flits used by this work request */
84	uint16_t plen;		/* total payload (sum of all packets) */
85};
86
87/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
88struct sgl {
89	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
90	int nflits;		/* # of flits needed for the SGL */
91	bus_dma_segment_t seg[TX_SGL_SEGS];
92};
93
94static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
95    int, iq_intr_handler_t *, char *);
96static inline void init_fl(struct sge_fl *, int, char *);
97static inline void init_eq(struct sge_eq *, int, char *);
98static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
99    bus_addr_t *, void **);
100static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
101    void *);
102static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
103    int);
104static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
105static int alloc_iq(struct sge_iq *, int);
106static int free_iq(struct sge_iq *);
107static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int);
108static int free_rxq(struct port_info *, struct sge_rxq *);
109static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int);
110static int free_ctrlq(struct adapter *, struct sge_ctrlq *);
111static int alloc_txq(struct port_info *, struct sge_txq *, int);
112static int free_txq(struct port_info *, struct sge_txq *);
113static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
114static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
115static inline void iq_next(struct sge_iq *);
116static inline void ring_fl_db(struct adapter *, struct sge_fl *);
117static void refill_fl(struct adapter *, struct sge_fl *, int, int);
118static int alloc_fl_sdesc(struct sge_fl *);
119static void free_fl_sdesc(struct sge_fl *);
120static int alloc_tx_maps(struct sge_txq *);
121static void free_tx_maps(struct sge_txq *);
122static void set_fl_tag_idx(struct sge_fl *, int);
123
124static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
125static int free_pkt_sgl(struct sge_txq *, struct sgl *);
126static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
127    struct sgl *);
128static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
129    struct mbuf *, struct sgl *);
130static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
131static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
132    struct txpkts *, struct mbuf *, struct sgl *);
133static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
134static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
135static inline void ring_eq_db(struct adapter *, struct sge_eq *);
136static inline int reclaimable(struct sge_eq *);
137static int reclaim_tx_descs(struct sge_txq *, int, int);
138static void write_eqflush_wr(struct sge_eq *);
139static __be64 get_flit(bus_dma_segment_t *, int, int);
140static int handle_sge_egr_update(struct adapter *,
141    const struct cpl_sge_egr_update *);
142
143static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *);
144
145/*
146 * Called on MOD_LOAD and fills up fl_buf_info[].
147 */
148void
149t4_sge_modload(void)
150{
151	int i;
152	int bufsize[FL_BUF_SIZES] = {
153		MCLBYTES,
154#if MJUMPAGESIZE != MCLBYTES
155		MJUMPAGESIZE,
156#endif
157		MJUM9BYTES,
158		MJUM16BYTES
159	};
160
161	for (i = 0; i < FL_BUF_SIZES; i++) {
162		FL_BUF_SIZE(i) = bufsize[i];
163		FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
164		FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
165	}
166}
167
168/**
169 *	t4_sge_init - initialize SGE
170 *	@sc: the adapter
171 *
172 *	Performs SGE initialization needed after every chip reset.  We do not
173 *	initialize any of the queues here; the driver's top level must request
174 *	them individually.
175 */
176void
177t4_sge_init(struct adapter *sc)
178{
179	struct sge *s = &sc->sge;
180	int i;
181
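	/*
	 * Rx packet shift/padding, freelist buffer alignment, and the size of
	 * the egress status page are set here; per-queue settings come later,
	 * when the queues themselves are created.
	 */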
182	t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) |
183			 V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
184			 F_EGRSTATUSPAGESIZE,
185			 V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) |
186			 V_PKTSHIFT(FL_PKTSHIFT) |
187			 F_RXPKTCPLMODE |
188			 V_EGRSTATUSPAGESIZE(SPG_LEN == 128));
189	t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE,
190			 V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0),
191			 V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10));
192
193	for (i = 0; i < FL_BUF_SIZES; i++) {
194		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
195		    FL_BUF_SIZE(i));
196	}
197
198	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
199		     V_THRESHOLD_0(s->counter_val[0]) |
200		     V_THRESHOLD_1(s->counter_val[1]) |
201		     V_THRESHOLD_2(s->counter_val[2]) |
202		     V_THRESHOLD_3(s->counter_val[3]));
203
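	/*
	 * Holdoff timer values are kept in microseconds; the hardware wants
	 * them in core clock ticks.
	 */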
204	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
205		     V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) |
206		     V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1])));
207	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
208		     V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) |
209		     V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3])));
210	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
211		     V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) |
212		     V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5])));
213}
214
215int
216t4_create_dma_tag(struct adapter *sc)
217{
218	int rc;
219
220	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
221	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
222	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
223	    NULL, &sc->dmat);
224	if (rc != 0) {
225		device_printf(sc->dev,
226		    "failed to create main DMA tag: %d\n", rc);
227	}
228
229	return (rc);
230}
231
232int
233t4_destroy_dma_tag(struct adapter *sc)
234{
235	if (sc->dmat)
236		bus_dma_tag_destroy(sc->dmat);
237
238	return (0);
239}
240
241/*
242 * Allocate and initialize the firmware event queue, control queues, and the
243 * forwarded interrupt queues (if any).  The adapter owns all these queues as
244 * they are not associated with any particular port.
245 *
246 * Returns errno on failure.  Resources allocated up to that point may still be
247 * allocated.  Caller is responsible for cleanup in case this function fails.
248 */
249int
250t4_setup_adapter_queues(struct adapter *sc)
251{
252	int i, rc;
253	struct sge_iq *iq, *fwq;
254	struct sge_ctrlq *ctrlq;
255	iq_intr_handler_t *handler;
256	char name[16];
257
258	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
259
260	if (sysctl_ctx_init(&sc->ctx) == 0) {
261		struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
262		struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
263
264		sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
265		    "ctrlq", CTLFLAG_RD, NULL, "ctrl queues");
266	}
267
268	fwq = &sc->sge.fwq;
269	if (sc->flags & INTR_FWD) {
270		iq = &sc->sge.fiq[0];
271
272		/*
273		 * Forwarded interrupt queues - allocate 1 if there's only 1
274		 * vector available, one less than the number of vectors
275		 * otherwise (the first vector is reserved for the error
276		 * interrupt in that case).
277		 */
278		i = sc->intr_count > 1 ? 1 : 0;
279		for (; i < sc->intr_count; i++, iq++) {
280
281			snprintf(name, sizeof(name), "%s fiq%d",
282			    device_get_nameunit(sc->dev), i);
283			init_iq(iq, sc, 0, 0, (sc->sge.nrxq + 1) * 2, 16, NULL,
284			    name);
285
286			rc = alloc_iq(iq, i);
287			if (rc != 0) {
288				device_printf(sc->dev,
289				    "failed to create fwd intr queue %d: %d\n",
290				    i, rc);
291				return (rc);
292			}
293		}
294
295		handler = t4_evt_rx;
296		i = 0;	/* forward fwq's interrupt to the first fiq */
297	} else {
298		handler = NULL;
299		i = 1;	/* fwq should use vector 1 (0 is used by error) */
300	}
301
302	snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev));
303	init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name);
304	rc = alloc_iq(fwq, i);
305	if (rc != 0) {
306		device_printf(sc->dev,
307		    "failed to create firmware event queue: %d\n", rc);
308
309		return (rc);
310	}
311
312	/*
313	 * Control queues - one per hardware channel.
314	 */
315	ctrlq = &sc->sge.ctrlq[0];
316	for (i = 0; i < NCHAN; i++, ctrlq++) {
317		snprintf(name, sizeof(name), "%s ctrlq%d",
318		    device_get_nameunit(sc->dev), i);
319		init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name);
320
321		rc = alloc_ctrlq(sc, ctrlq, i);
322		if (rc != 0) {
323			device_printf(sc->dev,
324			    "failed to create control queue %d: %d\n", i, rc);
325			return (rc);
326		}
327	}
328
329	return (rc);
330}
331
332/*
333 * Idempotent
334 */
335int
336t4_teardown_adapter_queues(struct adapter *sc)
337{
338	int i;
339	struct sge_iq *iq;
340
341	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
342
343	/* Do this before freeing the queues */
344	if (sc->oid_ctrlq) {
345		sysctl_ctx_free(&sc->ctx);
346		sc->oid_ctrlq = NULL;
347	}
348
349	for (i = 0; i < NCHAN; i++)
350		free_ctrlq(sc, &sc->sge.ctrlq[i]);
351
352	iq = &sc->sge.fwq;
353	free_iq(iq);
354	if (sc->flags & INTR_FWD) {
355		for (i = 0; i < NFIQ(sc); i++) {
356			iq = &sc->sge.fiq[i];
357			free_iq(iq);
358		}
359	}
360
361	return (0);
362}
363
364int
365t4_setup_eth_queues(struct port_info *pi)
366{
367	int rc = 0, i, intr_idx;
368	struct sge_rxq *rxq;
369	struct sge_txq *txq;
370	char name[16];
371	struct adapter *sc = pi->adapter;
372
373	if (sysctl_ctx_init(&pi->ctx) == 0) {
374		struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
375		struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
376
377		pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
378		    "rxq", CTLFLAG_RD, NULL, "rx queues");
379		pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
380		    "txq", CTLFLAG_RD, NULL, "tx queues");
381	}
382
383	for_each_rxq(pi, i, rxq) {
384
385		snprintf(name, sizeof(name), "%s rxq%d-iq",
386		    device_get_nameunit(pi->dev), i);
387		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
388		    pi->qsize_rxq, RX_IQ_ESIZE,
389		    sc->flags & INTR_FWD ? t4_eth_rx : NULL, name);
390
391		snprintf(name, sizeof(name), "%s rxq%d-fl",
392		    device_get_nameunit(pi->dev), i);
393		init_fl(&rxq->fl, pi->qsize_rxq / 8, name);
394
395		if (sc->flags & INTR_FWD)
396			intr_idx = (pi->first_rxq + i) % NFIQ(sc);
397		else
398			intr_idx = pi->first_rxq + i + 2;
399
400		rc = alloc_rxq(pi, rxq, intr_idx, i);
401		if (rc != 0)
402			goto done;
403
404		intr_idx++;
405	}
406
407	for_each_txq(pi, i, txq) {
408
409		snprintf(name, sizeof(name), "%s txq%d",
410		    device_get_nameunit(pi->dev), i);
411		init_eq(&txq->eq, pi->qsize_txq, name);
412
413		rc = alloc_txq(pi, txq, i);
414		if (rc != 0)
415			goto done;
416	}
417
418done:
419	if (rc)
420		t4_teardown_eth_queues(pi);
421
422	return (rc);
423}
424
425/*
426 * Idempotent
427 */
428int
429t4_teardown_eth_queues(struct port_info *pi)
430{
431	int i;
432	struct sge_rxq *rxq;
433	struct sge_txq *txq;
434
435	/* Do this before freeing the queues */
436	if (pi->oid_txq || pi->oid_rxq) {
437		sysctl_ctx_free(&pi->ctx);
438		pi->oid_txq = pi->oid_rxq = NULL;
439	}
440
441	for_each_txq(pi, i, txq) {
442		free_txq(pi, txq);
443	}
444
445	for_each_rxq(pi, i, rxq) {
446		free_rxq(pi, rxq);
447	}
448
449	return (0);
450}
451
452/* Deals with errors and forwarded interrupts */
453void
454t4_intr_all(void *arg)
455{
456	struct adapter *sc = arg;
457
458	t4_intr_err(arg);
459	t4_intr_fwd(&sc->sge.fiq[0]);
460}
461
462/* Deals with forwarded interrupts on the given ingress queue */
463void
464t4_intr_fwd(void *arg)
465{
466	struct sge_iq *iq = arg, *q;
467	struct adapter *sc = iq->adapter;
468	struct rsp_ctrl *ctrl;
469	int ndesc_pending = 0, ndesc_total = 0;
470	int qid;
471
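	/* Claim the queue; if another context is already servicing it there's
	 * nothing to do. */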
472	if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
473		return;
474
475	while (is_new_response(iq, &ctrl)) {
476
477		rmb();
478
479		/* Only interrupt muxing expected on this queue */
480		KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_INTR,
481		    ("unexpected event on forwarded interrupt queue: %x",
482		    G_RSPD_TYPE(ctrl->u.type_gen)));
483
484		qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start;
485		q = sc->sge.iqmap[qid];
486
487		q->handler(q);
488
489		ndesc_total++;
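		/*
		 * Hand back credits periodically so the hardware doesn't run
		 * out of ingress queue entries during a long burst.
		 */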
490		if (++ndesc_pending >= iq->qsize / 4) {
491			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
492			    V_CIDXINC(ndesc_pending) |
493			    V_INGRESSQID(iq->cntxt_id) |
494			    V_SEINTARM(
495				V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
496			ndesc_pending = 0;
497		}
498
499		iq_next(iq);
500	}
501
502	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
503	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
504
505	atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
506}
507
508/* Deals with error interrupts */
509void
510t4_intr_err(void *arg)
511{
512	struct adapter *sc = arg;
513
514	if (sc->intr_type == INTR_INTX)
515		t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
516
517	t4_slow_intr_handler(sc);
518}
519
520/* Deals with the firmware event queue */
521void
522t4_intr_evt(void *arg)
523{
524	struct sge_iq *iq = arg;
525
526	if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
527		return;
528
529	t4_evt_rx(arg);
530
531	atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
532}
533
534void
535t4_intr_data(void *arg)
536{
537	struct sge_iq *iq = arg;
538
539	if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
540		return;
541
542	t4_eth_rx(arg);
543
544	atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
545}
546
547void
548t4_evt_rx(void *arg)
549{
550	struct sge_iq *iq = arg;
551	struct adapter *sc = iq->adapter;
552	struct rsp_ctrl *ctrl;
553	const struct rss_header *rss;
554	int ndesc_pending = 0, ndesc_total = 0;
555
556	KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__));
557
558	while (is_new_response(iq, &ctrl)) {
559
560		rmb();
561
562		rss = (const void *)iq->cdesc;
563
564		/* Should only get CPL on this queue */
565		KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_CPL,
566		    ("%s: unexpected type %d", __func__,
567		    G_RSPD_TYPE(ctrl->u.type_gen)));
568
569		switch (rss->opcode) {
570		case CPL_FW4_MSG:
571		case CPL_FW6_MSG: {
572			const struct cpl_fw6_msg *cpl;
573
574			cpl = (const void *)(rss + 1);
575			if (cpl->type == FW6_TYPE_CMD_RPL)
576				t4_handle_fw_rpl(sc, cpl->data);
577
578			break;
579			}
580		case CPL_SGE_EGR_UPDATE:
581			handle_sge_egr_update(sc, (const void *)(rss + 1));
582			break;
583
584		default:
585			device_printf(sc->dev,
586			    "can't handle CPL opcode %d.", rss->opcode);
587		}
588
589		ndesc_total++;
590		if (++ndesc_pending >= iq->qsize / 4) {
591			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
592			    V_CIDXINC(ndesc_pending) |
593			    V_INGRESSQID(iq->cntxt_id) |
594			    V_SEINTARM(
595				V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
596			ndesc_pending = 0;
597		}
598		iq_next(iq);
599	}
600
601	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
602	    V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(iq->intr_params));
603}
604
605void
606t4_eth_rx(void *arg)
607{
608	struct sge_rxq *rxq = arg;
609	struct sge_iq *iq = arg;
610	struct adapter *sc = iq->adapter;
611	struct rsp_ctrl *ctrl;
612	struct ifnet *ifp = rxq->ifp;
613	struct sge_fl *fl = &rxq->fl;
614	struct fl_sdesc *sd = &fl->sdesc[fl->cidx], *sd_next;
615	const struct rss_header *rss;
616	const struct cpl_rx_pkt *cpl;
617	uint32_t len;
618	int ndescs = 0, i;
619	struct mbuf *m0, *m;
620#ifdef INET
621	struct lro_ctrl *lro = &rxq->lro;
622	struct lro_entry *l;
623#endif
624
625	prefetch(sd->m);
626	prefetch(sd->cl);
627
628	iq->intr_next = iq->intr_params;
629	while (is_new_response(iq, &ctrl)) {
630
631		rmb();
632
633		rss = (const void *)iq->cdesc;
634		i = G_RSPD_TYPE(ctrl->u.type_gen);
635
636		if (__predict_false(i == X_RSPD_TYPE_CPL)) {
637
638			/* Can't be anything except an egress update */
639			KASSERT(rss->opcode == CPL_SGE_EGR_UPDATE,
640			    ("%s: unexpected CPL %x", __func__, rss->opcode));
641
642			handle_sge_egr_update(sc, (const void *)(rss + 1));
643			goto nextdesc;
644		}
645		KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT,
646		    ("%s: unexpected CPL %x rsp %d", __func__, rss->opcode, i));
647
648		sd_next = sd + 1;
649		if (__predict_false(fl->cidx + 1 == fl->cap))
650			sd_next = fl->sdesc;
651		prefetch(sd_next->m);
652		prefetch(sd_next->cl);
653
654		cpl = (const void *)(rss + 1);
655
656		m0 = sd->m;
657		sd->m = NULL;	/* consumed */
658
659		len = be32toh(ctrl->pldbuflen_qid);
660		if (__predict_false((len & F_RSPD_NEWBUF) == 0))
661			panic("%s: cannot handle packed frames", __func__);
662		len = G_RSPD_LEN(len);
663
664		bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
665		    BUS_DMASYNC_POSTREAD);
666
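		/*
		 * Small frames are copied into the mbuf so the cluster can be
		 * recycled in place; larger frames get the cluster attached to
		 * the mbuf instead (no copy).
		 */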
667		m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
668		if (len < MINCLSIZE) {
669			/* copy data to mbuf, buffer will be recycled */
670			bcopy(sd->cl, mtod(m0, caddr_t), len);
671			m0->m_len = len;
672		} else {
673			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
674			m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
675			sd->cl = NULL;	/* consumed */
676			m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
677		}
678
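		/* Skip past the padding the chip placed before the frame. */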
679		len -= FL_PKTSHIFT;
680		m0->m_len -= FL_PKTSHIFT;
681		m0->m_data += FL_PKTSHIFT;
682
683		m0->m_pkthdr.len = len;
684		m0->m_pkthdr.rcvif = ifp;
685		m0->m_flags |= M_FLOWID;
686		m0->m_pkthdr.flowid = rss->hash_val;
687
688		if (cpl->csum_calc && !cpl->err_vec &&
689		    ifp->if_capenable & IFCAP_RXCSUM) {
690			m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
691			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
692			if (cpl->ip_frag)
693				m0->m_pkthdr.csum_data = be16toh(cpl->csum);
694			else
695				m0->m_pkthdr.csum_data = 0xffff;
696			rxq->rxcsum++;
697		}
698
699		if (cpl->vlan_ex) {
700			m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
701			m0->m_flags |= M_VLANTAG;
702			rxq->vlan_extraction++;
703		}
704
705		i = 1;	/* # of fl sdesc used */
706		sd = sd_next;
707		if (__predict_false(++fl->cidx == fl->cap))
708			fl->cidx = 0;
709
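		/*
		 * If the frame spilled into more freelist buffers, chain them
		 * onto m0 until the entire payload is accounted for.
		 */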
710		len -= m0->m_len;
711		m = m0;
712		while (len) {
713			i++;
714
715			sd_next = sd + 1;
716			if (__predict_false(fl->cidx + 1 == fl->cap))
717				sd_next = fl->sdesc;
718			prefetch(sd_next->m);
719			prefetch(sd_next->cl);
720
721			m->m_next = sd->m;
722			sd->m = NULL;	/* consumed */
723			m = m->m_next;
724
725			bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
726			    BUS_DMASYNC_POSTREAD);
727
728			m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
729			if (len <= MLEN) {
730				bcopy(sd->cl, mtod(m, caddr_t), len);
731				m->m_len = len;
732			} else {
733				bus_dmamap_unload(fl->tag[sd->tag_idx],
734				    sd->map);
735				m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
736				sd->cl = NULL;	/* consumed */
737				m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
738			}
739
741			sd = sd_next;
742			if (__predict_false(++fl->cidx == fl->cap))
743				fl->cidx = 0;
744
745			len -= m->m_len;
746		}
747
748#ifdef INET
749		if (cpl->l2info & htobe32(F_RXF_LRO) &&
750		    rxq->flags & RXQ_LRO_ENABLED &&
751		    tcp_lro_rx(lro, m0, 0) == 0) {
752			/* queued for LRO */
753		} else
754#endif
755		ifp->if_input(ifp, m0);
756
757		FL_LOCK(fl);
758		fl->needed += i;
759		if (fl->needed >= 32)
760			refill_fl(sc, fl, 64, 32);
761		FL_UNLOCK(fl);
762
763nextdesc:	ndescs++;
764		iq_next(iq);
765
766		if (ndescs > 32) {
767			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
768			    V_CIDXINC(ndescs) |
769			    V_INGRESSQID((u32)iq->cntxt_id) |
770			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
771			ndescs = 0;
772		}
773	}
774
775#ifdef INET
776	while (!SLIST_EMPTY(&lro->lro_active)) {
777		l = SLIST_FIRST(&lro->lro_active);
778		SLIST_REMOVE_HEAD(&lro->lro_active, next);
779		tcp_lro_flush(lro, l);
780	}
781#endif
782
783	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
784	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));
785
786	FL_LOCK(fl);
787	if (fl->needed >= 32)
788		refill_fl(sc, fl, 128, 8);
789	FL_UNLOCK(fl);
790}
791
792int
793t4_mgmt_tx(struct adapter *sc, struct mbuf *m)
794{
795	return ctrl_tx(sc, &sc->sge.ctrlq[0], m);
796}
797
798/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
799#define TXPKTS_PKT_HDR ((\
800    sizeof(struct ulp_txpkt) + \
801    sizeof(struct ulptx_idata) + \
802    sizeof(struct cpl_tx_pkt_core) \
803    ) / 8)
804
805/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
806#define TXPKTS_WR_HDR (\
807    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
808    TXPKTS_PKT_HDR)
809
810/* Header of a tx WR, before SGL of first packet (in flits) */
811#define TXPKT_WR_HDR ((\
812    sizeof(struct fw_eth_tx_pkt_wr) + \
813    sizeof(struct cpl_tx_pkt_core) \
814    ) / 8 )
815
816/* Header of a tx LSO WR, before SGL of first packet (in flits) */
817#define TXPKT_LSO_WR_HDR ((\
818    sizeof(struct fw_eth_tx_pkt_wr) + \
819    sizeof(struct cpl_tx_pkt_lso) + \
820    sizeof(struct cpl_tx_pkt_core) \
821    ) / 8 )
822
823int
824t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
825{
826	struct port_info *pi = (void *)ifp->if_softc;
827	struct adapter *sc = pi->adapter;
828	struct sge_eq *eq = &txq->eq;
829	struct buf_ring *br = txq->br;
830	struct mbuf *next;
831	int rc, coalescing, can_reclaim;
832	struct txpkts txpkts;
833	struct sgl sgl;
834
835	TXQ_LOCK_ASSERT_OWNED(txq);
836	KASSERT(m, ("%s: called with nothing to do.", __func__));
837
838	prefetch(&eq->desc[eq->pidx]);
839	prefetch(&txq->sdesc[eq->pidx]);
840
841	txpkts.npkt = 0;	/* indicates there's nothing in txpkts */
842	coalescing = 0;
843
844	if (eq->avail < 8)
845		reclaim_tx_descs(txq, 0, 8);
846
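	/*
	 * Transmit the caller's frame first, then keep pulling more frames off
	 * the buf_ring until we run out of work or descriptors.
	 */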
847	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
848
849		if (eq->avail < 8)
850			break;
851
852		next = m->m_nextpkt;
853		m->m_nextpkt = NULL;
854
855		if (next || buf_ring_peek(br))
856			coalescing = 1;
857
858		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
859		if (rc != 0) {
860			if (rc == ENOMEM) {
861
862				/* Short of resources, suspend tx */
863
864				m->m_nextpkt = next;
865				break;
866			}
867
868			/*
869			 * Unrecoverable error for this packet, throw it away
870			 * and move on to the next.  get_pkt_sgl may already
871			 * have freed m (it will be NULL in that case and the
872			 * m_freem here is still safe).
873			 */
874
875			m_freem(m);
876			continue;
877		}
878
879		if (coalescing &&
880		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
881
882			/* Successfully absorbed into txpkts */
883
884			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
885			goto doorbell;
886		}
887
888		/*
889		 * We weren't coalescing to begin with, or current frame could
890		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
891		 * given to it can't be coalesced).  Either way there should be
892		 * nothing in txpkts.
893		 */
894		KASSERT(txpkts.npkt == 0,
895		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
896
897		/* We're sending out individual packets now */
898		coalescing = 0;
899
900		if (eq->avail < 8)
901			reclaim_tx_descs(txq, 0, 8);
902		rc = write_txpkt_wr(pi, txq, m, &sgl);
903		if (rc != 0) {
904
905			/* Short of hardware descriptors, suspend tx */
906
907			/*
908			 * This is an unlikely but expensive failure.  We've
909			 * done all the hard work (DMA mappings etc.) and now we
910			 * can't send out the packet.  What's worse, we have to
911			 * spend even more time freeing up everything in sgl.
912			 */
913			txq->no_desc++;
914			free_pkt_sgl(txq, &sgl);
915
916			m->m_nextpkt = next;
917			break;
918		}
919
920		ETHER_BPF_MTAP(ifp, m);
921		if (sgl.nsegs == 0)
922			m_freem(m);
923
924doorbell:
925		/* Fewer and fewer doorbells as the queue fills up */
926		if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2)))
927		    ring_eq_db(sc, eq);
928
929		can_reclaim = reclaimable(eq);
930		if (can_reclaim >= 32)
931			reclaim_tx_descs(txq, can_reclaim, 32);
932	}
933
934	if (txpkts.npkt > 0)
935		write_txpkts_wr(txq, &txpkts);
936
937	/*
938	 * m not NULL means there was an error but we haven't thrown it away.
939	 * This can happen when we're short of tx descriptors (no_desc) or maybe
940	 * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
941	 * will get things going again.
942	 *
943	 * If eq->avail is already 0 we know a credit flush was requested in the
944	 * WR that reduced it to 0 so we don't need another flush (we don't have
945	 * any descriptor for a flush WR anyway, duh).
946	 */
947	if (m && eq->avail > 0 && !(eq->flags & EQ_CRFLUSHED)) {
948		struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
949
950		txsd->desc_used = 1;
951		txsd->credits = 0;
952		write_eqflush_wr(eq);
953	}
954	txq->m = m;
955
956	if (eq->pending)
957		ring_eq_db(sc, eq);
958
959	can_reclaim = reclaimable(eq);
960	if (can_reclaim >= 32)
961		reclaim_tx_descs(txq, can_reclaim, 128);
962
963	return (0);
964}
965
966void
967t4_update_fl_bufsize(struct ifnet *ifp)
968{
969	struct port_info *pi = ifp->if_softc;
970	struct sge_rxq *rxq;
971	struct sge_fl *fl;
972	int i;
973
974	for_each_rxq(pi, i, rxq) {
975		fl = &rxq->fl;
976
977		FL_LOCK(fl);
978		set_fl_tag_idx(fl, ifp->if_mtu);
979		FL_UNLOCK(fl);
980	}
981}
982
983/*
984 * A non-NULL handler indicates this iq will not receive direct interrupts; the
985 * handler will be invoked by a forwarded interrupt queue instead.
986 */
987static inline void
988init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
989    int qsize, int esize, iq_intr_handler_t *handler, char *name)
990{
991	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
992	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
993	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
994	    ("%s: bad pktc_idx %d", __func__, pktc_idx));
995
996	iq->flags = 0;
997	iq->adapter = sc;
998	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) |
999	    V_QINTR_CNT_EN(pktc_idx >= 0);
1000	iq->intr_pktc_idx = pktc_idx;
1001	iq->qsize = roundup(qsize, 16);		/* See FW_IQ_CMD/iqsize */
1002	iq->esize = max(esize, 16);		/* See FW_IQ_CMD/iqesize */
1003	iq->handler = handler;
1004	strlcpy(iq->lockname, name, sizeof(iq->lockname));
1005}
1006
1007static inline void
1008init_fl(struct sge_fl *fl, int qsize, char *name)
1009{
1010	fl->qsize = qsize;
1011	strlcpy(fl->lockname, name, sizeof(fl->lockname));
1012}
1013
1014static inline void
1015init_eq(struct sge_eq *eq, int qsize, char *name)
1016{
1017	eq->qsize = qsize;
1018	strlcpy(eq->lockname, name, sizeof(eq->lockname));
1019}
1020
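/*
 * Allocate a DMA-able ring: create a tag for it, allocate zeroed coherent
 * memory, and load the map to obtain the bus address (returned in *pa).
 */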
1021static int
1022alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
1023    bus_dmamap_t *map, bus_addr_t *pa, void **va)
1024{
1025	int rc;
1026
1027	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
1028	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
1029	if (rc != 0) {
1030		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
1031		goto done;
1032	}
1033
1034	rc = bus_dmamem_alloc(*tag, va,
1035	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
1036	if (rc != 0) {
1037		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
1038		goto done;
1039	}
1040
1041	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
1042	if (rc != 0) {
1043		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
1044		goto done;
1045	}
1046done:
1047	if (rc)
1048		free_ring(sc, *tag, *map, *pa, *va);
1049
1050	return (rc);
1051}
1052
1053static int
1054free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
1055    bus_addr_t pa, void *va)
1056{
1057	if (pa)
1058		bus_dmamap_unload(tag, map);
1059	if (va)
1060		bus_dmamem_free(tag, va, map);
1061	if (tag)
1062		bus_dma_tag_destroy(tag);
1063
1064	return (0);
1065}
1066
1067/*
1068 * Allocates the ring for an ingress queue and an optional freelist.  If the
1069 * freelist is specified it will be allocated and then associated with the
1070 * ingress queue.
1071 *
1072 * Returns errno on failure.  Resources allocated up to that point may still be
1073 * allocated.  Caller is responsible for cleanup in case this function fails.
1074 *
1075 * If the ingress queue will take interrupts directly (iq->handler == NULL) then
1076 * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
1077 * the index of the queue to which its interrupts will be forwarded.
1078 */
1079static int
1080alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
1081    int intr_idx)
1082{
1083	int rc, i, cntxt_id;
1084	size_t len;
1085	struct fw_iq_cmd c;
1086	struct adapter *sc = iq->adapter;
1087	__be32 v = 0;
1088
1089	/* The adapter queues are nominally allocated in port[0]'s name */
1090	if (pi == NULL)
1091		pi = sc->port[0];
1092
1093	len = iq->qsize * iq->esize;
1094	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
1095	    (void **)&iq->desc);
1096	if (rc != 0)
1097		return (rc);
1098
1099	bzero(&c, sizeof(c));
1100	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
1101	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
1102	    V_FW_IQ_CMD_VFN(0));
1103
1104	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
1105	    FW_LEN16(c));
1106
1107	/* Special handling for firmware event queue */
1108	if (iq == &sc->sge.fwq)
1109		v |= F_FW_IQ_CMD_IQASYNCH;
1110
1111	if (iq->handler) {
1112		KASSERT(intr_idx < NFIQ(sc),
1113		    ("%s: invalid indirect intr_idx %d", __func__, intr_idx));
1114		v |= F_FW_IQ_CMD_IQANDST;
1115		v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fiq[intr_idx].abs_id);
1116	} else {
1117		KASSERT(intr_idx < sc->intr_count,
1118		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
1119		v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
1120	}
1121
1122	c.type_to_iqandstindex = htobe32(v |
1123	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
1124	    V_FW_IQ_CMD_VIID(pi->viid) |
1125	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
1126	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
1127	    F_FW_IQ_CMD_IQGTSMODE |
1128	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
1129	    V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
1130	c.iqsize = htobe16(iq->qsize);
1131	c.iqaddr = htobe64(iq->ba);
1132
1133	if (fl) {
1134		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
1135
1136		for (i = 0; i < FL_BUF_SIZES; i++) {
1137
1138			/*
1139			 * A freelist buffer must be 16 byte aligned as the SGE
1140			 * uses the low 4 bits of the bus addr to figure out the
1141			 * buffer size.
1142			 */
1143			rc = bus_dma_tag_create(sc->dmat, 16, 0,
1144			    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1145			    FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
1146			    NULL, NULL, &fl->tag[i]);
1147			if (rc != 0) {
1148				device_printf(sc->dev,
1149				    "failed to create fl DMA tag[%d]: %d\n",
1150				    i, rc);
1151				return (rc);
1152			}
1153		}
1154		len = fl->qsize * RX_FL_ESIZE;
1155		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
1156		    &fl->ba, (void **)&fl->desc);
1157		if (rc)
1158			return (rc);
1159
1160		/* Allocate space for one software descriptor per buffer. */
1161		fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8;
1162		FL_LOCK(fl);
1163		set_fl_tag_idx(fl, pi->ifp->if_mtu);
1164		rc = alloc_fl_sdesc(fl);
1165		FL_UNLOCK(fl);
1166		if (rc != 0) {
1167			device_printf(sc->dev,
1168			    "failed to setup fl software descriptors: %d\n",
1169			    rc);
1170			return (rc);
1171		}
1172		fl->needed = fl->cap;
1173
1174		c.iqns_to_fl0congen =
1175		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE));
1176		c.fl0dcaen_to_fl0cidxfthresh =
1177		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
1178			V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
1179		c.fl0size = htobe16(fl->qsize);
1180		c.fl0addr = htobe64(fl->ba);
1181	}
1182
1183	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1184	if (rc != 0) {
1185		device_printf(sc->dev,
1186		    "failed to create ingress queue: %d\n", rc);
1187		return (rc);
1188	}
1189
1190	iq->cdesc = iq->desc;
1191	iq->cidx = 0;
1192	iq->gen = 1;
1193	iq->intr_next = iq->intr_params;
1194	iq->cntxt_id = be16toh(c.iqid);
1195	iq->abs_id = be16toh(c.physiqid);
1196	iq->flags |= (IQ_ALLOCATED | IQ_STARTED);
1197
1198	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
1199	KASSERT(cntxt_id < sc->sge.niq,
1200	    ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
1201	    cntxt_id, sc->sge.niq - 1));
1202	sc->sge.iqmap[cntxt_id] = iq;
1203
1204	if (fl) {
1205		fl->cntxt_id = be16toh(c.fl0id);
1206		fl->pidx = fl->cidx = 0;
1207
1208		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
1209		KASSERT(cntxt_id < sc->sge.neq,
1210		    ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__,
1211		    cntxt_id, sc->sge.neq - 1));
1212		sc->sge.eqmap[cntxt_id] = (void *)fl;
1213
1214		FL_LOCK(fl);
1215		refill_fl(sc, fl, -1, 8);
1216		FL_UNLOCK(fl);
1217	}
1218
1219	/* Enable IQ interrupts */
1220	atomic_store_rel_32(&iq->state, IQS_IDLE);
1221	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
1222	    V_INGRESSQID(iq->cntxt_id));
1223
1224	return (0);
1225}
1226
1227/*
1228 * This can be called with the iq/fl in any state - fully allocated and
1229 * functional, partially allocated, even all-zeroed out.
1230 */
1231static int
1232free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
1233{
1234	int i, rc;
1235	struct adapter *sc = iq->adapter;
1236	device_t dev;
1237
1238	if (sc == NULL)
1239		return (0);	/* nothing to do */
1240
1241	dev = pi ? pi->dev : sc->dev;
1242
1243	if (iq->flags & IQ_STARTED) {
1244		rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0,
1245		    iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff);
1246		if (rc != 0) {
1247			device_printf(dev,
1248			    "failed to stop queue %p: %d\n", iq, rc);
1249			return (rc);
1250		}
1251		iq->flags &= ~IQ_STARTED;
1252
1253		/* Synchronize with the interrupt handler */
1254		while (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_DISABLED))
1255			pause("iqfree", hz / 1000);
1256	}
1257
1258	if (iq->flags & IQ_ALLOCATED) {
1259
1260		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
1261		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
1262		    fl ? fl->cntxt_id : 0xffff, 0xffff);
1263		if (rc != 0) {
1264			device_printf(dev,
1265			    "failed to free queue %p: %d\n", iq, rc);
1266			return (rc);
1267		}
1268		iq->flags &= ~IQ_ALLOCATED;
1269	}
1270
1271	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
1272
1273	bzero(iq, sizeof(*iq));
1274
1275	if (fl) {
1276		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
1277		    fl->desc);
1278
1279		if (fl->sdesc) {
1280			FL_LOCK(fl);
1281			free_fl_sdesc(fl);
1282			FL_UNLOCK(fl);
1283		}
1284
1285		if (mtx_initialized(&fl->fl_lock))
1286			mtx_destroy(&fl->fl_lock);
1287
1288		for (i = 0; i < FL_BUF_SIZES; i++) {
1289			if (fl->tag[i])
1290				bus_dma_tag_destroy(fl->tag[i]);
1291		}
1292
1293		bzero(fl, sizeof(*fl));
1294	}
1295
1296	return (0);
1297}
1298
1299static int
1300alloc_iq(struct sge_iq *iq, int intr_idx)
1301{
1302	return alloc_iq_fl(NULL, iq, NULL, intr_idx);
1303}
1304
1305static int
1306free_iq(struct sge_iq *iq)
1307{
1308	return free_iq_fl(NULL, iq, NULL);
1309}
1310
1311static int
1312alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
1313{
1314	int rc;
1315	struct sysctl_oid *oid;
1316	struct sysctl_oid_list *children;
1317	char name[16];
1318
1319	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx);
1320	if (rc != 0)
1321		return (rc);
1322
1323#ifdef INET
1324	rc = tcp_lro_init(&rxq->lro);
1325	if (rc != 0)
1326		return (rc);
1327	rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
1328
1329	if (pi->ifp->if_capenable & IFCAP_LRO)
1330		rxq->flags |= RXQ_LRO_ENABLED;
1331#endif
1332	rxq->ifp = pi->ifp;
1333
1334	children = SYSCTL_CHILDREN(pi->oid_rxq);
1335
1336	snprintf(name, sizeof(name), "%d", idx);
1337	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1338	    NULL, "rx queue");
1339	children = SYSCTL_CHILDREN(oid);
1340
1341#ifdef INET
1342	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
1343	    &rxq->lro.lro_queued, 0, NULL);
1344	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
1345	    &rxq->lro.lro_flushed, 0, NULL);
1346#endif
1347	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
1348	    &rxq->rxcsum, "# of times hardware assisted with checksum");
1349	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
1350	    CTLFLAG_RD, &rxq->vlan_extraction,
1351	    "# of times hardware extracted 802.1Q tag");
1352
1353	return (rc);
1354}
1355
1356static int
1357free_rxq(struct port_info *pi, struct sge_rxq *rxq)
1358{
1359	int rc;
1360
1361#ifdef INET
1362	if (rxq->lro.ifp) {
1363		tcp_lro_free(&rxq->lro);
1364		rxq->lro.ifp = NULL;
1365	}
1366#endif
1367
1368	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
1369	if (rc == 0)
1370		bzero(rxq, sizeof(*rxq));
1371
1372	return (rc);
1373}
1374
1375static int
1376alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx)
1377{
1378	int rc, cntxt_id;
1379	size_t len;
1380	struct fw_eq_ctrl_cmd c;
1381	struct sge_eq *eq = &ctrlq->eq;
1382	char name[16];
1383	struct sysctl_oid *oid;
1384	struct sysctl_oid_list *children;
1385
1386	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
1387
1388	len = eq->qsize * CTRL_EQ_ESIZE;
1389	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
1390	    &eq->ba, (void **)&eq->desc);
1391	if (rc)
1392		return (rc);
1393
1394	eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE;
1395	eq->spg = (void *)&eq->desc[eq->cap];
1396	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
1397	eq->iqid = sc->sge.fwq.cntxt_id;
1398
1399	bzero(&c, sizeof(c));
1400
1401	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
1402	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
1403	    V_FW_EQ_CTRL_CMD_VFN(0));
1404	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
1405	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
1406	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
1407	c.physeqid_pkd = htobe32(0);
1408	c.fetchszm_to_iqid =
1409	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1410		V_FW_EQ_CTRL_CMD_PCIECHN(idx) |
1411		V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
1412	c.dcaen_to_eqsize =
1413	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1414		V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1415		V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1416		V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
1417	c.eqaddr = htobe64(eq->ba);
1418
1419	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1420	if (rc != 0) {
1421		device_printf(sc->dev,
1422		    "failed to create control queue %d: %d\n", idx, rc);
1423		return (rc);
1424	}
1425
1426	eq->pidx = eq->cidx = 0;
1427	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
1428	eq->flags |= (EQ_ALLOCATED | EQ_STARTED);
1429
1430	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1431	KASSERT(cntxt_id < sc->sge.neq,
1432	    ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1433	    cntxt_id, sc->sge.neq - 1));
1434	sc->sge.eqmap[cntxt_id] = eq;
1435
1436	children = SYSCTL_CHILDREN(sc->oid_ctrlq);
1437
1438	snprintf(name, sizeof(name), "%d", idx);
1439	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1440	    NULL, "ctrl queue");
1441	children = SYSCTL_CHILDREN(oid);
1442
1443	SYSCTL_ADD_UQUAD(&sc->ctx, children, OID_AUTO, "total_wrs", CTLFLAG_RD,
1444	    &ctrlq->total_wrs, "total # of work requests");
1445	SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
1446	    &ctrlq->no_desc, 0,
1447	    "# of times ctrlq ran out of hardware descriptors");
1448	SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "too_long", CTLFLAG_RD,
1449	    &ctrlq->too_long, 0, "# of oversized work requests");
1450
1451	return (rc);
1452}
1453
1454static int
1455free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq)
1456{
1457	int rc;
1458	struct sge_eq *eq = &ctrlq->eq;
1459
1460	if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
1461		rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
1462		if (rc != 0) {
1463			device_printf(sc->dev,
1464			    "failed to free ctrl queue %p: %d\n", eq, rc);
1465			return (rc);
1466		}
1467		eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
1468	}
1469
1470	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
1471
1472	if (mtx_initialized(&eq->eq_lock))
1473		mtx_destroy(&eq->eq_lock);
1474
1475	bzero(ctrlq, sizeof(*ctrlq));
1476	return (0);
1477}
1478
1479static int
1480alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
1481{
1482	int rc, cntxt_id;
1483	size_t len;
1484	struct adapter *sc = pi->adapter;
1485	struct fw_eq_eth_cmd c;
1486	struct sge_eq *eq = &txq->eq;
1487	char name[16];
1488	struct sysctl_oid *oid;
1489	struct sysctl_oid_list *children;
1490
1491	txq->ifp = pi->ifp;
1492	TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq);
1493
1494	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
1495
1496	len = eq->qsize * TX_EQ_ESIZE;
1497	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
1498	    &eq->ba, (void **)&eq->desc);
1499	if (rc)
1500		return (rc);
1501
1502	eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE;
1503	eq->spg = (void *)&eq->desc[eq->cap];
1504	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
1505	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
1506	    M_ZERO | M_WAITOK);
1507	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
1508	eq->iqid = sc->sge.rxq[pi->first_rxq].iq.cntxt_id;
1509
1510	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
1511	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
1512	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
1513	if (rc != 0) {
1514		device_printf(sc->dev,
1515		    "failed to create tx DMA tag: %d\n", rc);
1516		return (rc);
1517	}
1518
1519	rc = alloc_tx_maps(txq);
1520	if (rc != 0) {
1521		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
1522		return (rc);
1523	}
1524
1525	bzero(&c, sizeof(c));
1526
1527	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
1528	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
1529	    V_FW_EQ_ETH_CMD_VFN(0));
1530	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
1531	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
1532	c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
1533	c.fetchszm_to_iqid =
1534	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1535		V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) |
1536		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
1537	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1538		      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1539		      V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1540		      V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
1541	c.eqaddr = htobe64(eq->ba);
1542
1543	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1544	if (rc != 0) {
1545		device_printf(pi->dev,
1546		    "failed to create egress queue: %d\n", rc);
1547		return (rc);
1548	}
1549
1550	eq->pidx = eq->cidx = 0;
1551	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
1552	eq->flags |= (EQ_ALLOCATED | EQ_STARTED);
1553
1554	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1555	KASSERT(cntxt_id < sc->sge.neq,
1556	    ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1557	    cntxt_id, sc->sge.neq - 1));
1558	sc->sge.eqmap[cntxt_id] = eq;
1559
1560	children = SYSCTL_CHILDREN(pi->oid_txq);
1561
1562	snprintf(name, sizeof(name), "%d", idx);
1563	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1564	    NULL, "tx queue");
1565	children = SYSCTL_CHILDREN(oid);
1566
1567	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
1568	    &txq->txcsum, "# of times hardware assisted with checksum");
1569	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
1570	    CTLFLAG_RD, &txq->vlan_insertion,
1571	    "# of times hardware inserted 802.1Q tag");
1572	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
1573	    &txq->tso_wrs, "# of IPv4 TSO work requests");
1574	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
1575	    &txq->imm_wrs, "# of work requests with immediate data");
1576	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
1577	    &txq->sgl_wrs, "# of work requests with direct SGL");
1578	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
1579	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
1580	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
1581	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
1582	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
1583	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
1584
1585	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
1586	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
1587	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
1588	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
1589	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
1590	    &txq->egr_update, 0, "egress update notifications from the SGE");
1591
1592	return (rc);
1593}
1594
1595static int
1596free_txq(struct port_info *pi, struct sge_txq *txq)
1597{
1598	int rc;
1599	struct adapter *sc = pi->adapter;
1600	struct sge_eq *eq = &txq->eq;
1601
1602	if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
1603
1604		/*
1605		 * Wait for the response to a credit flush if there's one
1606		 * pending.  Clearing the flag tells handle_sge_egr_update or
1607		 * cxgbe_txq_start (depending on how far the response has made
1608		 * it) that they should ignore the response and wake up free_txq
1609		 * instead.
1610		 *
1611		 * The interface has been marked down by the time we get here
1612		 * (both IFF_UP and IFF_DRV_RUNNING cleared).  qflush has
1613		 * emptied the tx buf_rings and we know nothing new is being
1614		 * queued for tx so we don't have to worry about a new credit
1615		 * flush request.
1616		 */
1617		TXQ_LOCK(txq);
1618		if (eq->flags & EQ_CRFLUSHED) {
1619			eq->flags &= ~EQ_CRFLUSHED;
1620			msleep(txq, &eq->eq_lock, 0, "crflush", 0);
1621		}
1622		TXQ_UNLOCK(txq);
1623
1624		rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
1625		if (rc != 0) {
1626			device_printf(pi->dev,
1627			    "failed to free egress queue %p: %d\n", eq, rc);
1628			return (rc);
1629		}
1630		eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
1631	}
1632
1633	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
1634
1635	free(txq->sdesc, M_CXGBE);
1636
1637	if (txq->maps)
1638		free_tx_maps(txq);
1639
1640	buf_ring_free(txq->br, M_CXGBE);
1641
1642	if (txq->tx_tag)
1643		bus_dma_tag_destroy(txq->tx_tag);
1644
1645	if (mtx_initialized(&eq->eq_lock))
1646		mtx_destroy(&eq->eq_lock);
1647
1648	bzero(txq, sizeof(*txq));
1649	return (0);
1650}
1651
1652static void
1653oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1654{
1655	bus_addr_t *ba = arg;
1656
1657	KASSERT(nseg == 1,
1658	    ("%s meant for single segment mappings only.", __func__));
1659
1660	*ba = error ? 0 : segs->ds_addr;
1661}
1662
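/*
 * Each ingress entry ends with a rsp_ctrl whose generation bit matches iq->gen
 * only if the hardware has written that entry since it was last consumed.
 */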
1663static inline bool
1664is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
1665{
1666	*ctrl = (void *)((uintptr_t)iq->cdesc +
1667	    (iq->esize - sizeof(struct rsp_ctrl)));
1668
1669	return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
1670}
1671
1672static inline void
1673iq_next(struct sge_iq *iq)
1674{
1675	iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
1676	if (__predict_false(++iq->cidx == iq->qsize - 1)) {
1677		iq->cidx = 0;
1678		iq->gen ^= 1;
1679		iq->cdesc = iq->desc;
1680	}
1681}
1682
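/*
 * pidx and cidx count individual buffers, but the hardware works in units of
 * freelist descriptors, each of which holds 8 buffer pointers.
 */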
1683#define FL_HW_IDX(x) ((x) >> 3)
1684static inline void
1685ring_fl_db(struct adapter *sc, struct sge_fl *fl)
1686{
1687	int ndesc = fl->pending / 8;
1688
1689	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
1690		ndesc--;	/* hold back one credit */
1691
1692	if (ndesc <= 0)
1693		return;		/* nothing to do */
1694
1695	wmb();
1696
1697	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO |
1698	    V_QID(fl->cntxt_id) | V_PIDX(ndesc));
1699	fl->pending -= ndesc * 8;
1700}
1701
1702/*
1703 * Fill up the freelist with up to nbufs buffers and ring its doorbell if the
1704 * number of buffers ready to be handed to the hardware is >= dbthresh.
1705 */
1706static void
1707refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh)
1708{
1709	__be64 *d = &fl->desc[fl->pidx];
1710	struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
1711	bus_dma_tag_t tag;
1712	bus_addr_t pa;
1713	caddr_t cl;
1714	int rc;
1715
1716	FL_LOCK_ASSERT_OWNED(fl);
1717
1718	if (nbufs < 0 || nbufs > fl->needed)
1719		nbufs = fl->needed;
1720
1721	while (nbufs--) {
1722
1723		if (sd->cl != NULL) {
1724
1725			/*
1726			 * This happens when a frame small enough to fit
1727			 * entirely in an mbuf was received in cl last time.
1728			 * We'd held on to cl and can reuse it now.  Note that
1729			 * we reuse a cluster of the old size if fl->tag_idx is
1730			 * no longer the same as sd->tag_idx.
1731			 */
1732
1733			KASSERT(*d == sd->ba_tag,
1734			    ("%s: recycling problem at pidx %d",
1735			    __func__, fl->pidx));
1736
1737			d++;
1738			goto recycled;
1739		}
1740
1741
1742		if (fl->tag_idx != sd->tag_idx) {
1743			bus_dmamap_t map;
1744			bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
1745			bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];
1746
1747			/*
1748			 * An MTU change can get us here.  Discard the old map
1749			 * which was created with the old tag, but only if
1750			 * we're able to get a new one.
1751			 */
1752			rc = bus_dmamap_create(newtag, 0, &map);
1753			if (rc == 0) {
1754				bus_dmamap_destroy(oldtag, sd->map);
1755				sd->map = map;
1756				sd->tag_idx = fl->tag_idx;
1757			}
1758		}
1759
1760		tag = fl->tag[sd->tag_idx];
1761
1762		cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
1763		if (cl == NULL)
1764			break;
1765
1766		rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
1767		    oneseg_dma_callback, &pa, 0);
1768		if (rc != 0 || pa == 0) {
1769			fl->dmamap_failed++;
1770			uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
1771			break;
1772		}
1773
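		/*
		 * The buffer size index rides in the low bits of the bus
		 * address; the SGE uses it to look up the size programmed in
		 * the SGE_FL_BUFFER_SIZE registers.
		 */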
1774		sd->cl = cl;
1775		*d++ = htobe64(pa | sd->tag_idx);
1776
1777#ifdef INVARIANTS
1778		sd->ba_tag = htobe64(pa | sd->tag_idx);
1779#endif
1780
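		/*
		 * Pair every buffer with an mbuf up front so that the rx path
		 * doesn't have to allocate one per frame later.
		 */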
1781recycled:
1782		/* sd->m is never recycled, should always be NULL */
1783		KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));
1784
1785		sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
1786		if (sd->m == NULL)
1787			break;
1788
1789		fl->pending++;
1790		fl->needed--;
1791		sd++;
1792		if (++fl->pidx == fl->cap) {
1793			fl->pidx = 0;
1794			sd = fl->sdesc;
1795			d = fl->desc;
1796		}
1797	}
1798
1799	if (fl->pending >= dbthresh)
1800		ring_fl_db(sc, fl);
1801}
1802
1803static int
1804alloc_fl_sdesc(struct sge_fl *fl)
1805{
1806	struct fl_sdesc *sd;
1807	bus_dma_tag_t tag;
1808	int i, rc;
1809
1810	FL_LOCK_ASSERT_OWNED(fl);
1811
1812	fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
1813	    M_ZERO | M_WAITOK);
1814
1815	tag = fl->tag[fl->tag_idx];
1816	sd = fl->sdesc;
1817	for (i = 0; i < fl->cap; i++, sd++) {
1818
1819		sd->tag_idx = fl->tag_idx;
1820		rc = bus_dmamap_create(tag, 0, &sd->map);
1821		if (rc != 0)
1822			goto failed;
1823	}
1824
1825	return (0);
1826failed:
1827	while (--i >= 0) {
1828		sd--;
1829		bus_dmamap_destroy(tag, sd->map);
1830		if (sd->m) {
1831			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
1832			m_free(sd->m);
1833			sd->m = NULL;
1834		}
1835	}
1836	KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));
1837
1838	free(fl->sdesc, M_CXGBE);
1839	fl->sdesc = NULL;
1840
1841	return (rc);
1842}
1843
1844static void
1845free_fl_sdesc(struct sge_fl *fl)
1846{
1847	struct fl_sdesc *sd;
1848	int i;
1849
1850	FL_LOCK_ASSERT_OWNED(fl);
1851
1852	sd = fl->sdesc;
1853	for (i = 0; i < fl->cap; i++, sd++) {
1854
1855		if (sd->m) {
1856			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
1857			m_free(sd->m);
1858			sd->m = NULL;
1859		}
1860
1861		if (sd->cl) {
1862			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
1863			uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
1864			sd->cl = NULL;
1865		}
1866
1867		bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
1868	}
1869
1870	free(fl->sdesc, M_CXGBE);
1871	fl->sdesc = NULL;
1872}
1873
1874static int
1875alloc_tx_maps(struct sge_txq *txq)
1876{
1877	struct tx_map *txm;
1878	int i, rc, count;
1879
1880	/*
1881	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
1882	 * limit for any WR).  txq->no_dmamap events shouldn't occur if the maps
1883	 * array is sized for the worst case.
1884	 */
1885	count = txq->eq.qsize * 10 / 8;
1886	txq->map_total = txq->map_avail = count;
1887	txq->map_cidx = txq->map_pidx = 0;
1888
1889	txq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
1890	    M_ZERO | M_WAITOK);
1891
1892	txm = txq->maps;
1893	for (i = 0; i < count; i++, txm++) {
1894		rc = bus_dmamap_create(txq->tx_tag, 0, &txm->map);
1895		if (rc != 0)
1896			goto failed;
1897	}
1898
1899	return (0);
1900failed:
1901	while (--i >= 0) {
1902		txm--;
1903		bus_dmamap_destroy(txq->tx_tag, txm->map);
1904	}
1905	KASSERT(txm == txq->maps, ("%s: EDOOFUS", __func__));
1906
1907	free(txq->maps, M_CXGBE);
1908	txq->maps = NULL;
1909
1910	return (rc);
1911}
1912
1913static void
1914free_tx_maps(struct sge_txq *txq)
1915{
1916	struct tx_map *txm;
1917	int i;
1918
1919	txm = txq->maps;
1920	for (i = 0; i < txq->map_total; i++, txm++) {
1921
1922		if (txm->m) {
1923			bus_dmamap_unload(txq->tx_tag, txm->map);
1924			m_freem(txm->m);
1925			txm->m = NULL;
1926		}
1927
1928		bus_dmamap_destroy(txq->tx_tag, txm->map);
1929	}
1930
1931	free(txq->maps, M_CXGBE);
1932	txq->maps = NULL;
1933}
1934
1935/*
1936 * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
1937 * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
1938 * of immediate data.
1939 */
1940#define IMM_LEN ( \
1941      2 * TX_EQ_ESIZE \
1942    - sizeof(struct fw_eth_tx_pkt_wr) \
1943    - sizeof(struct cpl_tx_pkt_core))
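/*
 * The 96 byte figure above assumes 64 byte tx descriptors (TX_EQ_ESIZE) and a
 * 16 byte fw_eth_tx_pkt_wr followed by a 16 byte cpl_tx_pkt_core:
 * 2 * 64 - 16 - 16 = 96.
 */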
1944
1945/*
1946 * Returns non-zero on failure, no need to cleanup anything in that case.
1947 *
1948 * Note 1: We always try to defrag the mbuf if required and return EFBIG only
1949 * if the resulting chain still won't fit in a tx descriptor.
1950 *
1951 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
1952 * does not have the TCP header in it.
1953 */
1954static int
1955get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
1956    int sgl_only)
1957{
1958	struct mbuf *m = *fp;
1959	struct tx_map *txm;
1960	int rc, defragged = 0, n;
1961
1962	TXQ_LOCK_ASSERT_OWNED(txq);
1963
1964	if (m->m_pkthdr.tso_segsz)
1965		sgl_only = 1;	/* Do not allow immediate data with LSO */
1966
1967start:	sgl->nsegs = 0;
1968
1969	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
1970		return (0);	/* nsegs = 0 tells caller to use imm. tx */
1971
1972	if (txq->map_avail == 0) {
1973		txq->no_dmamap++;
1974		return (ENOMEM);
1975	}
1976	txm = &txq->maps[txq->map_pidx];
1977
1978	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
1979		*fp = m_pullup(m, 50);
1980		m = *fp;
1981		if (m == NULL)
1982			return (ENOBUFS);
1983	}
1984
1985	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
1986	    &sgl->nsegs, BUS_DMA_NOWAIT);
1987	if (rc == EFBIG && defragged == 0) {
1988		m = m_defrag(m, M_DONTWAIT);
1989		if (m == NULL)
1990			return (EFBIG);
1991
1992		defragged = 1;
1993		*fp = m;
1994		goto start;
1995	}
1996	if (rc != 0)
1997		return (rc);
1998
1999	txm->m = m;
2000	txq->map_avail--;
2001	if (++txq->map_pidx == txq->map_total)
2002		txq->map_pidx = 0;
2003
2004	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
2005	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
2006
2007	/*
2008	 * Store the # of flits required to hold this frame's SGL in nflits.  An
2009	 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by a
2010	 * list of (len[i] + len[i+1], addr[i], addr[i+1]) tuples.  If the second
2011	 * address of the last tuple is unused then its length must be set to 0.
2012	 */
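	/*
	 * E.g. the formula below gives 2 flits for nsegs = 1 (just the header
	 * tuple), 4 for nsegs = 2, 5 for nsegs = 3, and 7 for nsegs = 4: each
	 * extra pair of segments costs 3 flits and a trailing odd segment
	 * costs 2.
	 */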
2013	n = sgl->nsegs - 1;
2014	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
2015
2016	return (0);
2017}
2018
2019
2020/*
2021 * Releases all the txq resources used up in the specified sgl.
2022 */
2023static int
2024free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
2025{
2026	struct tx_map *txm;
2027
2028	TXQ_LOCK_ASSERT_OWNED(txq);
2029
2030	if (sgl->nsegs == 0)
2031		return (0);	/* didn't use any map */
2032
2033	/* 1 pkt uses exactly 1 map, back it out */
2034
2035	txq->map_avail++;
2036	if (txq->map_pidx > 0)
2037		txq->map_pidx--;
2038	else
2039		txq->map_pidx = txq->map_total - 1;
2040
2041	txm = &txq->maps[txq->map_pidx];
2042	bus_dmamap_unload(txq->tx_tag, txm->map);
2043	txm->m = NULL;
2044
2045	return (0);
2046}
2047
2048static int
2049write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
2050    struct sgl *sgl)
2051{
2052	struct sge_eq *eq = &txq->eq;
2053	struct fw_eth_tx_pkt_wr *wr;
2054	struct cpl_tx_pkt_core *cpl;
2055	uint32_t ctrl;	/* used in many unrelated places */
2056	uint64_t ctrl1;
2057	int nflits, ndesc, pktlen;
2058	struct tx_sdesc *txsd;
2059	caddr_t dst;
2060
2061	TXQ_LOCK_ASSERT_OWNED(txq);
2062
2063	pktlen = m->m_pkthdr.len;
2064
2065	/*
2066	 * Do we have enough flits to send this frame out?
2067	 */
2068	ctrl = sizeof(struct cpl_tx_pkt_core);
2069	if (m->m_pkthdr.tso_segsz) {
2070		nflits = TXPKT_LSO_WR_HDR;
2071		ctrl += sizeof(struct cpl_tx_pkt_lso);
2072	} else
2073		nflits = TXPKT_WR_HDR;
2074	if (sgl->nsegs > 0)
2075		nflits += sgl->nflits;
2076	else {
2077		nflits += howmany(pktlen, 8);
2078		ctrl += pktlen;
2079	}
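	/*
	 * A 64 byte hardware descriptor holds 8 flits, so round the flit count
	 * up to whole descriptors and make sure that many are still available.
	 */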
2080	ndesc = howmany(nflits, 8);
2081	if (ndesc > eq->avail)
2082		return (ENOMEM);
2083
2084	/* Firmware work request header */
2085	wr = (void *)&eq->desc[eq->pidx];
2086	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
2087	    V_FW_WR_IMMDLEN(ctrl));
2088	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
2089	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
2090		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2091		eq->flags |= EQ_CRFLUSHED;
2092	}
2093
2094	wr->equiq_to_len16 = htobe32(ctrl);
2095	wr->r3 = 0;
2096
2097	if (m->m_pkthdr.tso_segsz) {
2098		struct cpl_tx_pkt_lso *lso = (void *)(wr + 1);
2099		struct ether_header *eh;
2100		struct ip *ip;
2101		struct tcphdr *tcp;
2102
2103		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
2104		    F_LSO_LAST_SLICE;
2105
2106		eh = mtod(m, struct ether_header *);
2107		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2108			ctrl |= V_LSO_ETHHDR_LEN(1);
2109			ip = (void *)((struct ether_vlan_header *)eh + 1);
2110		} else
2111			ip = (void *)(eh + 1);
2112
2113		tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
2114		ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
2115		    V_LSO_TCPHDR_LEN(tcp->th_off);
2116
2117		lso->lso_ctrl = htobe32(ctrl);
2118		lso->ipid_ofst = htobe16(0);
2119		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
2120		lso->seqno_offset = htobe32(0);
2121		lso->len = htobe32(pktlen);
2122
2123		cpl = (void *)(lso + 1);
2124
2125		txq->tso_wrs++;
2126	} else
2127		cpl = (void *)(wr + 1);
2128
2129	/* Checksum offload */
2130	ctrl1 = 0;
2131	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
2132		ctrl1 |= F_TXPKT_IPCSUM_DIS;
2133	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
2134		ctrl1 |= F_TXPKT_L4CSUM_DIS;
2135	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
2136		txq->txcsum++;	/* some hardware assistance provided */
2137
2138	/* VLAN tag insertion */
2139	if (m->m_flags & M_VLANTAG) {
2140		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
2141		txq->vlan_insertion++;
2142	}
2143
2144	/* CPL header */
2145	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
2146	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2147	cpl->pack = 0;
2148	cpl->len = htobe16(pktlen);
2149	cpl->ctrl1 = htobe64(ctrl1);
2150
2151	/* Software descriptor */
2152	txsd = &txq->sdesc[eq->pidx];
2153	txsd->desc_used = ndesc;
2154
2155	eq->pending += ndesc;
2156	eq->avail -= ndesc;
2157	eq->pidx += ndesc;
2158	if (eq->pidx >= eq->cap)
2159		eq->pidx -= eq->cap;
2160
2161	/* SGL */
2162	dst = (void *)(cpl + 1);
2163	if (sgl->nsegs > 0) {
2164		txsd->credits = 1;
2165		txq->sgl_wrs++;
2166		write_sgl_to_txd(eq, sgl, &dst);
2167	} else {
2168		txsd->credits = 0;
2169		txq->imm_wrs++;
2170		for (; m; m = m->m_next) {
2171			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
2172#ifdef INVARIANTS
2173			pktlen -= m->m_len;
2174#endif
2175		}
2176#ifdef INVARIANTS
2177		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
2178#endif
2179
2180	}
2181
2182	txq->txpkt_wrs++;
2183	return (0);
2184}
2185
2186/*
2187 * Returns 0 to indicate that m has been accepted into a coalesced tx work
2188 * request.  It has either been folded into txpkts or txpkts was flushed and m
2189 * has started a new coalesced work request (as the first frame in a fresh
2190 * txpkts).
2191 *
2192 * Returns non-zero to indicate a failure; the caller is then responsible for
2193 * transmitting m.  If there was anything in txpkts it has been flushed.
2194 */
2195static int
2196add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
2197    struct mbuf *m, struct sgl *sgl)
2198{
2199	struct sge_eq *eq = &txq->eq;
2200	int can_coalesce;
2201	struct tx_sdesc *txsd;
2202	int flits;
2203
2204	TXQ_LOCK_ASSERT_OWNED(txq);
2205
2206	if (txpkts->npkt > 0) {
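		/*
		 * m can be folded into the WR under construction only if it is
		 * not an LSO frame, the combined flit count fits within both
		 * the WR size limit and the descriptors still available (8
		 * flits per descriptor), and the total payload still fits in
		 * the WR's 16 bit plen field.
		 */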
2207		flits = TXPKTS_PKT_HDR + sgl->nflits;
2208		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
2209		    txpkts->nflits + flits <= TX_WR_FLITS &&
2210		    txpkts->nflits + flits <= eq->avail * 8 &&
2211		    txpkts->plen + m->m_pkthdr.len < 65536;
2212
2213		if (can_coalesce) {
2214			txpkts->npkt++;
2215			txpkts->nflits += flits;
2216			txpkts->plen += m->m_pkthdr.len;
2217
2218			txsd = &txq->sdesc[eq->pidx];
2219			txsd->credits++;
2220
2221			return (0);
2222		}
2223
2224		/*
2225		 * Couldn't coalesce m into txpkts.  The first order of business
2226		 * is to send txpkts on its way.  Then we'll revisit m.
2227		 */
2228		write_txpkts_wr(txq, txpkts);
2229	}
2230
2231	/*
2232	 * Check if we can start a new coalesced tx work request with m as
2233	 * the first packet in it.
2234	 */
2235
2236	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
2237
2238	flits = TXPKTS_WR_HDR + sgl->nflits;
2239	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
2240	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
2241
2242	if (can_coalesce == 0)
2243		return (EINVAL);
2244
2245	/*
2246	 * Start a fresh coalesced tx WR with m as the first frame in it.
2247	 */
2248	txpkts->npkt = 1;
2249	txpkts->nflits = flits;
2250	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
2251	txpkts->plen = m->m_pkthdr.len;
2252
2253	txsd = &txq->sdesc[eq->pidx];
2254	txsd->credits = 1;
2255
2256	return (0);
2257}
2258
2259/*
2260 * Note that write_txpkts_wr can never run out of hardware descriptors (but
2261 * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
2262 * coalescing only if sufficient hardware descriptors are available.
2263 */
2264static void
2265write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
2266{
2267	struct sge_eq *eq = &txq->eq;
2268	struct fw_eth_tx_pkts_wr *wr;
2269	struct tx_sdesc *txsd;
2270	uint32_t ctrl;
2271	int ndesc;
2272
2273	TXQ_LOCK_ASSERT_OWNED(txq);
2274
2275	ndesc = howmany(txpkts->nflits, 8);
2276
2277	wr = (void *)&eq->desc[eq->pidx];
2278	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) |
2279	    V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */
2280	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
2281	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
2282		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2283		eq->flags |= EQ_CRFLUSHED;
2284	}
2285	wr->equiq_to_len16 = htobe32(ctrl);
2286	wr->plen = htobe16(txpkts->plen);
2287	wr->npkt = txpkts->npkt;
2288	wr->r3 = wr->r4 = 0;
2289
2290	/* Everything else already written */
2291
2292	txsd = &txq->sdesc[eq->pidx];
2293	txsd->desc_used = ndesc;
2294
2295	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
2296
2297	eq->pending += ndesc;
2298	eq->avail -= ndesc;
2299	eq->pidx += ndesc;
2300	if (eq->pidx >= eq->cap)
2301		eq->pidx -= eq->cap;
2302
2303	txq->txpkts_pkts += txpkts->npkt;
2304	txq->txpkts_wrs++;
2305	txpkts->npkt = 0;	/* emptied */
2306}
2307
2308static inline void
2309write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
2310    struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
2311{
2312	struct ulp_txpkt *ulpmc;
2313	struct ulptx_idata *ulpsc;
2314	struct cpl_tx_pkt_core *cpl;
2315	struct sge_eq *eq = &txq->eq;
2316	uintptr_t flitp, start, end;
2317	uint64_t ctrl;
2318	caddr_t dst;
2319
2320	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
2321
2322	start = (uintptr_t)eq->desc;
2323	end = (uintptr_t)eq->spg;
2324
2325	/* Checksum offload */
2326	ctrl = 0;
2327	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
2328		ctrl |= F_TXPKT_IPCSUM_DIS;
2329	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
2330		ctrl |= F_TXPKT_L4CSUM_DIS;
2331	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
2332		txq->txcsum++;	/* some hardware assistance provided */
2333
2334	/* VLAN tag insertion */
2335	if (m->m_flags & M_VLANTAG) {
2336		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
2337		txq->vlan_insertion++;
2338	}
2339
2340	/*
2341	 * The previous packet's SGL must have ended at a 16 byte boundary (this
2342	 * is required by the firmware/hardware).  It follows that flitp cannot
2343	 * wrap around between the ULPTX master command and ULPTX subcommand (8
2344	 * bytes each), and that it cannot wrap around in the middle of the
2345	 * cpl_tx_pkt_core either.
2346	 */
2347	flitp = (uintptr_t)txpkts->flitp;
2348	KASSERT((flitp & 0xf) == 0,
2349	    ("%s: last SGL did not end at 16 byte boundary: %p",
2350	    __func__, txpkts->flitp));
2351
2352	/* ULP master command */
2353	ulpmc = (void *)flitp;
2354	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
2355	    V_ULP_TXPKT_FID(eq->iqid));
2356	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
2357	    sizeof(*cpl) + 8 * sgl->nflits, 16));
2358
2359	/* ULP subcommand */
2360	ulpsc = (void *)(ulpmc + 1);
2361	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
2362	    F_ULP_TX_SC_MORE);
2363	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
2364
2365	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
2366	if (flitp == end)
2367		flitp = start;
2368
2369	/* CPL_TX_PKT */
2370	cpl = (void *)flitp;
2371	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
2372	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2373	cpl->pack = 0;
2374	cpl->len = htobe16(m->m_pkthdr.len);
2375	cpl->ctrl1 = htobe64(ctrl);
2376
2377	flitp += sizeof(*cpl);
2378	if (flitp == end)
2379		flitp = start;
2380
2381	/* SGL for this frame */
2382	dst = (caddr_t)flitp;
2383	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
2384	txpkts->flitp = (void *)dst;
2385
2386	KASSERT(((uintptr_t)dst & 0xf) == 0,
2387	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
2388}
2389
2390/*
2391 * If the SGL ends on an address that is not 16 byte aligned, this function will
2392 * add a 0 filled flit at the end.  It returns 1 in that case.
2393 */
2394static int
2395write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
2396{
2397	__be64 *flitp, *end;
2398	struct ulptx_sgl *usgl;
2399	bus_dma_segment_t *seg;
2400	int i, padded;
2401
2402	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
2403	    ("%s: bad SGL - nsegs=%d, nflits=%d",
2404	    __func__, sgl->nsegs, sgl->nflits));
2405
2406	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
2407	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
2408
2409	flitp = (__be64 *)(*to);
2410	end = flitp + sgl->nflits;
2411	seg = &sgl->seg[0];
2412	usgl = (void *)flitp;
2413
2414	/*
2415	 * We start at a 16 byte boundary somewhere inside the tx descriptor
2416	 * ring, so we're at least 16 bytes away from the status page.  There is
2417	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
2418	 */
2419
2420	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
2421	    V_ULPTX_NSGE(sgl->nsegs));
2422	usgl->len0 = htobe32(seg->ds_len);
2423	usgl->addr0 = htobe64(seg->ds_addr);
2424	seg++;
2425
2426	if ((uintptr_t)end <= (uintptr_t)eq->spg) {
2427
2428		/* Won't wrap around at all */
2429
2430		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
2431			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
2432			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
2433		}
2434		if (i & 1)
2435			usgl->sge[i / 2].len[1] = htobe32(0);
2436	} else {
2437
2438		/* Will wrap somewhere in the rest of the SGL */
2439
2440		/* 2 flits already written, write the rest flit by flit */
2441		flitp = (void *)(usgl + 1);
2442		for (i = 0; i < sgl->nflits - 2; i++) {
2443			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
2444				flitp = (void *)eq->desc;
2445			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
2446		}
2447		end = flitp;
2448	}
2449
2450	if ((uintptr_t)end & 0xf) {
2451		*(uint64_t *)end = 0;
2452		end++;
2453		padded = 1;
2454	} else
2455		padded = 0;
2456
2457	if ((uintptr_t)end == (uintptr_t)eq->spg)
2458		*to = (void *)eq->desc;
2459	else
2460		*to = (void *)end;
2461
2462	return (padded);
2463}
2464
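/*
 * Copy len bytes into the descriptor ring at *to, wrapping around from the
 * status page back to the start of the ring if necessary, and advance *to
 * past the copied data.
 */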
2465static inline void
2466copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
2467{
2468	if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) {
2469		bcopy(from, *to, len);
2470		(*to) += len;
2471	} else {
2472		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
2473
2474		bcopy(from, *to, portion);
2475		from += portion;
2476		portion = len - portion;	/* remaining */
2477		bcopy(from, (void *)eq->desc, portion);
2478		(*to) = (caddr_t)eq->desc + portion;
2479	}
2480}
2481
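/*
 * Notify the chip of new tx descriptors: a write barrier followed by a write
 * of the number of pending descriptors to this EQ's doorbell register.
 */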
2482static inline void
2483ring_eq_db(struct adapter *sc, struct sge_eq *eq)
2484{
2485	wmb();
2486	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
2487	    V_QID(eq->cntxt_id) | V_PIDX(eq->pending));
2488	eq->pending = 0;
2489}
2490
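/*
 * Number of tx descriptors that the hardware has finished with, as reported
 * by the cidx in the EQ's status page, relative to the driver's cidx (modulo
 * the ring size).
 */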
2491static inline int
2492reclaimable(struct sge_eq *eq)
2493{
2494	unsigned int cidx;
2495
2496	cidx = eq->spg->cidx;	/* stable snapshot */
2497	cidx = be16_to_cpu(cidx);
2498
2499	if (cidx >= eq->cidx)
2500		return (cidx - eq->cidx);
2501	else
2502		return (cidx + eq->cap - eq->cidx);
2503}
2504
2505/*
2506 * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
2507 * many as possible but stop when there are around "n" mbufs to free.
2508 *
2509 * The actual number reclaimed is provided as the return value.
2510 */
2511static int
2512reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
2513{
2514	struct tx_sdesc *txsd;
2515	struct tx_map *txm;
2516	unsigned int reclaimed, maps;
2517	struct sge_eq *eq = &txq->eq;
2518
2519	EQ_LOCK_ASSERT_OWNED(eq);
2520
2521	if (can_reclaim == 0)
2522		can_reclaim = reclaimable(eq);
2523
2524	maps = reclaimed = 0;
2525	while (can_reclaim && maps < n) {
2526		int ndesc;
2527
2528		txsd = &txq->sdesc[eq->cidx];
2529		ndesc = txsd->desc_used;
2530
2531		/* Firmware doesn't return "partial" credits. */
2532		KASSERT(can_reclaim >= ndesc,
2533		    ("%s: unexpected number of credits: %d, %d",
2534		    __func__, can_reclaim, ndesc));
2535
2536		maps += txsd->credits;
2537
2538		reclaimed += ndesc;
2539		can_reclaim -= ndesc;
2540
2541		eq->cidx += ndesc;
2542		if (__predict_false(eq->cidx >= eq->cap))
2543			eq->cidx -= eq->cap;
2544	}
2545
2546	txm = &txq->maps[txq->map_cidx];
2547	if (maps)
2548		prefetch(txm->m);
2549
2550	eq->avail += reclaimed;
2551	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
2552	    ("%s: too many descriptors available", __func__));
2553
2554	txq->map_avail += maps;
2555	KASSERT(txq->map_avail <= txq->map_total,
2556	    ("%s: too many maps available", __func__));
2557
2558	while (maps--) {
2559		struct tx_map *next;
2560
2561		next = txm + 1;
2562		if (__predict_false(txq->map_cidx + 1 == txq->map_total))
2563			next = txq->maps;
2564		prefetch(next->m);
2565
2566		bus_dmamap_unload(txq->tx_tag, txm->map);
2567		m_freem(txm->m);
2568		txm->m = NULL;
2569
2570		txm = next;
2571		if (__predict_false(++txq->map_cidx == txq->map_total))
2572			txq->map_cidx = 0;
2573	}
2574
2575	return (reclaimed);
2576}
2577
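/*
 * Write a flush work request that asks the firmware for an egress queue
 * credit update; the update (see handle_sge_egr_update) is what lets a
 * stalled tx queue resume later.
 */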
2578static void
2579write_eqflush_wr(struct sge_eq *eq)
2580{
2581	struct fw_eq_flush_wr *wr;
2582
2583	EQ_LOCK_ASSERT_OWNED(eq);
2584	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
2585
2586	wr = (void *)&eq->desc[eq->pidx];
2587	bzero(wr, sizeof(*wr));
2588	wr->opcode = FW_EQ_FLUSH_WR;
2589	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
2590	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
2591
2592	eq->flags |= EQ_CRFLUSHED;
2593	eq->pending++;
2594	eq->avail--;
2595	if (++eq->pidx == eq->cap)
2596		eq->pidx = 0;
2597}
2598
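/*
 * Returns the idx'th flit of the part of the SGL that follows its first two
 * flits: segment lengths are packed two to a flit and each address gets a
 * flit of its own, matching the layout written by write_sgl_to_txd.
 */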
2599static __be64
2600get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
2601{
2602	int i = (idx / 3) * 2;
2603
2604	switch (idx % 3) {
2605	case 0: {
2606		__be64 rc;
2607
2608		rc = htobe32(sgl[i].ds_len);
2609		if (i + 1 < nsegs)
2610			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
2611
2612		return (rc);
2613	}
2614	case 1:
2615		return htobe64(sgl[i].ds_addr);
2616	case 2:
2617		return htobe64(sgl[i + 1].ds_addr);
2618	}
2619
2620	return (0);
2621}
2622
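/*
 * Select the smallest free list buffer size that can hold a full frame
 * (mtu + FL_PKTSHIFT bytes), falling back to the largest size if none can.
 */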
2623static void
2624set_fl_tag_idx(struct sge_fl *fl, int mtu)
2625{
2626	int i;
2627
2628	FL_LOCK_ASSERT_OWNED(fl);
2629
2630	for (i = 0; i < FL_BUF_SIZES - 1; i++) {
2631		if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT))
2632			break;
2633	}
2634
2635	fl->tag_idx = i;
2636}
2637
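/*
 * Egress update for one of the driver's tx queues: schedule the resume-tx
 * task if the queue had requested the update because it stalled, otherwise
 * wake up the thread in free_txq that is waiting for the queue to drain.
 */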
2638static int
2639handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl)
2640{
2641	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
2642	struct sge *s = &sc->sge;
2643	struct sge_txq *txq;
2644	struct port_info *pi;
2645
2646	txq = (void *)s->eqmap[qid - s->eq_start];
2647	TXQ_LOCK(txq);
2648	if (txq->eq.flags & EQ_CRFLUSHED) {
2649		pi = txq->ifp->if_softc;
2650		taskqueue_enqueue(pi->tq, &txq->resume_tx);
2651		txq->egr_update++;
2652	} else
2653		wakeup_one(txq);	/* txq is going away, wakeup free_txq */
2654	TXQ_UNLOCK(txq);
2655
2656	return (0);
2657}
2658
2659/*
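 * The frame is copied into the hardware descriptors as immediate data (no DMA
 * mapping is involved).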
2660 * m0 is freed on successful transmission.
2661 */
2662static int
2663ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0)
2664{
2665	struct sge_eq *eq = &ctrlq->eq;
2666	int rc = 0, ndesc;
2667	int can_reclaim;
2668	caddr_t dst;
2669	struct mbuf *m;
2670
2671	M_ASSERTPKTHDR(m0);
2672
2673	if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) {
2674		ctrlq->too_long++;
2675		return (EMSGSIZE);
2676	}
2677	ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE);
2678
2679	EQ_LOCK(eq);
2680
2681	can_reclaim = reclaimable(eq);
2682	eq->cidx += can_reclaim;
2683	eq->avail += can_reclaim;
2684	if (__predict_false(eq->cidx >= eq->cap))
2685		eq->cidx -= eq->cap;
2686
2687	if (eq->avail < ndesc) {
2688		rc = EAGAIN;
2689		ctrlq->no_desc++;
2690		goto failed;
2691	}
2692
2693	dst = (void *)&eq->desc[eq->pidx];
2694	for (m = m0; m; m = m->m_next)
2695		copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
2696
2697	eq->pidx += ndesc;
2698	if (__predict_false(eq->pidx >= eq->cap))
2699		eq->pidx -= eq->cap;
2700
2701	eq->pending += ndesc;
2702	ctrlq->total_wrs++;
2703	ring_eq_db(sc, eq);
2704failed:
2705	EQ_UNLOCK(eq);
2706	if (rc == 0)
2707		m_freem(m0);
2708
2709	return (rc);
2710}
2711