sfxge_tx.c revision 311028
/*-
 * Copyright (c) 2010-2016 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are
 * those of the authors and should not be interpreted as representing official
 * policies, either expressed or implied, of the FreeBSD Project.
 */

/* Theory of operation:
 *
 * Tx queue allocation and mapping
 *
 * One Tx queue with checksum offload enabled is allocated per Rx channel
 * (event queue).  In addition, 2 Tx queues (one without checksum offload
 * and one with IP checksum offload only) are allocated and bound to event
 * queue 0.  sfxge_txq_type is used as the Tx queue label.
 *
 * So the mapping from (event queue, label) to Tx queue index is:
 *	if event queue index is 0, TxQ-index = TxQ-label (in [0..SFXGE_TXQ_NTYPES))
 *	else TxQ-index = SFXGE_TXQ_NTYPES + EvQ-index - 1
 * See sfxge_get_txq_by_label() in sfxge_ev.c
 */
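
/*
 * Illustrative sketch (editor's addition, not part of the driver): the
 * mapping above written out as a plain function.  Compiled out with
 * "#if 0"; the real lookup lives in sfxge_get_txq_by_label() in sfxge_ev.c.
 */
#if 0
static unsigned int
example_txq_index(unsigned int evq_index, unsigned int txq_label)
{
	if (evq_index == 0)
		return (txq_label);	/* txq_label in [0, SFXGE_TXQ_NTYPES) */
	return (SFXGE_TXQ_NTYPES + evq_index - 1);
}
#endif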

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/sfxge/sfxge_tx.c 311028 2017-01-01 19:33:12Z arybchik $");

#include "opt_rss.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/limits.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#ifdef RSS
#include <net/rss_config.h>
#endif

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_tx.h"


#define	SFXGE_PARAM_TX_DPL_GET_MAX	SFXGE_PARAM(tx_dpl_get_max)
static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT;
TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_MAX, &sfxge_tx_dpl_get_max);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_max, CTLFLAG_RDTUN,
	   &sfxge_tx_dpl_get_max, 0,
	   "Maximum number of any packets in deferred packet get-list");

#define	SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX \
	SFXGE_PARAM(tx_dpl_get_non_tcp_max)
static int sfxge_tx_dpl_get_non_tcp_max =
	SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT;
TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, &sfxge_tx_dpl_get_non_tcp_max);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_non_tcp_max, CTLFLAG_RDTUN,
	   &sfxge_tx_dpl_get_non_tcp_max, 0,
	   "Maximum number of non-TCP packets in deferred packet get-list");

#define	SFXGE_PARAM_TX_DPL_PUT_MAX	SFXGE_PARAM(tx_dpl_put_max)
static int sfxge_tx_dpl_put_max = SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT;
TUNABLE_INT(SFXGE_PARAM_TX_DPL_PUT_MAX, &sfxge_tx_dpl_put_max);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN,
	   &sfxge_tx_dpl_put_max, 0,
	   "Maximum number of any packets in deferred packet put-list");

#define	SFXGE_PARAM_TSO_FW_ASSISTED	SFXGE_PARAM(tso_fw_assisted)
static int sfxge_tso_fw_assisted = (SFXGE_FATSOV1 | SFXGE_FATSOV2);
TUNABLE_INT(SFXGE_PARAM_TSO_FW_ASSISTED, &sfxge_tso_fw_assisted);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tso_fw_assisted, CTLFLAG_RDTUN,
	   &sfxge_tso_fw_assisted, 0,
	   "Bitmask of FW-assisted TSO allowed to use if supported by NIC firmware");


static const struct {
	const char *name;
	size_t offset;
} sfxge_tx_stats[] = {
#define	SFXGE_TX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_txq, member) }
	SFXGE_TX_STAT(tso_bursts, tso_bursts),
	SFXGE_TX_STAT(tso_packets, tso_packets),
	SFXGE_TX_STAT(tso_long_headers, tso_long_headers),
	SFXGE_TX_STAT(tso_pdrop_too_many, tso_pdrop_too_many),
	SFXGE_TX_STAT(tso_pdrop_no_rsrc, tso_pdrop_no_rsrc),
	SFXGE_TX_STAT(tx_collapses, collapses),
	SFXGE_TX_STAT(tx_drops, drops),
	SFXGE_TX_STAT(tx_get_overflow, get_overflow),
	SFXGE_TX_STAT(tx_get_non_tcp_overflow, get_non_tcp_overflow),
	SFXGE_TX_STAT(tx_put_overflow, put_overflow),
	SFXGE_TX_STAT(tx_netdown_drops, netdown_drops),
};


/* Forward declarations. */
static void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
static void sfxge_tx_qlist_post(struct sfxge_txq *txq);
static void sfxge_tx_qunblock(struct sfxge_txq *txq);
static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
			      const bus_dma_segment_t *dma_seg, int n_dma_seg,
			      int vlan_tagged);

static int
sfxge_tx_maybe_insert_tag(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	uint16_t this_tag = ((mbuf->m_flags & M_VLANTAG) ?
			     mbuf->m_pkthdr.ether_vtag :
			     0);

	if (this_tag == txq->hw_vlan_tci)
		return (0);

	efx_tx_qdesc_vlantci_create(txq->common,
				    bswap16(this_tag),
				    &txq->pend_desc[0]);
	txq->n_pend_desc = 1;
	txq->hw_vlan_tci = this_tag;
	return (1);
}

static inline void
sfxge_next_stmp(struct sfxge_txq *txq, struct sfxge_tx_mapping **pstmp)
{
	KASSERT((*pstmp)->flags == 0, ("stmp flags are not 0"));
	if (__predict_false(*pstmp ==
			    &txq->stmp[txq->ptr_mask]))
		*pstmp = &txq->stmp[0];
	else
		(*pstmp)++;
}


void
sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq)
{
	unsigned int completed;

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = txq->completed;
	while (completed != txq->pending) {
		struct sfxge_tx_mapping *stmp;
		unsigned int id;

		id = completed++ & txq->ptr_mask;

		stmp = &txq->stmp[id];
		if (stmp->flags & TX_BUF_UNMAP) {
			bus_dmamap_unload(txq->packet_dma_tag, stmp->map);
			if (stmp->flags & TX_BUF_MBUF) {
				struct mbuf *m = stmp->u.mbuf;
				do
					m = m_free(m);
				while (m != NULL);
			} else {
				free(stmp->u.heap_buf, M_SFXGE);
			}
			stmp->flags = 0;
		}
	}
	txq->completed = completed;

	/* Check whether we need to unblock the queue. */
	mb();
	if (txq->blocked) {
		unsigned int level;

		level = txq->added - txq->completed;
		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries))
			sfxge_tx_qunblock(txq);
	}
}
static unsigned int
sfxge_is_mbuf_non_tcp(struct mbuf *mbuf)
{
	/* Absence of TCP checksum flags does not necessarily mean that the
	 * packet is non-TCP, but the flags should be set if the user wants
	 * to achieve high throughput.
	 */
	return (!(mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)));
}

/*
 * Reorder the put list and append it to the get list.
 */
static void
sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq)
{
	struct sfxge_tx_dpl *stdp;
	struct mbuf *mbuf, *get_next, **get_tailp;
	volatile uintptr_t *putp;
	uintptr_t put;
	unsigned int count;
	unsigned int non_tcp_count;

	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	stdp = &txq->dpl;

	/* Acquire the put list. */
	putp = &stdp->std_put;
	put = atomic_readandclear_ptr(putp);
	mbuf = (void *)put;

	if (mbuf == NULL)
		return;

	/* Reverse the put list. */
	get_tailp = &mbuf->m_nextpkt;
	get_next = NULL;

	count = 0;
	non_tcp_count = 0;
	do {
		struct mbuf *put_next;

		non_tcp_count += sfxge_is_mbuf_non_tcp(mbuf);
		put_next = mbuf->m_nextpkt;
		mbuf->m_nextpkt = get_next;
		get_next = mbuf;
		mbuf = put_next;

		count++;
	} while (mbuf != NULL);

	if (count > stdp->std_put_hiwat)
		stdp->std_put_hiwat = count;

	/* Append the reversed put list to the get list. */
	KASSERT(*get_tailp == NULL, ("*get_tailp != NULL"));
	*stdp->std_getp = get_next;
	stdp->std_getp = get_tailp;
	stdp->std_get_count += count;
	stdp->std_get_non_tcp_count += non_tcp_count;
}
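
/*
 * Worked example (editor's addition, illustrative only): if packets A, B, C
 * were pushed onto the put-list in that order, the list head reads
 * C -> B -> A (LIFO push order).  The reversal above restores A -> B -> C
 * before appending to the get-list, so packets are drained to the hardware
 * in their original arrival order.
 */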

static void
sfxge_tx_qreap(struct sfxge_txq *txq)
{
	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	txq->reaped = txq->completed;
}

static void
sfxge_tx_qlist_post(struct sfxge_txq *txq)
{
	unsigned int old_added;
	unsigned int block_level;
	unsigned int level;
	int rc;

	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	KASSERT(txq->n_pend_desc != 0, ("txq->n_pend_desc == 0"));
	KASSERT(txq->n_pend_desc <= txq->max_pkt_desc,
		("txq->n_pend_desc too large"));
	KASSERT(!txq->blocked, ("txq->blocked"));

	old_added = txq->added;

	/* Post the fragment list. */
	rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, txq->n_pend_desc,
			  txq->reaped, &txq->added);
	KASSERT(rc == 0, ("efx_tx_qdesc_post() failed"));

	/* If efx_tx_qdesc_post() had to refragment, our information about
	 * buffers to free may be associated with the wrong
	 * descriptors.
	 */
	KASSERT(txq->added - old_added == txq->n_pend_desc,
		("efx_tx_qdesc_post() refragmented descriptors"));

	level = txq->added - txq->reaped;
	KASSERT(level <= txq->entries, ("overfilled TX queue"));

	/* Clear the fragment list. */
	txq->n_pend_desc = 0;

	/*
	 * Set the block level to ensure there is space to generate a
	 * large number of descriptors for TSO.
	 */
	block_level = EFX_TXQ_LIMIT(txq->entries) - txq->max_pkt_desc;

	/* Have we reached the block level? */
	if (level < block_level)
		return;

	/* Reap, and check again */
	sfxge_tx_qreap(txq);
	level = txq->added - txq->reaped;
	if (level < block_level)
		return;

	txq->blocked = 1;

	/*
	 * Avoid a race with completion interrupt handling that could leave
	 * the queue blocked.
	 */
	mb();
	sfxge_tx_qreap(txq);
	level = txq->added - txq->reaped;
	if (level < block_level) {
		mb();
		txq->blocked = 0;
	}
}

static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	bus_dmamap_t *used_map;
	bus_dmamap_t map;
	bus_dma_segment_t dma_seg[SFXGE_TX_MAPPING_MAX_SEG];
	unsigned int id;
	struct sfxge_tx_mapping *stmp;
	efx_desc_t *desc;
	int n_dma_seg;
	int rc;
	int i;
	int eop;
	int vlan_tagged;

	KASSERT(!txq->blocked, ("txq->blocked"));

	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO)
		prefetch_read_many(mbuf->m_data);

	if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED)) {
		rc = EINTR;
		goto reject;
	}

	/* Load the packet for DMA. */
	id = txq->added & txq->ptr_mask;
	stmp = &txq->stmp[id];
	rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag, stmp->map,
				     mbuf, dma_seg, &n_dma_seg, 0);
	if (rc == EFBIG) {
		/* Try again. */
		struct mbuf *new_mbuf = m_collapse(mbuf, M_NOWAIT,
						   SFXGE_TX_MAPPING_MAX_SEG);
		if (new_mbuf == NULL)
			goto reject;
		++txq->collapses;
		mbuf = new_mbuf;
		rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag,
					     stmp->map, mbuf,
					     dma_seg, &n_dma_seg, 0);
	}
	if (rc != 0)
		goto reject;

	/* Make the packet visible to the hardware. */
	bus_dmamap_sync(txq->packet_dma_tag, stmp->map, BUS_DMASYNC_PREWRITE);

	used_map = &stmp->map;

	vlan_tagged = sfxge_tx_maybe_insert_tag(txq, mbuf);
	if (vlan_tagged) {
		sfxge_next_stmp(txq, &stmp);
	}
	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
		rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg, vlan_tagged);
		if (rc < 0)
			goto reject_mapped;
		stmp = &txq->stmp[(rc - 1) & txq->ptr_mask];
	} else {
		/* Add the mapping to the fragment list, and set flags
		 * for the buffer.
		 */

		i = 0;
		for (;;) {
			desc = &txq->pend_desc[i + vlan_tagged];
			eop = (i == n_dma_seg - 1);
			efx_tx_qdesc_dma_create(txq->common,
						dma_seg[i].ds_addr,
						dma_seg[i].ds_len,
						eop,
						desc);
			if (eop)
				break;
			i++;
			sfxge_next_stmp(txq, &stmp);
		}
		txq->n_pend_desc = n_dma_seg + vlan_tagged;
	}

	/*
	 * If the mapping required more than one descriptor
	 * then we need to associate the DMA map with the last
	 * descriptor, not the first.
	 */
	if (used_map != &stmp->map) {
		map = stmp->map;
		stmp->map = *used_map;
		*used_map = map;
	}

	stmp->u.mbuf = mbuf;
	stmp->flags = TX_BUF_UNMAP | TX_BUF_MBUF;

	/* Post the fragment list. */
	sfxge_tx_qlist_post(txq);

	return (0);

reject_mapped:
	bus_dmamap_unload(txq->packet_dma_tag, *used_map);
reject:
	/* Drop the packet on the floor. */
	m_freem(mbuf);
	++txq->drops;

	return (rc);
}

/*
 * Drain the deferred packet list into the transmit queue.
 */
static void
sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc;
	struct sfxge_tx_dpl *stdp;
	struct mbuf *mbuf, *next;
	unsigned int count;
	unsigned int non_tcp_count;
	unsigned int pushed;
	int rc;

	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	sc = txq->sc;
	stdp = &txq->dpl;
	pushed = txq->added;

	if (__predict_true(txq->init_state == SFXGE_TXQ_STARTED)) {
		prefetch_read_many(sc->enp);
		prefetch_read_many(txq->common);
	}

	mbuf = stdp->std_get;
	count = stdp->std_get_count;
	non_tcp_count = stdp->std_get_non_tcp_count;

	if (count > stdp->std_get_hiwat)
		stdp->std_get_hiwat = count;

	while (count != 0) {
		KASSERT(mbuf != NULL, ("mbuf == NULL"));

		next = mbuf->m_nextpkt;
		mbuf->m_nextpkt = NULL;

		ETHER_BPF_MTAP(sc->ifnet, mbuf); /* packet capture */

		if (next != NULL)
			prefetch_read_many(next);

		rc = sfxge_tx_queue_mbuf(txq, mbuf);
		--count;
		non_tcp_count -= sfxge_is_mbuf_non_tcp(mbuf);
		mbuf = next;
		if (rc != 0)
			continue;

		if (txq->blocked)
			break;

		/* Push the fragments to the hardware in batches. */
		if (txq->added - pushed >= SFXGE_TX_BATCH) {
			efx_tx_qpush(txq->common, txq->added, pushed);
			pushed = txq->added;
		}
	}

	if (count == 0) {
		KASSERT(mbuf == NULL, ("mbuf != NULL"));
		KASSERT(non_tcp_count == 0,
			("inconsistent TCP/non-TCP detection"));
		stdp->std_get = NULL;
		stdp->std_get_count = 0;
		stdp->std_get_non_tcp_count = 0;
		stdp->std_getp = &stdp->std_get;
	} else {
		stdp->std_get = mbuf;
		stdp->std_get_count = count;
		stdp->std_get_non_tcp_count = non_tcp_count;
	}

	if (txq->added != pushed)
		efx_tx_qpush(txq->common, txq->added, pushed);

	KASSERT(txq->blocked || stdp->std_get_count == 0,
		("queue unblocked but count is non-zero"));
}

#define	SFXGE_TX_QDPL_PENDING(_txq)	((_txq)->dpl.std_put != 0)

/*
 * Service the deferred packet list.
 *
 * NOTE: drops the txq mutex!
 */
static void
sfxge_tx_qdpl_service(struct sfxge_txq *txq)
{
	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	do {
		if (SFXGE_TX_QDPL_PENDING(txq))
			sfxge_tx_qdpl_swizzle(txq);

		if (!txq->blocked)
			sfxge_tx_qdpl_drain(txq);

		SFXGE_TXQ_UNLOCK(txq);
	} while (SFXGE_TX_QDPL_PENDING(txq) &&
		 SFXGE_TXQ_TRYLOCK(txq));
}

/*
 * Put a packet on the deferred packet get-list.
 */
static int
sfxge_tx_qdpl_put_locked(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	struct sfxge_tx_dpl *stdp;

	stdp = &txq->dpl;

	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));

	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	if (stdp->std_get_count >= stdp->std_get_max) {
		txq->get_overflow++;
		return (ENOBUFS);
	}
	if (sfxge_is_mbuf_non_tcp(mbuf)) {
		if (stdp->std_get_non_tcp_count >=
		    stdp->std_get_non_tcp_max) {
			txq->get_non_tcp_overflow++;
			return (ENOBUFS);
		}
		stdp->std_get_non_tcp_count++;
	}

	*(stdp->std_getp) = mbuf;
	stdp->std_getp = &mbuf->m_nextpkt;
	stdp->std_get_count++;

	return (0);
}

/*
 * Put a packet on the deferred packet put-list.
 *
 * We overload the csum_data field in the mbuf to keep track of the
 * put-list length because there is no cheap alternative to avoid races.
 */
static int
sfxge_tx_qdpl_put_unlocked(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	struct sfxge_tx_dpl *stdp;
	volatile uintptr_t *putp;
	uintptr_t old;
	uintptr_t new;
	unsigned int put_count;

	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));

	SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq);

	stdp = &txq->dpl;
	putp = &stdp->std_put;
	new = (uintptr_t)mbuf;

	do {
		old = *putp;
		if (old != 0) {
			struct mbuf *mp = (struct mbuf *)old;
			put_count = mp->m_pkthdr.csum_data;
		} else
			put_count = 0;
		if (put_count >= stdp->std_put_max) {
			atomic_add_long(&txq->put_overflow, 1);
			return (ENOBUFS);
		}
		mbuf->m_pkthdr.csum_data = put_count + 1;
		mbuf->m_nextpkt = (void *)old;
	} while (atomic_cmpset_ptr(putp, old, new) == 0);

	return (0);
}
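
/*
 * Illustrative sketch (editor's addition, compiled out): the lock-free push
 * above is a classic compare-and-swap stack.  The list length is cached in
 * csum_data of the current head mbuf, so the limit check needs no shared
 * counter; if another thread changes *putp between the read and the CAS,
 * the loop simply retries against the new head.
 */
#if 0
	/* Simplified shape of the retry loop, with bookkeeping omitted: */
	do {
		old = *putp;			/* current head (may be 0) */
		mbuf->m_nextpkt = (void *)old;	/* link new head to old */
	} while (atomic_cmpset_ptr(putp, old, (uintptr_t)mbuf) == 0);
#endif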

/*
 * Called from if_transmit - if the txq lock can be taken, append the packet
 * to the get-list and service the deferred packet list; otherwise push the
 * packet onto the lock-free put-list if there is space.
 */
static int
sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
{
	int rc;

	if (!SFXGE_LINK_UP(txq->sc)) {
		atomic_add_long(&txq->netdown_drops, 1);
		return (ENETDOWN);
	}

	/*
	 * Try to grab the txq lock.  If we are able to get the lock,
	 * the packet will be appended to the "get list" of the deferred
	 * packet list.  Otherwise, it will be pushed on the "put list".
	 */
	if (SFXGE_TXQ_TRYLOCK(txq)) {
		/* First swizzle put-list to get-list to keep order */
		sfxge_tx_qdpl_swizzle(txq);

		rc = sfxge_tx_qdpl_put_locked(txq, m);

		/* Try to service the list. */
		sfxge_tx_qdpl_service(txq);
		/* Lock has been dropped. */
	} else {
		rc = sfxge_tx_qdpl_put_unlocked(txq, m);

		/*
		 * Try to grab the lock again.
		 *
		 * If we are able to get the lock, we need to process
		 * the deferred packet list.  If we are not able to get
		 * the lock, another thread is processing the list.
		 */
		if ((rc == 0) && SFXGE_TXQ_TRYLOCK(txq)) {
			sfxge_tx_qdpl_service(txq);
			/* Lock has been dropped. */
		}
	}

	SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq);

	return (rc);
}

static void
sfxge_tx_qdpl_flush(struct sfxge_txq *txq)
{
	struct sfxge_tx_dpl *stdp = &txq->dpl;
	struct mbuf *mbuf, *next;

	SFXGE_TXQ_LOCK(txq);

	sfxge_tx_qdpl_swizzle(txq);
	for (mbuf = stdp->std_get; mbuf != NULL; mbuf = next) {
		next = mbuf->m_nextpkt;
		m_freem(mbuf);
	}
	stdp->std_get = NULL;
	stdp->std_get_count = 0;
	stdp->std_get_non_tcp_count = 0;
	stdp->std_getp = &stdp->std_get;

	SFXGE_TXQ_UNLOCK(txq);
}

void
sfxge_if_qflush(struct ifnet *ifp)
{
	struct sfxge_softc *sc;
	unsigned int i;

	sc = ifp->if_softc;

	for (i = 0; i < sc->txq_count; i++)
		sfxge_tx_qdpl_flush(sc->txq[i]);
}

#if SFXGE_TX_PARSE_EARLY

/* There is little space for user data in the mbuf pkthdr, so we use the
 * l*hlen fields, which are not otherwise used by the driver, to store
 * header offsets.
 * The fields are 8-bit, but that is fine: no header may be longer than
 * 255 bytes.
 */


#define TSO_MBUF_PROTO(_mbuf)    ((_mbuf)->m_pkthdr.PH_loc.sixteen[0])
/* We abuse l5hlen here because PH_loc can hold only 64 bits of data */
#define TSO_MBUF_FLAGS(_mbuf)    ((_mbuf)->m_pkthdr.l5hlen)
#define TSO_MBUF_PACKETID(_mbuf) ((_mbuf)->m_pkthdr.PH_loc.sixteen[1])
#define TSO_MBUF_SEQNUM(_mbuf)   ((_mbuf)->m_pkthdr.PH_loc.thirtytwo[1])
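
/*
 * Worked example (editor's addition, illustrative only): for a VLAN-tagged
 * IPv4/TCP packet with no IP or TCP options, the parser below stores
 * cumulative offsets from the start of the frame:
 *	l2hlen = 18 (Ethernet header + 802.1Q tag),
 *	l3hlen = 18 + 20 = 38 (offset of the TCP header),
 *	l4hlen = 38 + 20 = 58 (offset of the TCP payload).
 */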

static void sfxge_parse_tx_packet(struct mbuf *mbuf)
{
	struct ether_header *eh = mtod(mbuf, struct ether_header *);
	const struct tcphdr *th;
	struct tcphdr th_copy;

	/* Find network protocol and header */
	TSO_MBUF_PROTO(mbuf) = eh->ether_type;
	if (TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh =
			mtod(mbuf, struct ether_vlan_header *);
		TSO_MBUF_PROTO(mbuf) = veh->evl_proto;
		mbuf->m_pkthdr.l2hlen = sizeof(*veh);
	} else {
		mbuf->m_pkthdr.l2hlen = sizeof(*eh);
	}

	/* Find TCP header */
	if (TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_IP)) {
		const struct ip *iph = (const struct ip *)mtodo(mbuf, mbuf->m_pkthdr.l2hlen);

		KASSERT(iph->ip_p == IPPROTO_TCP,
			("TSO required on non-TCP packet"));
		mbuf->m_pkthdr.l3hlen = mbuf->m_pkthdr.l2hlen + 4 * iph->ip_hl;
		TSO_MBUF_PACKETID(mbuf) = iph->ip_id;
	} else {
		KASSERT(TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_IPV6),
			("TSO required on non-IP packet"));
		KASSERT(((const struct ip6_hdr *)mtodo(mbuf, mbuf->m_pkthdr.l2hlen))->ip6_nxt ==
			IPPROTO_TCP,
			("TSO required on non-TCP packet"));
		mbuf->m_pkthdr.l3hlen = mbuf->m_pkthdr.l2hlen + sizeof(struct ip6_hdr);
		TSO_MBUF_PACKETID(mbuf) = 0;
	}

	KASSERT(mbuf->m_len >= mbuf->m_pkthdr.l3hlen,
		("network header is fragmented in mbuf"));

	/* We need TCP header including flags (window is the next) */
	if (mbuf->m_len < mbuf->m_pkthdr.l3hlen + offsetof(struct tcphdr, th_win)) {
		m_copydata(mbuf, mbuf->m_pkthdr.l3hlen, sizeof(th_copy),
			   (caddr_t)&th_copy);
		th = &th_copy;
	} else {
		th = (const struct tcphdr *)mtodo(mbuf, mbuf->m_pkthdr.l3hlen);
	}

	mbuf->m_pkthdr.l4hlen = mbuf->m_pkthdr.l3hlen + 4 * th->th_off;
	TSO_MBUF_SEQNUM(mbuf) = ntohl(th->th_seq);

	/* These flags must not be duplicated */
	/*
	 * RST should not be duplicated as well, but FreeBSD kernel
	 * generates TSO packets with RST flag. So, do not assert
	 * its absence.
	 */
	KASSERT(!(th->th_flags & (TH_URG | TH_SYN)),
		("incompatible TCP flag 0x%x on TSO packet",
		 th->th_flags & (TH_URG | TH_SYN)));
	TSO_MBUF_FLAGS(mbuf) = th->th_flags;
}
#endif

/*
 * TX start -- called by the stack.
 */
int
sfxge_if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sfxge_softc *sc;
	struct sfxge_txq *txq;
	int rc;

	sc = (struct sfxge_softc *)ifp->if_softc;

	/*
	 * Transmit may be called when the interface is up from the kernel
	 * point of view but not yet up (bring-up in progress) from the
	 * driver point of view, e.g. during link aggregation bring-up.
	 * It may also be called when the interface is up from the driver
	 * point of view but already down from the kernel point of view,
	 * e.g. while interface shutdown is in progress.
	 */
	KASSERT((ifp->if_flags & IFF_UP) || (sc->if_flags & IFF_UP),
		("interface not up"));

	/* Pick the desired transmit queue. */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_DELAY_DATA | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO)) {
		int index = 0;

#ifdef RSS
		uint32_t bucket_id;

		/*
		 * Select a TX queue which matches the corresponding
		 * RX queue for the hash in order to assign both
		 * TX and RX parts of the flow to the same CPU
		 */
		if (rss_m2bucket(m, &bucket_id) == 0)
			index = bucket_id % (sc->txq_count - (SFXGE_TXQ_NTYPES - 1));
#else
		/* check if flowid is set */
		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
			uint32_t hash = m->m_pkthdr.flowid;
			uint32_t idx = hash % nitems(sc->rx_indir_table);

			index = sc->rx_indir_table[idx];
		}
#endif
#if SFXGE_TX_PARSE_EARLY
		if (m->m_pkthdr.csum_flags & CSUM_TSO)
			sfxge_parse_tx_packet(m);
#endif
		txq = sc->txq[SFXGE_TXQ_IP_TCP_UDP_CKSUM + index];
	} else if (m->m_pkthdr.csum_flags & CSUM_DELAY_IP) {
		txq = sc->txq[SFXGE_TXQ_IP_CKSUM];
	} else {
		txq = sc->txq[SFXGE_TXQ_NON_CKSUM];
	}

	rc = sfxge_tx_packet_add(txq, m);
	if (rc != 0)
		m_freem(m);

	return (rc);
}

/*
 * Software "TSO".  Not quite as good as doing it in hardware, but
 * still faster than segmenting in the stack.
 */

struct sfxge_tso_state {
	/* Output position */
	unsigned out_len;	/* Remaining length in current segment */
	unsigned seqnum;	/* Current sequence number */
	unsigned packet_space;	/* Remaining space in current packet */
	unsigned segs_space;	/* Remaining number of DMA segments
				   for the packet (FATSOv2 only) */

	/* Input position */
	uint64_t dma_addr;	/* DMA address of current position */
	unsigned in_len;	/* Remaining length in current mbuf */

	const struct mbuf *mbuf; /* Input mbuf (head of chain) */
	u_short protocol;	/* Network protocol (after VLAN decap) */
	ssize_t nh_off;		/* Offset of network header */
	ssize_t tcph_off;	/* Offset of TCP header */
	unsigned header_len;	/* Number of bytes of header */
	unsigned seg_size;	/* TCP segment size */
	int fw_assisted;	/* Use FW-assisted TSO */
	u_short packet_id;	/* IPv4 packet ID from the original packet */
	uint8_t tcp_flags;	/* TCP flags */
	efx_desc_t header_desc;	/* Precomputed header descriptor for
				 * FW-assisted TSO */
};

#if !SFXGE_TX_PARSE_EARLY
static const struct ip *tso_iph(const struct sfxge_tso_state *tso)
{
	KASSERT(tso->protocol == htons(ETHERTYPE_IP),
		("tso_iph() in non-IPv4 state"));
	return (const struct ip *)(tso->mbuf->m_data + tso->nh_off);
}

static __unused const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso)
{
	KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
		("tso_ip6h() in non-IPv6 state"));
	return (const struct ip6_hdr *)(tso->mbuf->m_data + tso->nh_off);
}

static const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso)
{
	return (const struct tcphdr *)(tso->mbuf->m_data + tso->tcph_off);
}
#endif


/* Size of preallocated TSO header buffers.  Larger blocks must be
 * allocated from the heap.
 */
#define	TSOH_STD_SIZE	128

/* At most half the descriptors in the queue at any time will refer to
 * a TSO header buffer, since they must always be followed by a
 * payload descriptor referring to an mbuf.
 */
#define	TSOH_COUNT(_txq_entries)	((_txq_entries) / 2u)
#define	TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
#define	TSOH_PAGE_COUNT(_txq_entries)	\
	howmany(TSOH_COUNT(_txq_entries), TSOH_PER_PAGE)
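
/*
 * Worked example (editor's addition, illustrative only): with 4 KB pages
 * and 1024 Tx queue entries, TSOH_COUNT = 1024 / 2 = 512 buffers,
 * TSOH_PER_PAGE = 4096 / 128 = 32, so TSOH_PAGE_COUNT =
 * howmany(512, 32) = 16 DMA-mapped pages per queue.
 */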

static int tso_init(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc = txq->sc;
	unsigned int tsoh_page_count = TSOH_PAGE_COUNT(sc->txq_entries);
	int i, rc;

	/* Allocate TSO header buffers */
	txq->tsoh_buffer = malloc(tsoh_page_count * sizeof(txq->tsoh_buffer[0]),
				  M_SFXGE, M_WAITOK);

	for (i = 0; i < tsoh_page_count; i++) {
		rc = sfxge_dma_alloc(sc, PAGE_SIZE, &txq->tsoh_buffer[i]);
		if (rc != 0)
			goto fail;
	}

	return (0);

fail:
	while (i-- > 0)
		sfxge_dma_free(&txq->tsoh_buffer[i]);
	free(txq->tsoh_buffer, M_SFXGE);
	txq->tsoh_buffer = NULL;
	return (rc);
}

static void tso_fini(struct sfxge_txq *txq)
{
	int i;

	if (txq->tsoh_buffer != NULL) {
		for (i = 0; i < TSOH_PAGE_COUNT(txq->sc->txq_entries); i++)
			sfxge_dma_free(&txq->tsoh_buffer[i]);
		free(txq->tsoh_buffer, M_SFXGE);
	}
}

static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso,
		      const bus_dma_segment_t *hdr_dma_seg,
		      struct mbuf *mbuf)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->sc->enp);
#if !SFXGE_TX_PARSE_EARLY
	struct ether_header *eh = mtod(mbuf, struct ether_header *);
	const struct tcphdr *th;
	struct tcphdr th_copy;
#endif

	tso->fw_assisted = txq->tso_fw_assisted;
	tso->mbuf = mbuf;

	/* Find network protocol and header */
#if !SFXGE_TX_PARSE_EARLY
	tso->protocol = eh->ether_type;
	if (tso->protocol == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh =
			mtod(mbuf, struct ether_vlan_header *);
		tso->protocol = veh->evl_proto;
		tso->nh_off = sizeof(*veh);
	} else {
		tso->nh_off = sizeof(*eh);
	}
#else
	tso->protocol = TSO_MBUF_PROTO(mbuf);
	tso->nh_off = mbuf->m_pkthdr.l2hlen;
	tso->tcph_off = mbuf->m_pkthdr.l3hlen;
	tso->packet_id = ntohs(TSO_MBUF_PACKETID(mbuf));
#endif

#if !SFXGE_TX_PARSE_EARLY
	/* Find TCP header */
	if (tso->protocol == htons(ETHERTYPE_IP)) {
		KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP,
			("TSO required on non-TCP packet"));
		tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl;
		tso->packet_id = ntohs(tso_iph(tso)->ip_id);
	} else {
		KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
			("TSO required on non-IP packet"));
		KASSERT(tso_ip6h(tso)->ip6_nxt == IPPROTO_TCP,
			("TSO required on non-TCP packet"));
		tso->tcph_off = tso->nh_off + sizeof(struct ip6_hdr);
		tso->packet_id = 0;
	}
#endif


	if (tso->fw_assisted &&
	    __predict_false(tso->tcph_off >
			    encp->enc_tx_tso_tcp_header_offset_limit)) {
		tso->fw_assisted = 0;
	}


#if !SFXGE_TX_PARSE_EARLY
	KASSERT(mbuf->m_len >= tso->tcph_off,
		("network header is fragmented in mbuf"));
	/* We need TCP header including flags (window is the next) */
	if (mbuf->m_len < tso->tcph_off + offsetof(struct tcphdr, th_win)) {
		m_copydata(tso->mbuf, tso->tcph_off, sizeof(th_copy),
			   (caddr_t)&th_copy);
		th = &th_copy;
	} else {
		th = tso_tcph(tso);
	}
	tso->header_len = tso->tcph_off + 4 * th->th_off;
#else
	tso->header_len = mbuf->m_pkthdr.l4hlen;
#endif
	tso->seg_size = mbuf->m_pkthdr.tso_segsz;

#if !SFXGE_TX_PARSE_EARLY
	tso->seqnum = ntohl(th->th_seq);

	/* These flags must not be duplicated */
	/*
	 * RST should not be duplicated as well, but FreeBSD kernel
	 * generates TSO packets with RST flag. So, do not assert
	 * its absence.
	 */
	KASSERT(!(th->th_flags & (TH_URG | TH_SYN)),
		("incompatible TCP flag 0x%x on TSO packet",
		 th->th_flags & (TH_URG | TH_SYN)));
	tso->tcp_flags = th->th_flags;
#else
	tso->seqnum = TSO_MBUF_SEQNUM(mbuf);
	tso->tcp_flags = TSO_MBUF_FLAGS(mbuf);
#endif

	tso->out_len = mbuf->m_pkthdr.len - tso->header_len;

	if (tso->fw_assisted) {
		if (hdr_dma_seg->ds_len >= tso->header_len)
			efx_tx_qdesc_dma_create(txq->common,
						hdr_dma_seg->ds_addr,
						tso->header_len,
						B_FALSE,
						&tso->header_desc);
		else
			tso->fw_assisted = 0;
	}
}
/*
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 *
 * Form descriptors for the current fragment, until we reach the end of
 * the fragment or end-of-packet.
 */
static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
					  struct sfxge_tso_state *tso)
{
	efx_desc_t *desc;
	int n;
	uint64_t dma_addr = tso->dma_addr;
	boolean_t eop;

	if (tso->in_len == 0 || tso->packet_space == 0)
		return;

	KASSERT(tso->in_len > 0, ("TSO input length went negative"));
	KASSERT(tso->packet_space > 0, ("TSO packet space went negative"));

	if (tso->fw_assisted & SFXGE_FATSOV2) {
		n = tso->in_len;
		tso->out_len -= n;
		tso->seqnum += n;
		tso->in_len = 0;
		if (n < tso->packet_space) {
			tso->packet_space -= n;
			tso->segs_space--;
		} else {
			tso->packet_space = tso->seg_size -
			    (n - tso->packet_space) % tso->seg_size;
			tso->segs_space =
			    EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1 -
			    (tso->packet_space != tso->seg_size);
		}
	} else {
		n = min(tso->in_len, tso->packet_space);
		tso->packet_space -= n;
		tso->out_len -= n;
		tso->dma_addr += n;
		tso->in_len -= n;
	}

	/*
	 * It is OK to use binary OR below to avoid extra branching
	 * since all conditions may always be checked.
	 */
	eop = (tso->out_len == 0) | (tso->packet_space == 0) |
	    (tso->segs_space == 0);

	desc = &txq->pend_desc[txq->n_pend_desc++];
	efx_tx_qdesc_dma_create(txq->common, dma_addr, n, eop, desc);
}
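
/*
 * Worked example (editor's addition, illustrative only) for the FATSOv2
 * branch above: with seg_size = 1400 and packet_space = 1400, a 4000-byte
 * fragment covers the current packet, one full 1400-byte packet, and 1200
 * bytes of a third, so packet_space becomes 1400 - (2600 % 1400) = 200:
 * the firmware still has 200 bytes of the third packet to fill from the
 * next DMA fragment.
 */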

/* Callback from bus_dmamap_load() for long TSO headers. */
static void tso_map_long_header(void *dma_addr_ret,
				bus_dma_segment_t *segs, int nseg,
				int error)
{
	*(uint64_t *)dma_addr_ret = ((__predict_true(error == 0) &&
				      __predict_true(nseg == 1)) ?
				     segs->ds_addr : 0);
}
/*
 * tso_start_new_packet - generate a new header and prepare for the new packet
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or an error code if header allocation failed.
 */
1144179315Sbzstatic int tso_start_new_packet(struct sfxge_txq *txq,
1145179315Sbz				struct sfxge_tso_state *tso,
1146179315Sbz				unsigned int *idp)
1147179315Sbz{
1148179315Sbz	unsigned int id = *idp;
1149179315Sbz	struct tcphdr *tsoh_th;
1150179315Sbz	unsigned ip_length;
1151179315Sbz	caddr_t header;
1152179315Sbz	uint64_t dma_addr;
1153179315Sbz	bus_dmamap_t map;
1154179315Sbz	efx_desc_t *desc;
1155179315Sbz	int rc;
1156179315Sbz
1157179315Sbz	if (tso->fw_assisted) {
1158179315Sbz		if (tso->fw_assisted & SFXGE_FATSOV2) {
1159179315Sbz			/* Add 2 FATSOv2 option descriptors */
1160179315Sbz			desc = &txq->pend_desc[txq->n_pend_desc];
1161179315Sbz			efx_tx_qdesc_tso2_create(txq->common,
1162179315Sbz						 tso->packet_id,
1163179315Sbz						 tso->seqnum,
1164179315Sbz						 tso->seg_size,
1165179315Sbz						 desc,
1166179315Sbz						 EFX_TX_FATSOV2_OPT_NDESCS);
1167179315Sbz			desc += EFX_TX_FATSOV2_OPT_NDESCS;
1168179315Sbz			txq->n_pend_desc += EFX_TX_FATSOV2_OPT_NDESCS;
1169179315Sbz			KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1170179315Sbz			id = (id + EFX_TX_FATSOV2_OPT_NDESCS) & txq->ptr_mask;
1171179315Sbz
1172179315Sbz			tso->segs_space =
1173179315Sbz			    EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1;
1174179315Sbz		} else {
1175179361Santoine			uint8_t tcp_flags = tso->tcp_flags;
1176179361Santoine
1177179361Santoine			if (tso->out_len > tso->seg_size)
1178179361Santoine				tcp_flags &= ~(TH_FIN | TH_PUSH);
1179179361Santoine
1180179361Santoine			/* Add FATSOv1 option descriptor */
1181179361Santoine			desc = &txq->pend_desc[txq->n_pend_desc++];
1182179361Santoine			efx_tx_qdesc_tso_create(txq->common,
1183179361Santoine						tso->packet_id,
1184179361Santoine						tso->seqnum,
1185179361Santoine						tcp_flags,
1186179361Santoine						desc++);
1187179361Santoine			KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1188179361Santoine			id = (id + 1) & txq->ptr_mask;
1189179361Santoine
1190179361Santoine			tso->seqnum += tso->seg_size;
1191179361Santoine			tso->segs_space = UINT_MAX;
1192179361Santoine		}
1193178924Santoine
1194178924Santoine		/* Header DMA descriptor */
1195178924Santoine		*desc = tso->header_desc;
1196178924Santoine		txq->n_pend_desc++;
1197178924Santoine		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1198177831Sflz		id = (id + 1) & txq->ptr_mask;
1199177831Sflz	} else {
1200177831Sflz		/* Allocate a DMA-mapped header buffer. */
1201177831Sflz		if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
1202177831Sflz			unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
1203178331Santoine			unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;
1204178331Santoine
1205178331Santoine			header = (txq->tsoh_buffer[page_index].esm_base +
1206178331Santoine				  buf_index * TSOH_STD_SIZE);
1207178331Santoine			dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
1208178331Santoine				    buf_index * TSOH_STD_SIZE);
1209178331Santoine			map = txq->tsoh_buffer[page_index].esm_map;
1210178331Santoine
1211178331Santoine			KASSERT(txq->stmp[id].flags == 0,
1212178331Santoine				("stmp flags are not 0"));
1213178331Santoine		} else {
1214178331Santoine			struct sfxge_tx_mapping *stmp = &txq->stmp[id];
1215178331Santoine
1216178331Santoine			/* We cannot use bus_dmamem_alloc() as that may sleep */
1217178331Santoine			header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
1218178331Santoine			if (__predict_false(!header))
1219178924Santoine				return (ENOMEM);
1220178924Santoine			rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
1221239476Savatar					     header, tso->header_len,
1222239476Savatar					     tso_map_long_header, &dma_addr,
1223239476Savatar					     BUS_DMA_NOWAIT);
1224239476Savatar			if (__predict_false(dma_addr == 0)) {
1225239476Savatar				if (rc == 0) {
1226176422Sthompsa					/* Succeeded but got >1 segment */
1227176422Sthompsa					bus_dmamap_unload(txq->packet_dma_tag,
1228175690Sbrueffer							  stmp->map);
1229175690Sbrueffer					rc = EINVAL;
1230175690Sbrueffer				}
1231175576Sattilio				free(header, M_SFXGE);
1232175576Sattilio				return (rc);
1233175227Sjhb			}
1234175227Sjhb			map = stmp->map;
1235175227Sjhb
1236174426Sdougb			txq->tso_long_headers++;
1237174426Sdougb			stmp->u.heap_buf = header;
1238174426Sdougb			stmp->flags = TX_BUF_UNMAP;
1239177153Sbrueffer		}
1240177153Sbrueffer
1241174092Sbrooks		tsoh_th = (struct tcphdr *)(header + tso->tcph_off);
1242174092Sbrooks
1243174092Sbrooks		/* Copy and update the headers. */
1244174092Sbrooks		m_copydata(tso->mbuf, 0, tso->header_len, header);
1245176956Santoine
1246176956Santoine		tsoh_th->th_seq = htonl(tso->seqnum);
1247176956Santoine		tso->seqnum += tso->seg_size;
1248174061Sjb		if (tso->out_len > tso->seg_size) {
1249174061Sjb			/* This packet will not finish the TSO burst. */
1250175227Sjhb			ip_length = tso->header_len - tso->nh_off + tso->seg_size;
1251175227Sjhb			tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
1252173466Simp		} else {
1253173466Simp			/* This packet will be the last in the TSO burst. */
1254173662Smarcel			ip_length = tso->header_len - tso->nh_off + tso->out_len;
1255173662Smarcel		}
1256173662Smarcel
1257173662Smarcel		if (tso->protocol == htons(ETHERTYPE_IP)) {
1258173662Smarcel			struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
1259173662Smarcel			tsoh_iph->ip_len = htons(ip_length);
1260172983Smtm			/* XXX We should increment ip_id, but FreeBSD doesn't
1261172983Smtm			 * currently allocate extra IDs for multiple segments.
1262172390Sbushman			 */
1263173176Sbushman		} else {
1264172390Sbushman			struct ip6_hdr *tsoh_iph =
1265172390Sbushman				(struct ip6_hdr *)(header + tso->nh_off);
1266172570Sru			tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
1267172570Sru		}
1268171786Smarcel
1269171786Smarcel		/* Make the header visible to the hardware. */
1270171786Smarcel		bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);
1271171786Smarcel
1272171696Sbz		/* Form a descriptor for this header. */
1273171696Sbz		desc = &txq->pend_desc[txq->n_pend_desc++];
1274179368Sbz		efx_tx_qdesc_dma_create(txq->common,
1275171461Srwatson					dma_addr,
1276171461Srwatson					tso->header_len,
1277171461Srwatson					0,
1278171461Srwatson					desc);
1279171461Srwatson		id = (id + 1) & txq->ptr_mask;
1280171461Srwatson
1281171461Srwatson		tso->segs_space = UINT_MAX;
1282171461Srwatson	}
1283171461Srwatson	tso->packet_space = tso->seg_size;
1284171461Srwatson	txq->tso_packets++;
1285171461Srwatson	*idp = id;
1286171461Srwatson
1287171461Srwatson	return (0);
1288171461Srwatson}

static int
sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
		   const bus_dma_segment_t *dma_seg, int n_dma_seg,
		   int vlan_tagged)
{
	struct sfxge_tso_state tso;
	unsigned int id;
	unsigned skipped = 0;

	tso_start(txq, &tso, dma_seg, mbuf);

	while (dma_seg->ds_len + skipped <= tso.header_len) {
		skipped += dma_seg->ds_len;
		--n_dma_seg;
		KASSERT(n_dma_seg, ("no payload found in TSO packet"));
		++dma_seg;
	}
	tso.in_len = dma_seg->ds_len - (tso.header_len - skipped);
	tso.dma_addr = dma_seg->ds_addr + (tso.header_len - skipped);

	id = (txq->added + vlan_tagged) & txq->ptr_mask;
	if (__predict_false(tso_start_new_packet(txq, &tso, &id)))
		return (-1);

	while (1) {
		tso_fill_packet_with_fragment(txq, &tso);
		/* Exactly one DMA descriptor is added. */
		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
		id = (id + 1) & txq->ptr_mask;

		/* Move on to the next fragment? */
		if (tso.in_len == 0) {
			--n_dma_seg;
			if (n_dma_seg == 0)
				break;
			++dma_seg;
			tso.in_len = dma_seg->ds_len;
			tso.dma_addr = dma_seg->ds_addr;
		}

		/* End of packet? */
		if ((tso.packet_space == 0) | (tso.segs_space == 0)) {
			unsigned int n_fatso_opt_desc =
			    (tso.fw_assisted & SFXGE_FATSOV2) ?
			    EFX_TX_FATSOV2_OPT_NDESCS :
			    (tso.fw_assisted & SFXGE_FATSOV1) ? 1 : 0;

			/* If the queue is now full due to tiny MSS,
			 * or we can't create another header, discard
			 * the remainder of the input mbuf but do not
			 * roll back the work we have done.
			 */
			if (txq->n_pend_desc + n_fatso_opt_desc +
			    1 /* header */ + n_dma_seg > txq->max_pkt_desc) {
				txq->tso_pdrop_too_many++;
				break;
			}
			if (__predict_false(tso_start_new_packet(txq, &tso,
								 &id))) {
				txq->tso_pdrop_no_rsrc++;
				break;
			}
		}
	}

	txq->tso_bursts++;
	return (id);
}
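
/*
 * Worked example (illustrative assumptions, not values from this
 * file): an 8742-byte TSO mbuf with a 54-byte header and MSS 1448
 * carries 8688 bytes of payload and is cut into 8688 / 1448 = 6
 * segments.  With software TSO each segment costs one header
 * descriptor plus at least one payload descriptor, so the loop above
 * emits at least 12 descriptors; the budget check against
 * txq->max_pkt_desc ends the burst early (counted in
 * tso_pdrop_too_many) instead of letting a tiny MSS overrun the ring.
 */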

static void
sfxge_tx_qunblock(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc;
	struct sfxge_evq *evq;

	sc = txq->sc;
	evq = sc->evq[txq->evq_index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED))
		return;

	SFXGE_TXQ_LOCK(txq);

	if (txq->blocked) {
		unsigned int level;

		level = txq->added - txq->completed;
		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries)) {
			/* reaped must be in sync with blocked */
			sfxge_tx_qreap(txq);
			txq->blocked = 0;
		}
	}

	sfxge_tx_qdpl_service(txq);
	/* note: lock has been dropped */
}
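
/*
 * Note: "level" above is the number of descriptors posted but not yet
 * completed.  Assuming, for illustration, a 1024-entry ring where
 * SFXGE_TXQ_UNBLOCK_LEVEL() evaluates to roughly a quarter of the
 * usable entries, the queue is only unblocked once outstanding
 * descriptors drop to ~256, giving the blocked/unblocked transition
 * hysteresis instead of toggling on every completion event.
 */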

void
sfxge_tx_qflush_done(struct sfxge_txq *txq)
{

	txq->flush_state = SFXGE_FLUSH_DONE;
}

static void
sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	struct sfxge_evq *evq;
	unsigned int count;

	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);

	txq = sc->txq[index];
	evq = sc->evq[txq->evq_index];

	SFXGE_EVQ_LOCK(evq);
	SFXGE_TXQ_LOCK(txq);

	KASSERT(txq->init_state == SFXGE_TXQ_STARTED,
	    ("txq->init_state != SFXGE_TXQ_STARTED"));

	txq->init_state = SFXGE_TXQ_INITIALIZED;

	if (txq->flush_state != SFXGE_FLUSH_DONE) {
		txq->flush_state = SFXGE_FLUSH_PENDING;

		SFXGE_EVQ_UNLOCK(evq);
		SFXGE_TXQ_UNLOCK(txq);

		/* Flush the transmit queue. */
		if (efx_tx_qflush(txq->common) != 0) {
			log(LOG_ERR, "%s: Flushing Tx queue %u failed\n",
			    device_get_nameunit(sc->dev), index);
			txq->flush_state = SFXGE_FLUSH_DONE;
		} else {
			count = 0;
			do {
				/* Spin for 100ms. */
				DELAY(100000);
				if (txq->flush_state != SFXGE_FLUSH_PENDING)
					break;
			} while (++count < 20);
		}
		SFXGE_EVQ_LOCK(evq);
		SFXGE_TXQ_LOCK(txq);

		KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
		    ("txq->flush_state == SFXGE_FLUSH_FAILED"));

		if (txq->flush_state != SFXGE_FLUSH_DONE) {
			/* Flush timeout */
			log(LOG_ERR, "%s: Cannot flush Tx queue %u\n",
			    device_get_nameunit(sc->dev), index);
			txq->flush_state = SFXGE_FLUSH_DONE;
		}
	}

	txq->blocked = 0;
	txq->pending = txq->added;

	sfxge_tx_qcomplete(txq, evq);
	KASSERT(txq->completed == txq->added,
	    ("txq->completed != txq->added"));

	sfxge_tx_qreap(txq);
	KASSERT(txq->reaped == txq->completed,
	    ("txq->reaped != txq->completed"));

	txq->added = 0;
	txq->pending = 0;
	txq->completed = 0;
	txq->reaped = 0;

	/* Destroy the common code transmit queue. */
	efx_tx_qdestroy(txq->common);
	txq->common = NULL;

	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
	    EFX_TXQ_NBUFS(sc->txq_entries));

	SFXGE_EVQ_UNLOCK(evq);
	SFXGE_TXQ_UNLOCK(txq);
}
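
/*
 * Timing note on the flush wait above: flush_state is polled every
 * 100 ms (DELAY(100000)) for at most 20 iterations, i.e. a flush
 * timeout of roughly two seconds before the queue is forcibly marked
 * SFXGE_FLUSH_DONE and its outstanding descriptors reclaimed.
 */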

/*
 * Estimate the maximum number of Tx descriptors required for a TSO
 * packet.  With minimum MSS and maximum mbuf length we might need more
 * (even more than a ring-ful of descriptors), but this should not
 * happen in practice except due to deliberate attack.  In that case we
 * will truncate the output at a packet boundary.
 */
static unsigned int
sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type,
		      unsigned int tso_fw_assisted)
{
	/* One descriptor for every input fragment */
	unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG;
	unsigned int sw_tso_max_descs;
	unsigned int fa_tso_v1_max_descs = 0;
	unsigned int fa_tso_v2_max_descs = 0;

	/* A VLAN tagging Tx option descriptor may be required. */
	if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled)
		max_descs++;

	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) {
		/*
		 * Plus header and payload descriptor for each output segment.
		 * Minus one since the header fragment is already counted.
		 * Even if FATSO is used, we should be ready to fall back
		 * to doing it in the driver.
		 */
		sw_tso_max_descs = SFXGE_TSO_MAX_SEGS * 2 - 1;

		/* FW-assisted TSOv1 requires one more descriptor per
		 * segment than SW TSO does. */
		if (tso_fw_assisted & SFXGE_FATSOV1)
			fa_tso_v1_max_descs =
			    sw_tso_max_descs + SFXGE_TSO_MAX_SEGS;

		/* FW-assisted TSOv2 requires 3 (2 FATSO plus header) extra
		 * descriptors per superframe, limited by the number of DMA
		 * fetches per packet.  The first packet header is already
		 * counted.
		 */
		if (tso_fw_assisted & SFXGE_FATSOV2) {
			fa_tso_v2_max_descs =
			    howmany(SFXGE_TX_MAPPING_MAX_SEG,
				    EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1) *
			    (EFX_TX_FATSOV2_OPT_NDESCS + 1) - 1;
		}

		max_descs += MAX(sw_tso_max_descs,
				 MAX(fa_tso_v1_max_descs, fa_tso_v2_max_descs));
	}

	return (max_descs);
}
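
/*
 * Worked example with assumed constants (the real values live in
 * headers outside this file): if SFXGE_TX_MAPPING_MAX_SEG were 16 and
 * SFXGE_TSO_MAX_SEGS were 64, a TCP/UDP checksum queue would get
 *
 *	sw_tso_max_descs    = 64 * 2 - 1 = 127
 *	fa_tso_v1_max_descs = 127 + 64   = 191
 *
 * and, assuming EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX == 24 and
 * EFX_TX_FATSOV2_OPT_NDESCS == 2,
 *
 *	fa_tso_v2_max_descs = howmany(16, 23) * 3 - 1 = 2
 *
 * so max_descs = 16 + 1 (VLAN option) + MAX(127, 191, 2) = 208.
 */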

static int
sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	efsys_mem_t *esmp;
	uint16_t flags;
	unsigned int tso_fw_assisted;
	struct sfxge_evq *evq;
	unsigned int desc_index;
	int rc;

	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);

	txq = sc->txq[index];
	esmp = &txq->mem;
	evq = sc->evq[txq->evq_index];

	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, txq->buf_base_id, esmp,
	    EFX_TXQ_NBUFS(sc->txq_entries))) != 0)
		return (rc);

	/* Determine the kind of queue we are creating. */
	tso_fw_assisted = 0;
	switch (txq->type) {
	case SFXGE_TXQ_NON_CKSUM:
		flags = 0;
		break;
	case SFXGE_TXQ_IP_CKSUM:
		flags = EFX_TXQ_CKSUM_IPV4;
		break;
	case SFXGE_TXQ_IP_TCP_UDP_CKSUM:
		flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP;
		tso_fw_assisted = sc->tso_fw_assisted;
		if (tso_fw_assisted & SFXGE_FATSOV2)
			flags |= EFX_TXQ_FATSOV2;
		break;
	default:
		KASSERT(0, ("Impossible TX queue"));
		flags = 0;
		break;
	}

	/* Create the common code transmit queue. */
	if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp,
	    sc->txq_entries, txq->buf_base_id, flags, evq->common,
	    &txq->common, &desc_index)) != 0) {
		/* Retry if no FATSOv2 resources, otherwise fail */
		if ((rc != ENOSPC) || (~flags & EFX_TXQ_FATSOV2))
			goto fail;

		/* Looks like all FATSOv2 contexts are used */
		flags &= ~EFX_TXQ_FATSOV2;
		tso_fw_assisted &= ~SFXGE_FATSOV2;
		if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp,
		    sc->txq_entries, txq->buf_base_id, flags, evq->common,
		    &txq->common, &desc_index)) != 0)
			goto fail;
	}

	/* Initialise queue descriptor indexes */
	txq->added = txq->pending = txq->completed = txq->reaped = desc_index;

	SFXGE_TXQ_LOCK(txq);

	/* Enable the transmit queue. */
	efx_tx_qenable(txq->common);

	txq->init_state = SFXGE_TXQ_STARTED;
	txq->flush_state = SFXGE_FLUSH_REQUIRED;
	txq->tso_fw_assisted = tso_fw_assisted;

	txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, txq->type,
						  tso_fw_assisted);

	SFXGE_TXQ_UNLOCK(txq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
	    EFX_TXQ_NBUFS(sc->txq_entries));
	return (rc);
}
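
/*
 * Design note: the retry above is the only FATSOv2 fallback path.  An
 * ENOSPC from efx_tx_qcreate() while EFX_TXQ_FATSOV2 is requested
 * means all firmware TSOv2 contexts are in use, so the queue is
 * recreated without the flag and tso_fw_assisted is downgraded; TSO
 * for this queue then uses FATSOv1 or the software path instead.
 */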

void
sfxge_tx_stop(struct sfxge_softc *sc)
{
	int index;

	index = sc->txq_count;
	while (--index >= 0)
		sfxge_tx_qstop(sc, index);

	/* Tear down the transmit module */
	efx_tx_fini(sc->enp);
}

int
sfxge_tx_start(struct sfxge_softc *sc)
{
	int index;
	int rc;

	/* Initialize the common code transmit module. */
	if ((rc = efx_tx_init(sc->enp)) != 0)
		return (rc);

	for (index = 0; index < sc->txq_count; index++) {
		if ((rc = sfxge_tx_qstart(sc, index)) != 0)
			goto fail;
	}

	return (0);

fail:
	while (--index >= 0)
		sfxge_tx_qstop(sc, index);

	efx_tx_fini(sc->enp);

	return (rc);
}
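
/*
 * Usage note: sfxge_tx_start() and sfxge_tx_stop() bracket interface
 * bring-up; on a mid-loop sfxge_tx_qstart() failure the unwind above
 * stops the queues already started, in reverse order, before tearing
 * down the common code transmit module with efx_tx_fini().
 */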

static int
sfxge_txq_stat_init(struct sfxge_txq *txq, struct sysctl_oid *txq_node)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(txq->sc->dev);
	struct sysctl_oid *stat_node;
	unsigned int id;

	stat_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO,
				    "stats", CTLFLAG_RD, NULL,
				    "Tx queue statistics");
	if (stat_node == NULL)
		return (ENOMEM);

	for (id = 0; id < nitems(sfxge_tx_stats); id++) {
		SYSCTL_ADD_ULONG(
		    ctx, SYSCTL_CHILDREN(stat_node), OID_AUTO,
		    sfxge_tx_stats[id].name, CTLFLAG_RD | CTLFLAG_STATS,
		    (unsigned long *)((caddr_t)txq + sfxge_tx_stats[id].offset),
		    "");
	}

	return (0);
}
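
/*
 * Illustrative sketch (path shape assumed, since it depends on how
 * device_get_sysctl_ctx() roots the tree): for unit 0, Tx queue 2,
 * the node created here would expose each counter in sfxge_tx_stats[]
 * under something like
 *
 *	dev.sfxge.0.txq.2.stats.<name>
 */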

/**
 * Destroy a transmit queue.
 */
static void
sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	unsigned int nmaps;

	txq = sc->txq[index];

	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));

	if (txq->type == SFXGE_TXQ_IP_TCP_UDP_CKSUM)
		tso_fini(txq);

	/* Free the context arrays. */
	free(txq->pend_desc, M_SFXGE);
	nmaps = sc->txq_entries;
	while (nmaps-- != 0)
		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
	free(txq->stmp, M_SFXGE);

	/* Release DMA memory mapping. */
	sfxge_dma_free(&txq->mem);

	sc->txq[index] = NULL;

	SFXGE_TXQ_LOCK_DESTROY(txq);

	free(txq, M_SFXGE);
}

static int
sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
	       enum sfxge_txq_type type, unsigned int evq_index)
{
	char name[16];
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid *txq_node;
	struct sfxge_txq *txq;
	struct sfxge_evq *evq;
	struct sfxge_tx_dpl *stdp;
	struct sysctl_oid *dpl_node;
	efsys_mem_t *esmp;
	unsigned int nmaps;
	int rc;

	txq = malloc(sizeof(struct sfxge_txq), M_SFXGE, M_ZERO | M_WAITOK);
	txq->sc = sc;
	txq->entries = sc->txq_entries;
	txq->ptr_mask = txq->entries - 1;

	sc->txq[txq_index] = txq;
	esmp = &txq->mem;

	evq = sc->evq[evq_index];

	/* Allocate and zero DMA space for the descriptor ring. */
	if ((rc = sfxge_dma_alloc(sc, EFX_TXQ_SIZE(sc->txq_entries), esmp)) != 0)
		return (rc);

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_TXQ_NBUFS(sc->txq_entries),
				 &txq->buf_base_id);

	/* Create a DMA tag for packet mappings. */
	if (bus_dma_tag_create(sc->parent_dma_tag, 1, 0x1000,
	    MIN(0x3FFFFFFFFFFFUL, BUS_SPACE_MAXADDR), BUS_SPACE_MAXADDR, NULL,
	    NULL, 0x11000, SFXGE_TX_MAPPING_MAX_SEG, 0x1000, 0, NULL, NULL,
	    &txq->packet_dma_tag) != 0) {
		device_printf(sc->dev, "Couldn't allocate txq DMA tag\n");
		rc = ENOMEM;
		goto fail;
	}

	/* Allocate pending descriptor array for batching writes. */
	txq->pend_desc = malloc(sizeof(efx_desc_t) * sc->txq_entries,
				M_SFXGE, M_ZERO | M_WAITOK);

	/* Allocate and initialise mbuf DMA mapping array. */
	txq->stmp = malloc(sizeof(struct sfxge_tx_mapping) * sc->txq_entries,
	    M_SFXGE, M_ZERO | M_WAITOK);
	for (nmaps = 0; nmaps < sc->txq_entries; nmaps++) {
		rc = bus_dmamap_create(txq->packet_dma_tag, 0,
				       &txq->stmp[nmaps].map);
		if (rc != 0)
			goto fail2;
	}

	snprintf(name, sizeof(name), "%u", txq_index);
	txq_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->txqs_node),
				   OID_AUTO, name, CTLFLAG_RD, NULL, "");
	if (txq_node == NULL) {
		rc = ENOMEM;
		goto fail_txq_node;
	}

	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM &&
	    (rc = tso_init(txq)) != 0)
		goto fail3;

	if (sfxge_tx_dpl_get_max <= 0) {
		log(LOG_ERR, "%s=%d must be greater than 0",
		    SFXGE_PARAM_TX_DPL_GET_MAX, sfxge_tx_dpl_get_max);
		rc = EINVAL;
		goto fail_tx_dpl_get_max;
	}
	if (sfxge_tx_dpl_get_non_tcp_max <= 0) {
		log(LOG_ERR, "%s=%d must be greater than 0",
		    SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX,
		    sfxge_tx_dpl_get_non_tcp_max);
		rc = EINVAL;
		goto fail_tx_dpl_get_max;
	}
	if (sfxge_tx_dpl_put_max < 0) {
		log(LOG_ERR, "%s=%d must be greater than or equal to 0",
		    SFXGE_PARAM_TX_DPL_PUT_MAX, sfxge_tx_dpl_put_max);
		rc = EINVAL;
		goto fail_tx_dpl_put_max;
	}

	/* Initialize the deferred packet list. */
	stdp = &txq->dpl;
	stdp->std_put_max = sfxge_tx_dpl_put_max;
	stdp->std_get_max = sfxge_tx_dpl_get_max;
	stdp->std_get_non_tcp_max = sfxge_tx_dpl_get_non_tcp_max;
	stdp->std_getp = &stdp->std_get;

	SFXGE_TXQ_LOCK_INIT(txq, device_get_nameunit(sc->dev), txq_index);

	dpl_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO,
				   "dpl", CTLFLAG_RD, NULL,
				   "Deferred packet list statistics");
	if (dpl_node == NULL) {
		rc = ENOMEM;
		goto fail_dpl_node;
	}

	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
			"get_count", CTLFLAG_RD | CTLFLAG_STATS,
			&stdp->std_get_count, 0, "");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
			"get_non_tcp_count", CTLFLAG_RD | CTLFLAG_STATS,
			&stdp->std_get_non_tcp_count, 0, "");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
			"get_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
			&stdp->std_get_hiwat, 0, "");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
			"put_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
			&stdp->std_put_hiwat, 0, "");

	rc = sfxge_txq_stat_init(txq, txq_node);
	if (rc != 0)
		goto fail_txq_stat_init;

	txq->type = type;
	txq->evq_index = evq_index;
	txq->txq_index = txq_index;
	txq->init_state = SFXGE_TXQ_INITIALIZED;
	txq->hw_vlan_tci = 0;

	return (0);

fail_txq_stat_init:
fail_dpl_node:
fail_tx_dpl_put_max:
fail_tx_dpl_get_max:
fail3:
fail_txq_node:
	free(txq->pend_desc, M_SFXGE);
fail2:
	while (nmaps-- != 0)
		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
	free(txq->stmp, M_SFXGE);
	bus_dma_tag_destroy(txq->packet_dma_tag);

fail:
	sfxge_dma_free(esmp);

	return (rc);
}
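
/*
 * Note on the packet DMA tag created above (sizes as written in the
 * call): mappings are byte-aligned, each segment is at most 0x1000
 * bytes and may not cross a 0x1000 boundary, and a mapping is limited
 * to SFXGE_TX_MAPPING_MAX_SEG segments and 0x11000 (68 KiB) in total,
 * which is presumably sized for a maximal TSO payload plus headers.
 */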

static int
sfxge_tx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned long sum;
	unsigned int index;

	/* Sum across all TX queues */
	sum = 0;
	for (index = 0; index < sc->txq_count; index++)
		sum += *(unsigned long *)((caddr_t)sc->txq[index] +
					  sfxge_tx_stats[id].offset);

	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
}
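
/*
 * Example: reading one of these sysctls sums the per-queue counter at
 * sfxge_tx_stats[id].offset over every Tx queue, so with four queues
 * whose tso_bursts counters read 10, 0, 7 and 3, the handler returns
 * 20.
 */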

static void
sfxge_tx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0; id < nitems(sfxge_tx_stats); id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_tx_stats[id].name,
			CTLTYPE_ULONG|CTLFLAG_RD,
			sc, id, sfxge_tx_stat_handler, "LU",
			"");
	}
}

uint64_t
sfxge_tx_get_drops(struct sfxge_softc *sc)
{
	unsigned int index;
	uint64_t drops = 0;
	struct sfxge_txq *txq;

	/* Sum across all TX queues */
	for (index = 0; index < sc->txq_count; index++) {
		txq = sc->txq[index];
		/*
		 * In theory, txq->put_overflow and txq->netdown_drops
		 * should be read with atomic operations and the other
		 * counters under the txq lock, but these are only
		 * statistics, so approximate reads are acceptable.
		 */
		drops += txq->drops + txq->get_overflow +
			 txq->get_non_tcp_overflow +
			 txq->put_overflow + txq->netdown_drops +
			 txq->tso_pdrop_too_many + txq->tso_pdrop_no_rsrc;
	}
	return (drops);
}

void
sfxge_tx_fini(struct sfxge_softc *sc)
{
	int index;

	index = sc->txq_count;
	while (--index >= 0)
		sfxge_tx_qfini(sc, index);

	sc->txq_count = 0;
}

int
sfxge_tx_init(struct sfxge_softc *sc)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
	    ("intr->state != SFXGE_INTR_INITIALIZED"));

	sc->txq_count = SFXGE_TXQ_NTYPES - 1 + sc->intr.n_alloc;

	sc->tso_fw_assisted = sfxge_tso_fw_assisted;
	if ((~encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) ||
	    (!encp->enc_fw_assisted_tso_enabled))
		sc->tso_fw_assisted &= ~SFXGE_FATSOV1;
	if ((~encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO_V2) ||
	    (!encp->enc_fw_assisted_tso_v2_enabled))
		sc->tso_fw_assisted &= ~SFXGE_FATSOV2;

	sc->txqs_node = SYSCTL_ADD_NODE(
		device_get_sysctl_ctx(sc->dev),
		SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
		OID_AUTO, "txq", CTLFLAG_RD, NULL, "Tx queues");
	if (sc->txqs_node == NULL) {
		rc = ENOMEM;
		goto fail_txq_node;
	}

	/* Initialize the transmit queues */
	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NON_CKSUM,
	    SFXGE_TXQ_NON_CKSUM, 0)) != 0)
		goto fail;

	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_IP_CKSUM,
	    SFXGE_TXQ_IP_CKSUM, 0)) != 0)
		goto fail2;

	for (index = 0;
	     index < sc->txq_count - SFXGE_TXQ_NTYPES + 1;
	     index++) {
		if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NTYPES - 1 + index,
		    SFXGE_TXQ_IP_TCP_UDP_CKSUM, index)) != 0)
			goto fail3;
	}

	sfxge_tx_stat_init(sc);

	return (0);

fail3:
	while (--index >= 0)
		sfxge_tx_qfini(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);

	sfxge_tx_qfini(sc, SFXGE_TXQ_IP_CKSUM);

fail2:
	sfxge_tx_qfini(sc, SFXGE_TXQ_NON_CKSUM);

fail:
fail_txq_node:
	sc->txq_count = 0;
	return (rc);
}