sfxge_tx.c revision 291924
1/*-
2 * Copyright (c) 2010-2015 Solarflare Communications Inc.
3 * All rights reserved.
4 *
5 * This software was developed in part by Philip Paeps under contract for
6 * Solarflare Communications, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright notice,
12 *    this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 *    this list of conditions and the following disclaimer in the documentation
15 *    and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * The views and conclusions contained in the software and documentation are
30 * those of the authors and should not be interpreted as representing official
31 * policies, either expressed or implied, of the FreeBSD Project.
32 */
33
/* Theory of operation:
 *
 * Tx queue allocation and mapping
 *
 * One Tx queue with checksum offload enabled is allocated per Rx channel
 * (event queue).  In addition, two Tx queues (one without checksum offload
 * and one with IP checksum offload only) are allocated and bound to event
 * queue 0.  sfxge_txq_type is used as the Tx queue label.
 *
 * So, the mapping from event queue plus label to Tx queue index is:
 *	if the event queue index is 0, TxQ-index = TxQ-label
 *	    (which lies in [0..SFXGE_TXQ_NTYPES))
 *	else TxQ-index = SFXGE_TXQ_NTYPES + EvQ-index - 1
 * See sfxge_get_txq_by_label() in sfxge_ev.c
 */
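/*
 * Illustrative sketch only (not used by the driver): the mapping described
 * above could be computed as
 *
 *	txq_index = (evq_index == 0) ?
 *	    txq_label : SFXGE_TXQ_NTYPES + evq_index - 1;
 *
 * where txq_label is one of the sfxge_txq_type labels and evq_index is the
 * event queue (Rx channel) index.
 */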
48
49#include <sys/cdefs.h>
50__FBSDID("$FreeBSD: head/sys/dev/sfxge/sfxge_tx.c 291924 2015-12-07 06:04:24Z arybchik $");
51
52#include <sys/types.h>
53#include <sys/mbuf.h>
54#include <sys/smp.h>
55#include <sys/socket.h>
56#include <sys/sysctl.h>
57#include <sys/syslog.h>
58
59#include <net/bpf.h>
60#include <net/ethernet.h>
61#include <net/if.h>
62#include <net/if_vlan_var.h>
63
64#include <netinet/in.h>
65#include <netinet/ip.h>
66#include <netinet/ip6.h>
67#include <netinet/tcp.h>
68
69#include "common/efx.h"
70
71#include "sfxge.h"
72#include "sfxge_tx.h"
73
74
75#define	SFXGE_PARAM_TX_DPL_GET_MAX	SFXGE_PARAM(tx_dpl_get_max)
76static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT;
77TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_MAX, &sfxge_tx_dpl_get_max);
78SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_max, CTLFLAG_RDTUN,
79	   &sfxge_tx_dpl_get_max, 0,
	   "Maximum number of packets (of any type) in deferred packet get-list");
81
82#define	SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX \
83	SFXGE_PARAM(tx_dpl_get_non_tcp_max)
84static int sfxge_tx_dpl_get_non_tcp_max =
85	SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT;
86TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, &sfxge_tx_dpl_get_non_tcp_max);
87SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_non_tcp_max, CTLFLAG_RDTUN,
88	   &sfxge_tx_dpl_get_non_tcp_max, 0,
89	   "Maximum number of non-TCP packets in deferred packet get-list");
90
91#define	SFXGE_PARAM_TX_DPL_PUT_MAX	SFXGE_PARAM(tx_dpl_put_max)
92static int sfxge_tx_dpl_put_max = SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT;
93TUNABLE_INT(SFXGE_PARAM_TX_DPL_PUT_MAX, &sfxge_tx_dpl_put_max);
94SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN,
95	   &sfxge_tx_dpl_put_max, 0,
	   "Maximum number of packets (of any type) in deferred packet put-list");
97
98#define	SFXGE_PARAM_TSO_FW_ASSISTED	SFXGE_PARAM(tso_fw_assisted)
99static int sfxge_tso_fw_assisted = 1;
100TUNABLE_INT(SFXGE_PARAM_TSO_FW_ASSISTED, &sfxge_tso_fw_assisted);
101SYSCTL_INT(_hw_sfxge, OID_AUTO, tso_fw_assisted, CTLFLAG_RDTUN,
102	   &sfxge_tso_fw_assisted, 0,
103	   "Use FW-assisted TSO if supported by NIC firmware");
104
105
106static const struct {
107	const char *name;
108	size_t offset;
109} sfxge_tx_stats[] = {
110#define	SFXGE_TX_STAT(name, member) \
111	{ #name, offsetof(struct sfxge_txq, member) }
112	SFXGE_TX_STAT(tso_bursts, tso_bursts),
113	SFXGE_TX_STAT(tso_packets, tso_packets),
114	SFXGE_TX_STAT(tso_long_headers, tso_long_headers),
115	SFXGE_TX_STAT(tso_pdrop_too_many, tso_pdrop_too_many),
116	SFXGE_TX_STAT(tso_pdrop_no_rsrc, tso_pdrop_no_rsrc),
117	SFXGE_TX_STAT(tx_collapses, collapses),
118	SFXGE_TX_STAT(tx_drops, drops),
119	SFXGE_TX_STAT(tx_get_overflow, get_overflow),
120	SFXGE_TX_STAT(tx_get_non_tcp_overflow, get_non_tcp_overflow),
121	SFXGE_TX_STAT(tx_put_overflow, put_overflow),
122	SFXGE_TX_STAT(tx_netdown_drops, netdown_drops),
123};
124
125
126/* Forward declarations. */
127static void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
128static void sfxge_tx_qlist_post(struct sfxge_txq *txq);
129static void sfxge_tx_qunblock(struct sfxge_txq *txq);
130static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
131			      const bus_dma_segment_t *dma_seg, int n_dma_seg,
132			      int vlan_tagged);
133
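/*
 * If the packet's VLAN TCI differs from the queue's current hardware VLAN
 * state, emit a VLAN TCI option descriptor and record the new state.
 * Returns the number of descriptors added (0 or 1).
 */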
134static int
135sfxge_tx_maybe_insert_tag(struct sfxge_txq *txq, struct mbuf *mbuf)
136{
137	uint16_t this_tag = ((mbuf->m_flags & M_VLANTAG) ?
138			     mbuf->m_pkthdr.ether_vtag :
139			     0);
140
141	if (this_tag == txq->hw_vlan_tci)
142		return (0);
143
144	efx_tx_qdesc_vlantci_create(txq->common,
145				    bswap16(this_tag),
146				    &txq->pend_desc[0]);
147	txq->n_pend_desc = 1;
148	txq->hw_vlan_tci = this_tag;
149	return (1);
150}
151
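/* Advance the Tx mapping pointer to the next ring entry, wrapping around. */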
152static inline void
153sfxge_next_stmp(struct sfxge_txq *txq, struct sfxge_tx_mapping **pstmp)
154{
155	KASSERT((*pstmp)->flags == 0, ("stmp flags are not 0"));
156	if (__predict_false(*pstmp ==
157			    &txq->stmp[txq->ptr_mask]))
158		*pstmp = &txq->stmp[0];
159	else
160		(*pstmp)++;
161}
162
163
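/*
 * Process completed Tx descriptors: unload DMA mappings and free the
 * associated mbufs or heap-allocated TSO headers, then unblock the queue
 * if it has drained below the unblock level.  Called with the event queue
 * lock held.
 */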
164void
165sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq)
166{
167	unsigned int completed;
168
169	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);
170
171	completed = txq->completed;
172	while (completed != txq->pending) {
173		struct sfxge_tx_mapping *stmp;
174		unsigned int id;
175
176		id = completed++ & txq->ptr_mask;
177
178		stmp = &txq->stmp[id];
179		if (stmp->flags & TX_BUF_UNMAP) {
180			bus_dmamap_unload(txq->packet_dma_tag, stmp->map);
181			if (stmp->flags & TX_BUF_MBUF) {
182				struct mbuf *m = stmp->u.mbuf;
183				do
184					m = m_free(m);
185				while (m != NULL);
186			} else {
187				free(stmp->u.heap_buf, M_SFXGE);
188			}
189			stmp->flags = 0;
190		}
191	}
192	txq->completed = completed;
193
194	/* Check whether we need to unblock the queue. */
195	mb();
196	if (txq->blocked) {
197		unsigned int level;
198
199		level = txq->added - txq->completed;
200		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries))
201			sfxge_tx_qunblock(txq);
202	}
203}
204
205static unsigned int
206sfxge_is_mbuf_non_tcp(struct mbuf *mbuf)
207{
	/* Absence of TCP checksum flags does not mean that the packet is
	 * non-TCP, but it should be true if the user wants to achieve high
	 * throughput.
	 */
211	return (!(mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)));
212}
213
214/*
215 * Reorder the put list and append it to the get list.
216 */
217static void
218sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq)
219{
220	struct sfxge_tx_dpl *stdp;
221	struct mbuf *mbuf, *get_next, **get_tailp;
222	volatile uintptr_t *putp;
223	uintptr_t put;
224	unsigned int count;
225	unsigned int non_tcp_count;
226
227	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
228
229	stdp = &txq->dpl;
230
231	/* Acquire the put list. */
232	putp = &stdp->std_put;
233	put = atomic_readandclear_ptr(putp);
234	mbuf = (void *)put;
235
236	if (mbuf == NULL)
237		return;
238
239	/* Reverse the put list. */
240	get_tailp = &mbuf->m_nextpkt;
241	get_next = NULL;
242
243	count = 0;
244	non_tcp_count = 0;
245	do {
246		struct mbuf *put_next;
247
248		non_tcp_count += sfxge_is_mbuf_non_tcp(mbuf);
249		put_next = mbuf->m_nextpkt;
250		mbuf->m_nextpkt = get_next;
251		get_next = mbuf;
252		mbuf = put_next;
253
254		count++;
255	} while (mbuf != NULL);
256
257	if (count > stdp->std_put_hiwat)
258		stdp->std_put_hiwat = count;
259
260	/* Append the reversed put list to the get list. */
261	KASSERT(*get_tailp == NULL, ("*get_tailp != NULL"));
262	*stdp->std_getp = get_next;
263	stdp->std_getp = get_tailp;
264	stdp->std_get_count += count;
265	stdp->std_get_non_tcp_count += non_tcp_count;
266}
267
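/* Update the count of reaped descriptors to match those already completed. */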
268static void
269sfxge_tx_qreap(struct sfxge_txq *txq)
270{
271	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
272
273	txq->reaped = txq->completed;
274}
275
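/*
 * Post the pending descriptor list to the common code Tx queue and block the
 * queue once there may no longer be room for another maximum-sized packet,
 * reaping completed descriptors first.
 */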
276static void
277sfxge_tx_qlist_post(struct sfxge_txq *txq)
278{
279	unsigned int old_added;
280	unsigned int block_level;
281	unsigned int level;
282	int rc;
283
284	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
285
286	KASSERT(txq->n_pend_desc != 0, ("txq->n_pend_desc == 0"));
287	KASSERT(txq->n_pend_desc <= txq->max_pkt_desc,
288		("txq->n_pend_desc too large"));
289	KASSERT(!txq->blocked, ("txq->blocked"));
290
291	old_added = txq->added;
292
293	/* Post the fragment list. */
294	rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, txq->n_pend_desc,
295			  txq->reaped, &txq->added);
296	KASSERT(rc == 0, ("efx_tx_qdesc_post() failed"));
297
298	/* If efx_tx_qdesc_post() had to refragment, our information about
299	 * buffers to free may be associated with the wrong
300	 * descriptors.
301	 */
302	KASSERT(txq->added - old_added == txq->n_pend_desc,
303		("efx_tx_qdesc_post() refragmented descriptors"));
304
305	level = txq->added - txq->reaped;
306	KASSERT(level <= txq->entries, ("overfilled TX queue"));
307
308	/* Clear the fragment list. */
309	txq->n_pend_desc = 0;
310
311	/*
312	 * Set the block level to ensure there is space to generate a
313	 * large number of descriptors for TSO.
314	 */
315	block_level = EFX_TXQ_LIMIT(txq->entries) - txq->max_pkt_desc;
316
317	/* Have we reached the block level? */
318	if (level < block_level)
319		return;
320
321	/* Reap, and check again */
322	sfxge_tx_qreap(txq);
323	level = txq->added - txq->reaped;
324	if (level < block_level)
325		return;
326
327	txq->blocked = 1;
328
329	/*
330	 * Avoid a race with completion interrupt handling that could leave
331	 * the queue blocked.
332	 */
333	mb();
334	sfxge_tx_qreap(txq);
335	level = txq->added - txq->reaped;
336	if (level < block_level) {
337		mb();
338		txq->blocked = 0;
339	}
340}
341
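/*
 * Map an mbuf chain for DMA (collapsing it if it has too many segments),
 * generate VLAN, TSO and/or DMA descriptors for it, and post them to the
 * Tx queue.  On failure the mbuf is freed and counted as a drop.
 */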
342static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)
343{
344	bus_dmamap_t *used_map;
345	bus_dmamap_t map;
346	bus_dma_segment_t dma_seg[SFXGE_TX_MAPPING_MAX_SEG];
347	unsigned int id;
348	struct sfxge_tx_mapping *stmp;
349	efx_desc_t *desc;
350	int n_dma_seg;
351	int rc;
352	int i;
353	int eop;
354	int vlan_tagged;
355
356	KASSERT(!txq->blocked, ("txq->blocked"));
357
358	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO)
359		prefetch_read_many(mbuf->m_data);
360
361	if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED)) {
362		rc = EINTR;
363		goto reject;
364	}
365
366	/* Load the packet for DMA. */
367	id = txq->added & txq->ptr_mask;
368	stmp = &txq->stmp[id];
369	rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag, stmp->map,
370				     mbuf, dma_seg, &n_dma_seg, 0);
371	if (rc == EFBIG) {
372		/* Try again. */
373		struct mbuf *new_mbuf = m_collapse(mbuf, M_NOWAIT,
374						   SFXGE_TX_MAPPING_MAX_SEG);
375		if (new_mbuf == NULL)
376			goto reject;
377		++txq->collapses;
378		mbuf = new_mbuf;
379		rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag,
380					     stmp->map, mbuf,
381					     dma_seg, &n_dma_seg, 0);
382	}
383	if (rc != 0)
384		goto reject;
385
386	/* Make the packet visible to the hardware. */
387	bus_dmamap_sync(txq->packet_dma_tag, stmp->map, BUS_DMASYNC_PREWRITE);
388
389	used_map = &stmp->map;
390
391	vlan_tagged = sfxge_tx_maybe_insert_tag(txq, mbuf);
392	if (vlan_tagged) {
393		sfxge_next_stmp(txq, &stmp);
394	}
395	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
396		rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg, vlan_tagged);
397		if (rc < 0)
398			goto reject_mapped;
399		stmp = &txq->stmp[(rc - 1) & txq->ptr_mask];
400	} else {
401		/* Add the mapping to the fragment list, and set flags
402		 * for the buffer.
403		 */
404
405		i = 0;
406		for (;;) {
407			desc = &txq->pend_desc[i + vlan_tagged];
408			eop = (i == n_dma_seg - 1);
409			efx_tx_qdesc_dma_create(txq->common,
410						dma_seg[i].ds_addr,
411						dma_seg[i].ds_len,
412						eop,
413						desc);
414			if (eop)
415				break;
416			i++;
417			sfxge_next_stmp(txq, &stmp);
418		}
419		txq->n_pend_desc = n_dma_seg + vlan_tagged;
420	}
421
422	/*
423	 * If the mapping required more than one descriptor
424	 * then we need to associate the DMA map with the last
425	 * descriptor, not the first.
426	 */
427	if (used_map != &stmp->map) {
428		map = stmp->map;
429		stmp->map = *used_map;
430		*used_map = map;
431	}
432
433	stmp->u.mbuf = mbuf;
434	stmp->flags = TX_BUF_UNMAP | TX_BUF_MBUF;
435
436	/* Post the fragment list. */
437	sfxge_tx_qlist_post(txq);
438
439	return (0);
440
441reject_mapped:
442	bus_dmamap_unload(txq->packet_dma_tag, *used_map);
443reject:
444	/* Drop the packet on the floor. */
445	m_freem(mbuf);
446	++txq->drops;
447
448	return (rc);
449}
450
451/*
452 * Drain the deferred packet list into the transmit queue.
453 */
454static void
455sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
456{
457	struct sfxge_softc *sc;
458	struct sfxge_tx_dpl *stdp;
459	struct mbuf *mbuf, *next;
460	unsigned int count;
461	unsigned int non_tcp_count;
462	unsigned int pushed;
463	int rc;
464
465	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
466
467	sc = txq->sc;
468	stdp = &txq->dpl;
469	pushed = txq->added;
470
471	if (__predict_true(txq->init_state == SFXGE_TXQ_STARTED)) {
472		prefetch_read_many(sc->enp);
473		prefetch_read_many(txq->common);
474	}
475
476	mbuf = stdp->std_get;
477	count = stdp->std_get_count;
478	non_tcp_count = stdp->std_get_non_tcp_count;
479
480	if (count > stdp->std_get_hiwat)
481		stdp->std_get_hiwat = count;
482
483	while (count != 0) {
484		KASSERT(mbuf != NULL, ("mbuf == NULL"));
485
486		next = mbuf->m_nextpkt;
487		mbuf->m_nextpkt = NULL;
488
489		ETHER_BPF_MTAP(sc->ifnet, mbuf); /* packet capture */
490
491		if (next != NULL)
492			prefetch_read_many(next);
493
494		rc = sfxge_tx_queue_mbuf(txq, mbuf);
495		--count;
496		non_tcp_count -= sfxge_is_mbuf_non_tcp(mbuf);
497		mbuf = next;
498		if (rc != 0)
499			continue;
500
501		if (txq->blocked)
502			break;
503
504		/* Push the fragments to the hardware in batches. */
505		if (txq->added - pushed >= SFXGE_TX_BATCH) {
506			efx_tx_qpush(txq->common, txq->added, pushed);
507			pushed = txq->added;
508		}
509	}
510
511	if (count == 0) {
512		KASSERT(mbuf == NULL, ("mbuf != NULL"));
513		KASSERT(non_tcp_count == 0,
514			("inconsistent TCP/non-TCP detection"));
515		stdp->std_get = NULL;
516		stdp->std_get_count = 0;
517		stdp->std_get_non_tcp_count = 0;
518		stdp->std_getp = &stdp->std_get;
519	} else {
520		stdp->std_get = mbuf;
521		stdp->std_get_count = count;
522		stdp->std_get_non_tcp_count = non_tcp_count;
523	}
524
525	if (txq->added != pushed)
526		efx_tx_qpush(txq->common, txq->added, pushed);
527
528	KASSERT(txq->blocked || stdp->std_get_count == 0,
529		("queue unblocked but count is non-zero"));
530}
531
532#define	SFXGE_TX_QDPL_PENDING(_txq)	((_txq)->dpl.std_put != 0)
533
534/*
535 * Service the deferred packet list.
536 *
537 * NOTE: drops the txq mutex!
538 */
539static void
540sfxge_tx_qdpl_service(struct sfxge_txq *txq)
541{
542	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
543
544	do {
545		if (SFXGE_TX_QDPL_PENDING(txq))
546			sfxge_tx_qdpl_swizzle(txq);
547
548		if (!txq->blocked)
549			sfxge_tx_qdpl_drain(txq);
550
551		SFXGE_TXQ_UNLOCK(txq);
552	} while (SFXGE_TX_QDPL_PENDING(txq) &&
553		 SFXGE_TXQ_TRYLOCK(txq));
554}
555
556/*
557 * Put a packet on the deferred packet get-list.
558 */
559static int
560sfxge_tx_qdpl_put_locked(struct sfxge_txq *txq, struct mbuf *mbuf)
561{
562	struct sfxge_tx_dpl *stdp;
563
564	stdp = &txq->dpl;
565
566	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));
567
568	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
569
570	if (stdp->std_get_count >= stdp->std_get_max) {
571		txq->get_overflow++;
572		return (ENOBUFS);
573	}
574	if (sfxge_is_mbuf_non_tcp(mbuf)) {
575		if (stdp->std_get_non_tcp_count >=
576		    stdp->std_get_non_tcp_max) {
577			txq->get_non_tcp_overflow++;
578			return (ENOBUFS);
579		}
580		stdp->std_get_non_tcp_count++;
581	}
582
583	*(stdp->std_getp) = mbuf;
584	stdp->std_getp = &mbuf->m_nextpkt;
585	stdp->std_get_count++;
586
587	return (0);
588}
589
/*
 * Put a packet on the deferred packet put-list.
 *
 * We overload the csum_data field in the mbuf to keep track of the put-list
 * length because there is no cheap alternative that avoids races.
 */
596static int
597sfxge_tx_qdpl_put_unlocked(struct sfxge_txq *txq, struct mbuf *mbuf)
598{
599	struct sfxge_tx_dpl *stdp;
600	volatile uintptr_t *putp;
601	uintptr_t old;
602	uintptr_t new;
603	unsigned old_len;
604
605	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));
606
607	SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq);
608
609	stdp = &txq->dpl;
610	putp = &stdp->std_put;
611	new = (uintptr_t)mbuf;
612
613	do {
614		old = *putp;
615		if (old != 0) {
616			struct mbuf *mp = (struct mbuf *)old;
617			old_len = mp->m_pkthdr.csum_data;
618		} else
619			old_len = 0;
620		if (old_len >= stdp->std_put_max) {
621			atomic_add_long(&txq->put_overflow, 1);
622			return (ENOBUFS);
623		}
624		mbuf->m_pkthdr.csum_data = old_len + 1;
625		mbuf->m_nextpkt = (void *)old;
626	} while (atomic_cmpset_ptr(putp, old, new) == 0);
627
628	return (0);
629}
630
/*
 * Called from if_transmit - will try to grab the txq lock and, if successful,
 * append the packet to the get-list and service the deferred packet list;
 * otherwise push the packet onto the put-list if there is space.
 */
635static int
636sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
637{
638	int rc;
639
640	if (!SFXGE_LINK_UP(txq->sc)) {
641		atomic_add_long(&txq->netdown_drops, 1);
642		return (ENETDOWN);
643	}
644
645	/*
646	 * Try to grab the txq lock.  If we are able to get the lock,
647	 * the packet will be appended to the "get list" of the deferred
648	 * packet list.  Otherwise, it will be pushed on the "put list".
649	 */
650	if (SFXGE_TXQ_TRYLOCK(txq)) {
651		/* First swizzle put-list to get-list to keep order */
652		sfxge_tx_qdpl_swizzle(txq);
653
654		rc = sfxge_tx_qdpl_put_locked(txq, m);
655
656		/* Try to service the list. */
657		sfxge_tx_qdpl_service(txq);
658		/* Lock has been dropped. */
659	} else {
660		rc = sfxge_tx_qdpl_put_unlocked(txq, m);
661
662		/*
663		 * Try to grab the lock again.
664		 *
665		 * If we are able to get the lock, we need to process
666		 * the deferred packet list.  If we are not able to get
667		 * the lock, another thread is processing the list.
668		 */
669		if ((rc == 0) && SFXGE_TXQ_TRYLOCK(txq)) {
670			sfxge_tx_qdpl_service(txq);
671			/* Lock has been dropped. */
672		}
673	}
674
675	SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq);
676
677	return (rc);
678}
679
680static void
681sfxge_tx_qdpl_flush(struct sfxge_txq *txq)
682{
683	struct sfxge_tx_dpl *stdp = &txq->dpl;
684	struct mbuf *mbuf, *next;
685
686	SFXGE_TXQ_LOCK(txq);
687
688	sfxge_tx_qdpl_swizzle(txq);
689	for (mbuf = stdp->std_get; mbuf != NULL; mbuf = next) {
690		next = mbuf->m_nextpkt;
691		m_freem(mbuf);
692	}
693	stdp->std_get = NULL;
694	stdp->std_get_count = 0;
695	stdp->std_get_non_tcp_count = 0;
696	stdp->std_getp = &stdp->std_get;
697
698	SFXGE_TXQ_UNLOCK(txq);
699}
700
701void
702sfxge_if_qflush(struct ifnet *ifp)
703{
704	struct sfxge_softc *sc;
705	unsigned int i;
706
707	sc = ifp->if_softc;
708
709	for (i = 0; i < sc->txq_count; i++)
710		sfxge_tx_qdpl_flush(sc->txq[i]);
711}
712
713#if SFXGE_TX_PARSE_EARLY
714
/* There is little space for user data in the mbuf pkthdr, so we use the
 * l*hlen fields, which are not otherwise used by the driver, to store
 * header offsets.  The fields are 8-bit, but that is OK since no header
 * may be longer than 255 bytes.
 */
720
721
722#define TSO_MBUF_PROTO(_mbuf)    ((_mbuf)->m_pkthdr.PH_loc.sixteen[0])
723/* We abuse l5hlen here because PH_loc can hold only 64 bits of data */
724#define TSO_MBUF_FLAGS(_mbuf)    ((_mbuf)->m_pkthdr.l5hlen)
725#define TSO_MBUF_PACKETID(_mbuf) ((_mbuf)->m_pkthdr.PH_loc.sixteen[1])
726#define TSO_MBUF_SEQNUM(_mbuf)   ((_mbuf)->m_pkthdr.PH_loc.thirtytwo[1])
727
728static void sfxge_parse_tx_packet(struct mbuf *mbuf)
729{
730	struct ether_header *eh = mtod(mbuf, struct ether_header *);
731	const struct tcphdr *th;
732	struct tcphdr th_copy;
733
734	/* Find network protocol and header */
735	TSO_MBUF_PROTO(mbuf) = eh->ether_type;
736	if (TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_VLAN)) {
737		struct ether_vlan_header *veh =
738			mtod(mbuf, struct ether_vlan_header *);
739		TSO_MBUF_PROTO(mbuf) = veh->evl_proto;
740		mbuf->m_pkthdr.l2hlen = sizeof(*veh);
741	} else {
742		mbuf->m_pkthdr.l2hlen = sizeof(*eh);
743	}
744
745	/* Find TCP header */
746	if (TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_IP)) {
747		const struct ip *iph = (const struct ip *)mtodo(mbuf, mbuf->m_pkthdr.l2hlen);
748
749		KASSERT(iph->ip_p == IPPROTO_TCP,
750			("TSO required on non-TCP packet"));
751		mbuf->m_pkthdr.l3hlen = mbuf->m_pkthdr.l2hlen + 4 * iph->ip_hl;
752		TSO_MBUF_PACKETID(mbuf) = iph->ip_id;
753	} else {
754		KASSERT(TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_IPV6),
755			("TSO required on non-IP packet"));
756		KASSERT(((const struct ip6_hdr *)mtodo(mbuf, mbuf->m_pkthdr.l2hlen))->ip6_nxt ==
757			IPPROTO_TCP,
758			("TSO required on non-TCP packet"));
759		mbuf->m_pkthdr.l3hlen = mbuf->m_pkthdr.l2hlen + sizeof(struct ip6_hdr);
760		TSO_MBUF_PACKETID(mbuf) = 0;
761	}
762
763	KASSERT(mbuf->m_len >= mbuf->m_pkthdr.l3hlen,
764		("network header is fragmented in mbuf"));
765
	/* We need the TCP header including the flags (the window field is next) */
767	if (mbuf->m_len < mbuf->m_pkthdr.l3hlen + offsetof(struct tcphdr, th_win)) {
768		m_copydata(mbuf, mbuf->m_pkthdr.l3hlen, sizeof(th_copy),
769			   (caddr_t)&th_copy);
770		th = &th_copy;
771	} else {
772		th = (const struct tcphdr *)mtodo(mbuf, mbuf->m_pkthdr.l3hlen);
773	}
774
775	mbuf->m_pkthdr.l4hlen = mbuf->m_pkthdr.l3hlen + 4 * th->th_off;
776	TSO_MBUF_SEQNUM(mbuf) = ntohl(th->th_seq);
777
	/*
	 * These flags must not be duplicated.
	 *
	 * RST should not be duplicated either, but the FreeBSD kernel
	 * generates TSO packets with the RST flag set, so do not assert
	 * its absence.
	 */
784	KASSERT(!(th->th_flags & (TH_URG | TH_SYN)),
785		("incompatible TCP flag 0x%x on TSO packet",
786		 th->th_flags & (TH_URG | TH_SYN)));
787	TSO_MBUF_FLAGS(mbuf) = th->th_flags;
788}
789#endif
790
791/*
792 * TX start -- called by the stack.
793 */
794int
795sfxge_if_transmit(struct ifnet *ifp, struct mbuf *m)
796{
797	struct sfxge_softc *sc;
798	struct sfxge_txq *txq;
799	int rc;
800
801	sc = (struct sfxge_softc *)ifp->if_softc;
802
	/*
	 * Transmit may be called when the interface is up from the kernel
	 * point of view, but not yet up (bring-up in progress) from the
	 * driver point of view, e.g. during link aggregation bring-up.
	 * Transmit may also be called when the interface is up from the
	 * driver point of view, but already down from the kernel point of
	 * view, e.g. while interface shutdown is in progress.
	 */
811	KASSERT((ifp->if_flags & IFF_UP) || (sc->if_flags & IFF_UP),
812		("interface not up"));
813
814	/* Pick the desired transmit queue. */
815	if (m->m_pkthdr.csum_flags &
816	    (CSUM_DELAY_DATA | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO)) {
817		int index = 0;
818
819		/* check if flowid is set */
820		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
821			uint32_t hash = m->m_pkthdr.flowid;
822
823			index = sc->rx_indir_table[hash % SFXGE_RX_SCALE_MAX];
824		}
825#if SFXGE_TX_PARSE_EARLY
826		if (m->m_pkthdr.csum_flags & CSUM_TSO)
827			sfxge_parse_tx_packet(m);
828#endif
829		txq = sc->txq[SFXGE_TXQ_IP_TCP_UDP_CKSUM + index];
830	} else if (m->m_pkthdr.csum_flags & CSUM_DELAY_IP) {
831		txq = sc->txq[SFXGE_TXQ_IP_CKSUM];
832	} else {
833		txq = sc->txq[SFXGE_TXQ_NON_CKSUM];
834	}
835
836	rc = sfxge_tx_packet_add(txq, m);
837	if (rc != 0)
838		m_freem(m);
839
840	return (rc);
841}
842
843/*
844 * Software "TSO".  Not quite as good as doing it in hardware, but
845 * still faster than segmenting in the stack.
846 */
847
848struct sfxge_tso_state {
849	/* Output position */
850	unsigned out_len;	/* Remaining length in current segment */
851	unsigned seqnum;	/* Current sequence number */
852	unsigned packet_space;	/* Remaining space in current packet */
853
854	/* Input position */
855	uint64_t dma_addr;	/* DMA address of current position */
856	unsigned in_len;	/* Remaining length in current mbuf */
857
858	const struct mbuf *mbuf; /* Input mbuf (head of chain) */
859	u_short protocol;	/* Network protocol (after VLAN decap) */
860	ssize_t nh_off;		/* Offset of network header */
861	ssize_t tcph_off;	/* Offset of TCP header */
862	unsigned header_len;	/* Number of bytes of header */
863	unsigned seg_size;	/* TCP segment size */
864	int fw_assisted;	/* Use FW-assisted TSO */
865	u_short packet_id;	/* IPv4 packet ID from the original packet */
866	uint8_t tcp_flags;	/* TCP flags */
867	efx_desc_t header_desc; /* Precomputed header descriptor for
868				 * FW-assisted TSO */
869};
870
871#if !SFXGE_TX_PARSE_EARLY
872static const struct ip *tso_iph(const struct sfxge_tso_state *tso)
873{
874	KASSERT(tso->protocol == htons(ETHERTYPE_IP),
875		("tso_iph() in non-IPv4 state"));
876	return (const struct ip *)(tso->mbuf->m_data + tso->nh_off);
877}
878
879static __unused const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso)
880{
881	KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
882		("tso_ip6h() in non-IPv6 state"));
883	return (const struct ip6_hdr *)(tso->mbuf->m_data + tso->nh_off);
884}
885
886static const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso)
887{
888	return (const struct tcphdr *)(tso->mbuf->m_data + tso->tcph_off);
889}
890#endif
891
892
893/* Size of preallocated TSO header buffers.  Larger blocks must be
894 * allocated from the heap.
895 */
896#define	TSOH_STD_SIZE	128
897
898/* At most half the descriptors in the queue at any time will refer to
899 * a TSO header buffer, since they must always be followed by a
900 * payload descriptor referring to an mbuf.
901 */
902#define	TSOH_COUNT(_txq_entries)	((_txq_entries) / 2u)
903#define	TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
904#define	TSOH_PAGE_COUNT(_txq_entries)	\
905	((TSOH_COUNT(_txq_entries) + TSOH_PER_PAGE - 1) / TSOH_PER_PAGE)
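/*
 * Worked example (assuming PAGE_SIZE is 4096): TSOH_PER_PAGE = 4096 / 128 = 32,
 * so a 1024-entry Tx queue needs TSOH_COUNT = 512 standard header buffers,
 * which fit in TSOH_PAGE_COUNT = 16 pages.
 */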
906
907static int tso_init(struct sfxge_txq *txq)
908{
909	struct sfxge_softc *sc = txq->sc;
910	unsigned int tsoh_page_count = TSOH_PAGE_COUNT(sc->txq_entries);
911	int i, rc;
912
913	/* Allocate TSO header buffers */
914	txq->tsoh_buffer = malloc(tsoh_page_count * sizeof(txq->tsoh_buffer[0]),
915				  M_SFXGE, M_WAITOK);
916
917	for (i = 0; i < tsoh_page_count; i++) {
918		rc = sfxge_dma_alloc(sc, PAGE_SIZE, &txq->tsoh_buffer[i]);
919		if (rc != 0)
920			goto fail;
921	}
922
923	return (0);
924
925fail:
926	while (i-- > 0)
927		sfxge_dma_free(&txq->tsoh_buffer[i]);
928	free(txq->tsoh_buffer, M_SFXGE);
929	txq->tsoh_buffer = NULL;
930	return (rc);
931}
932
933static void tso_fini(struct sfxge_txq *txq)
934{
935	int i;
936
937	if (txq->tsoh_buffer != NULL) {
938		for (i = 0; i < TSOH_PAGE_COUNT(txq->sc->txq_entries); i++)
939			sfxge_dma_free(&txq->tsoh_buffer[i]);
940		free(txq->tsoh_buffer, M_SFXGE);
941	}
942}
943
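/*
 * Initialise the TSO state from the packet headers: locate the network and
 * TCP headers, record the segment size, sequence number and flags, and
 * decide whether firmware-assisted TSO can be used for this packet.
 */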
944static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso,
945		      const bus_dma_segment_t *hdr_dma_seg,
946		      struct mbuf *mbuf)
947{
948	const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->sc->enp);
949#if !SFXGE_TX_PARSE_EARLY
950	struct ether_header *eh = mtod(mbuf, struct ether_header *);
951	const struct tcphdr *th;
952	struct tcphdr th_copy;
953#endif
954
955	tso->fw_assisted = txq->sc->tso_fw_assisted;
956	tso->mbuf = mbuf;
957
958	/* Find network protocol and header */
959#if !SFXGE_TX_PARSE_EARLY
960	tso->protocol = eh->ether_type;
961	if (tso->protocol == htons(ETHERTYPE_VLAN)) {
962		struct ether_vlan_header *veh =
963			mtod(mbuf, struct ether_vlan_header *);
964		tso->protocol = veh->evl_proto;
965		tso->nh_off = sizeof(*veh);
966	} else {
967		tso->nh_off = sizeof(*eh);
968	}
969#else
970	tso->protocol = TSO_MBUF_PROTO(mbuf);
971	tso->nh_off = mbuf->m_pkthdr.l2hlen;
972	tso->tcph_off = mbuf->m_pkthdr.l3hlen;
973	tso->packet_id = TSO_MBUF_PACKETID(mbuf);
974#endif
975
976#if !SFXGE_TX_PARSE_EARLY
977	/* Find TCP header */
978	if (tso->protocol == htons(ETHERTYPE_IP)) {
979		KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP,
980			("TSO required on non-TCP packet"));
981		tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl;
982		tso->packet_id = tso_iph(tso)->ip_id;
983	} else {
984		KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
985			("TSO required on non-IP packet"));
986		KASSERT(tso_ip6h(tso)->ip6_nxt == IPPROTO_TCP,
987			("TSO required on non-TCP packet"));
988		tso->tcph_off = tso->nh_off + sizeof(struct ip6_hdr);
989		tso->packet_id = 0;
990	}
991#endif
992
993
994	if (tso->fw_assisted &&
995	    __predict_false(tso->tcph_off >
996			    encp->enc_tx_tso_tcp_header_offset_limit)) {
997		tso->fw_assisted = 0;
998	}
999
1000
1001#if !SFXGE_TX_PARSE_EARLY
1002	KASSERT(mbuf->m_len >= tso->tcph_off,
1003		("network header is fragmented in mbuf"));
	/* We need the TCP header including the flags (the window field is next) */
1005	if (mbuf->m_len < tso->tcph_off + offsetof(struct tcphdr, th_win)) {
1006		m_copydata(tso->mbuf, tso->tcph_off, sizeof(th_copy),
1007			   (caddr_t)&th_copy);
1008		th = &th_copy;
1009	} else {
1010		th = tso_tcph(tso);
1011	}
1012	tso->header_len = tso->tcph_off + 4 * th->th_off;
1013#else
1014	tso->header_len = mbuf->m_pkthdr.l4hlen;
1015#endif
1016	tso->seg_size = mbuf->m_pkthdr.tso_segsz;
1017
1018#if !SFXGE_TX_PARSE_EARLY
1019	tso->seqnum = ntohl(th->th_seq);
1020
	/*
	 * These flags must not be duplicated.
	 *
	 * RST should not be duplicated either, but the FreeBSD kernel
	 * generates TSO packets with the RST flag set, so do not assert
	 * its absence.
	 */
1027	KASSERT(!(th->th_flags & (TH_URG | TH_SYN)),
1028		("incompatible TCP flag 0x%x on TSO packet",
1029		 th->th_flags & (TH_URG | TH_SYN)));
1030	tso->tcp_flags = th->th_flags;
1031#else
1032	tso->seqnum = TSO_MBUF_SEQNUM(mbuf);
1033	tso->tcp_flags = TSO_MBUF_FLAGS(mbuf);
1034#endif
1035
1036	tso->out_len = mbuf->m_pkthdr.len - tso->header_len;
1037
1038	if (tso->fw_assisted) {
1039		if (hdr_dma_seg->ds_len >= tso->header_len)
1040			efx_tx_qdesc_dma_create(txq->common,
1041						hdr_dma_seg->ds_addr,
1042						tso->header_len,
1043						B_FALSE,
1044						&tso->header_desc);
1045		else
1046			tso->fw_assisted = 0;
1047	}
1048}
1049
/*
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 *
 * Form descriptors for the current fragment, until we reach the end of the
 * fragment or end-of-packet.
 */
1057static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
1058					  struct sfxge_tso_state *tso)
1059{
1060	efx_desc_t *desc;
1061	int n;
1062
1063	if (tso->in_len == 0 || tso->packet_space == 0)
1064		return;
1065
1066	KASSERT(tso->in_len > 0, ("TSO input length went negative"));
1067	KASSERT(tso->packet_space > 0, ("TSO packet space went negative"));
1068
1069	n = min(tso->in_len, tso->packet_space);
1070
1071	tso->packet_space -= n;
1072	tso->out_len -= n;
1073	tso->in_len -= n;
1074
1075	desc = &txq->pend_desc[txq->n_pend_desc++];
1076	efx_tx_qdesc_dma_create(txq->common,
1077				tso->dma_addr,
1078				n,
1079				tso->out_len == 0 || tso->packet_space == 0,
1080				desc);
1081
1082	tso->dma_addr += n;
1083}
1084
1085/* Callback from bus_dmamap_load() for long TSO headers. */
1086static void tso_map_long_header(void *dma_addr_ret,
1087				bus_dma_segment_t *segs, int nseg,
1088				int error)
1089{
1090	*(uint64_t *)dma_addr_ret = ((__predict_true(error == 0) &&
1091				      __predict_true(nseg == 1)) ?
1092				     segs->ds_addr : 0);
1093}
1094
/*
 * tso_start_new_packet - generate a new header and prepare for the new packet
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or an error code if we failed to allocate a header.
 */
1101static int tso_start_new_packet(struct sfxge_txq *txq,
1102				struct sfxge_tso_state *tso,
1103				unsigned int *idp)
1104{
1105	unsigned int id = *idp;
1106	struct tcphdr *tsoh_th;
1107	unsigned ip_length;
1108	caddr_t header;
1109	uint64_t dma_addr;
1110	bus_dmamap_t map;
1111	efx_desc_t *desc;
1112	int rc;
1113
1114	if (tso->fw_assisted) {
1115		uint8_t tcp_flags = tso->tcp_flags;
1116
1117		if (tso->out_len > tso->seg_size)
1118			tcp_flags &= ~(TH_FIN | TH_PUSH);
1119
1120		/* TSO option descriptor */
1121		desc = &txq->pend_desc[txq->n_pend_desc++];
1122		efx_tx_qdesc_tso_create(txq->common,
1123					tso->packet_id,
1124					tso->seqnum,
1125					tcp_flags,
1126					desc++);
1127		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1128		id = (id + 1) & txq->ptr_mask;
1129
1130		/* Header DMA descriptor */
1131		*desc = tso->header_desc;
1132		txq->n_pend_desc++;
1133		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1134		id = (id + 1) & txq->ptr_mask;
1135
1136		tso->seqnum += tso->seg_size;
1137	} else {
1138		/* Allocate a DMA-mapped header buffer. */
1139		if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
1140			unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
1141			unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;
1142
1143			header = (txq->tsoh_buffer[page_index].esm_base +
1144				  buf_index * TSOH_STD_SIZE);
1145			dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
1146				    buf_index * TSOH_STD_SIZE);
1147			map = txq->tsoh_buffer[page_index].esm_map;
1148
1149			KASSERT(txq->stmp[id].flags == 0,
1150				("stmp flags are not 0"));
1151		} else {
1152			struct sfxge_tx_mapping *stmp = &txq->stmp[id];
1153
1154			/* We cannot use bus_dmamem_alloc() as that may sleep */
1155			header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
1156			if (__predict_false(!header))
1157				return (ENOMEM);
1158			rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
1159					     header, tso->header_len,
1160					     tso_map_long_header, &dma_addr,
1161					     BUS_DMA_NOWAIT);
1162			if (__predict_false(dma_addr == 0)) {
1163				if (rc == 0) {
1164					/* Succeeded but got >1 segment */
1165					bus_dmamap_unload(txq->packet_dma_tag,
1166							  stmp->map);
1167					rc = EINVAL;
1168				}
1169				free(header, M_SFXGE);
1170				return (rc);
1171			}
1172			map = stmp->map;
1173
1174			txq->tso_long_headers++;
1175			stmp->u.heap_buf = header;
1176			stmp->flags = TX_BUF_UNMAP;
1177		}
1178
1179		tsoh_th = (struct tcphdr *)(header + tso->tcph_off);
1180
1181		/* Copy and update the headers. */
1182		m_copydata(tso->mbuf, 0, tso->header_len, header);
1183
1184		tsoh_th->th_seq = htonl(tso->seqnum);
1185		tso->seqnum += tso->seg_size;
1186		if (tso->out_len > tso->seg_size) {
1187			/* This packet will not finish the TSO burst. */
1188			ip_length = tso->header_len - tso->nh_off + tso->seg_size;
1189			tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
1190		} else {
1191			/* This packet will be the last in the TSO burst. */
1192			ip_length = tso->header_len - tso->nh_off + tso->out_len;
1193		}
1194
1195		if (tso->protocol == htons(ETHERTYPE_IP)) {
1196			struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
1197			tsoh_iph->ip_len = htons(ip_length);
1198			/* XXX We should increment ip_id, but FreeBSD doesn't
1199			 * currently allocate extra IDs for multiple segments.
1200			 */
1201		} else {
1202			struct ip6_hdr *tsoh_iph =
1203				(struct ip6_hdr *)(header + tso->nh_off);
1204			tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
1205		}
1206
1207		/* Make the header visible to the hardware. */
1208		bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);
1209
1210		/* Form a descriptor for this header. */
1211		desc = &txq->pend_desc[txq->n_pend_desc++];
1212		efx_tx_qdesc_dma_create(txq->common,
1213					dma_addr,
1214					tso->header_len,
1215					0,
1216					desc);
1217		id = (id + 1) & txq->ptr_mask;
1218	}
1219	tso->packet_space = tso->seg_size;
1220	txq->tso_packets++;
1221	*idp = id;
1222
1223	return (0);
1224}
1225
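/*
 * Queue a TSO packet: skip over the DMA segments covered by the header,
 * then emit option/header descriptors and payload DMA descriptors for each
 * output segment.  Returns the descriptor ring index just past the last
 * mapping used, or -1 if the initial header could not be set up.
 */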
1226static int
1227sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
1228		   const bus_dma_segment_t *dma_seg, int n_dma_seg,
1229		   int vlan_tagged)
1230{
1231	struct sfxge_tso_state tso;
1232	unsigned int id;
1233	unsigned skipped = 0;
1234
1235	tso_start(txq, &tso, dma_seg, mbuf);
1236
1237	while (dma_seg->ds_len + skipped <= tso.header_len) {
1238		skipped += dma_seg->ds_len;
1239		--n_dma_seg;
1240		KASSERT(n_dma_seg, ("no payload found in TSO packet"));
1241		++dma_seg;
1242	}
1243	tso.in_len = dma_seg->ds_len - (tso.header_len - skipped);
1244	tso.dma_addr = dma_seg->ds_addr + (tso.header_len - skipped);
1245
1246	id = (txq->added + vlan_tagged) & txq->ptr_mask;
1247	if (__predict_false(tso_start_new_packet(txq, &tso, &id)))
1248		return (-1);
1249
1250	while (1) {
1251		tso_fill_packet_with_fragment(txq, &tso);
1252		/* Exactly one DMA descriptor is added */
1253		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1254		id = (id + 1) & txq->ptr_mask;
1255
1256		/* Move onto the next fragment? */
1257		if (tso.in_len == 0) {
1258			--n_dma_seg;
1259			if (n_dma_seg == 0)
1260				break;
1261			++dma_seg;
1262			tso.in_len = dma_seg->ds_len;
1263			tso.dma_addr = dma_seg->ds_addr;
1264		}
1265
1266		/* End of packet? */
1267		if (tso.packet_space == 0) {
1268			/* If the queue is now full due to tiny MSS,
1269			 * or we can't create another header, discard
1270			 * the remainder of the input mbuf but do not
1271			 * roll back the work we have done.
1272			 */
1273			if (txq->n_pend_desc + tso.fw_assisted +
1274			    1 /* header */ + n_dma_seg >
1275			    txq->max_pkt_desc) {
1276				txq->tso_pdrop_too_many++;
1277				break;
1278			}
1279			if (__predict_false(tso_start_new_packet(txq, &tso,
1280								 &id))) {
1281				txq->tso_pdrop_no_rsrc++;
1282				break;
1283			}
1284		}
1285	}
1286
1287	txq->tso_bursts++;
1288	return (id);
1289}
1290
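/*
 * Unblock the Tx queue if enough descriptors have completed, then service
 * the deferred packet list.  Called with the event queue lock held.
 */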
1291static void
1292sfxge_tx_qunblock(struct sfxge_txq *txq)
1293{
1294	struct sfxge_softc *sc;
1295	struct sfxge_evq *evq;
1296
1297	sc = txq->sc;
1298	evq = sc->evq[txq->evq_index];
1299
1300	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);
1301
1302	if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED))
1303		return;
1304
1305	SFXGE_TXQ_LOCK(txq);
1306
1307	if (txq->blocked) {
1308		unsigned int level;
1309
1310		level = txq->added - txq->completed;
1311		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries)) {
1312			/* reaped must be in sync with blocked */
1313			sfxge_tx_qreap(txq);
1314			txq->blocked = 0;
1315		}
1316	}
1317
1318	sfxge_tx_qdpl_service(txq);
1319	/* note: lock has been dropped */
1320}
1321
1322void
1323sfxge_tx_qflush_done(struct sfxge_txq *txq)
1324{
1325
1326	txq->flush_state = SFXGE_FLUSH_DONE;
1327}
1328
1329static void
1330sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index)
1331{
1332	struct sfxge_txq *txq;
1333	struct sfxge_evq *evq;
1334	unsigned int count;
1335
1336	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
1337
1338	txq = sc->txq[index];
1339	evq = sc->evq[txq->evq_index];
1340
1341	SFXGE_EVQ_LOCK(evq);
1342	SFXGE_TXQ_LOCK(txq);
1343
1344	KASSERT(txq->init_state == SFXGE_TXQ_STARTED,
1345	    ("txq->init_state != SFXGE_TXQ_STARTED"));
1346
1347	txq->init_state = SFXGE_TXQ_INITIALIZED;
1348
1349	if (txq->flush_state != SFXGE_FLUSH_DONE) {
1350		txq->flush_state = SFXGE_FLUSH_PENDING;
1351
1352		SFXGE_EVQ_UNLOCK(evq);
1353		SFXGE_TXQ_UNLOCK(txq);
1354
1355		/* Flush the transmit queue. */
1356		if (efx_tx_qflush(txq->common) != 0) {
1357			log(LOG_ERR, "%s: Flushing Tx queue %u failed\n",
1358			    device_get_nameunit(sc->dev), index);
1359			txq->flush_state = SFXGE_FLUSH_DONE;
1360		} else {
1361			count = 0;
1362			do {
1363				/* Spin for 100ms. */
1364				DELAY(100000);
1365				if (txq->flush_state != SFXGE_FLUSH_PENDING)
1366					break;
1367			} while (++count < 20);
1368		}
1369		SFXGE_EVQ_LOCK(evq);
1370		SFXGE_TXQ_LOCK(txq);
1371
1372		KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
1373		    ("txq->flush_state == SFXGE_FLUSH_FAILED"));
1374
1375		if (txq->flush_state != SFXGE_FLUSH_DONE) {
1376			/* Flush timeout */
1377			log(LOG_ERR, "%s: Cannot flush Tx queue %u\n",
1378			    device_get_nameunit(sc->dev), index);
1379			txq->flush_state = SFXGE_FLUSH_DONE;
1380		}
1381	}
1382
1383	txq->blocked = 0;
1384	txq->pending = txq->added;
1385
1386	sfxge_tx_qcomplete(txq, evq);
1387	KASSERT(txq->completed == txq->added,
1388	    ("txq->completed != txq->added"));
1389
1390	sfxge_tx_qreap(txq);
1391	KASSERT(txq->reaped == txq->completed,
1392	    ("txq->reaped != txq->completed"));
1393
1394	txq->added = 0;
1395	txq->pending = 0;
1396	txq->completed = 0;
1397	txq->reaped = 0;
1398
1399	/* Destroy the common code transmit queue. */
1400	efx_tx_qdestroy(txq->common);
1401	txq->common = NULL;
1402
1403	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
1404	    EFX_TXQ_NBUFS(sc->txq_entries));
1405
1406	SFXGE_EVQ_UNLOCK(evq);
1407	SFXGE_TXQ_UNLOCK(txq);
1408}
1409
1410static int
1411sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
1412{
1413	struct sfxge_txq *txq;
1414	efsys_mem_t *esmp;
1415	uint16_t flags;
1416	struct sfxge_evq *evq;
1417	unsigned int desc_index;
1418	int rc;
1419
1420	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
1421
1422	txq = sc->txq[index];
1423	esmp = &txq->mem;
1424	evq = sc->evq[txq->evq_index];
1425
1426	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
1427	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));
1428	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
1429	    ("evq->init_state != SFXGE_EVQ_STARTED"));
1430
1431	/* Program the buffer table. */
1432	if ((rc = efx_sram_buf_tbl_set(sc->enp, txq->buf_base_id, esmp,
1433	    EFX_TXQ_NBUFS(sc->txq_entries))) != 0)
1434		return (rc);
1435
1436	/* Determine the kind of queue we are creating. */
1437	switch (txq->type) {
1438	case SFXGE_TXQ_NON_CKSUM:
1439		flags = 0;
1440		break;
1441	case SFXGE_TXQ_IP_CKSUM:
1442		flags = EFX_TXQ_CKSUM_IPV4;
1443		break;
1444	case SFXGE_TXQ_IP_TCP_UDP_CKSUM:
1445		flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP;
1446		break;
1447	default:
1448		KASSERT(0, ("Impossible TX queue"));
1449		flags = 0;
1450		break;
1451	}
1452
1453	/* Create the common code transmit queue. */
1454	if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp,
1455	    sc->txq_entries, txq->buf_base_id, flags, evq->common,
1456	    &txq->common, &desc_index)) != 0)
1457		goto fail;
1458
1459	/* Initialise queue descriptor indexes */
1460	txq->added = txq->pending = txq->completed = txq->reaped = desc_index;
1461
1462	SFXGE_TXQ_LOCK(txq);
1463
1464	/* Enable the transmit queue. */
1465	efx_tx_qenable(txq->common);
1466
1467	txq->init_state = SFXGE_TXQ_STARTED;
1468	txq->flush_state = SFXGE_FLUSH_REQUIRED;
1469
1470	SFXGE_TXQ_UNLOCK(txq);
1471
1472	return (0);
1473
1474fail:
1475	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
1476	    EFX_TXQ_NBUFS(sc->txq_entries));
1477	return (rc);
1478}
1479
1480void
1481sfxge_tx_stop(struct sfxge_softc *sc)
1482{
1483	int index;
1484
1485	index = sc->txq_count;
1486	while (--index >= 0)
1487		sfxge_tx_qstop(sc, index);
1488
1489	/* Tear down the transmit module */
1490	efx_tx_fini(sc->enp);
1491}
1492
1493int
1494sfxge_tx_start(struct sfxge_softc *sc)
1495{
1496	int index;
1497	int rc;
1498
1499	/* Initialize the common code transmit module. */
1500	if ((rc = efx_tx_init(sc->enp)) != 0)
1501		return (rc);
1502
1503	for (index = 0; index < sc->txq_count; index++) {
1504		if ((rc = sfxge_tx_qstart(sc, index)) != 0)
1505			goto fail;
1506	}
1507
1508	return (0);
1509
1510fail:
1511	while (--index >= 0)
1512		sfxge_tx_qstop(sc, index);
1513
1514	efx_tx_fini(sc->enp);
1515
1516	return (rc);
1517}
1518
1519static int
1520sfxge_txq_stat_init(struct sfxge_txq *txq, struct sysctl_oid *txq_node)
1521{
1522	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(txq->sc->dev);
1523	struct sysctl_oid *stat_node;
1524	unsigned int id;
1525
1526	stat_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO,
1527				    "stats", CTLFLAG_RD, NULL,
1528				    "Tx queue statistics");
1529	if (stat_node == NULL)
1530		return (ENOMEM);
1531
1532	for (id = 0; id < nitems(sfxge_tx_stats); id++) {
1533		SYSCTL_ADD_ULONG(
1534		    ctx, SYSCTL_CHILDREN(stat_node), OID_AUTO,
1535		    sfxge_tx_stats[id].name, CTLFLAG_RD | CTLFLAG_STATS,
1536		    (unsigned long *)((caddr_t)txq + sfxge_tx_stats[id].offset),
1537		    "");
1538	}
1539
1540	return (0);
1541}
1542
1543/**
1544 * Destroy a transmit queue.
1545 */
1546static void
1547sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index)
1548{
1549	struct sfxge_txq *txq;
1550	unsigned int nmaps;
1551
1552	txq = sc->txq[index];
1553
1554	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
1555	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));
1556
1557	if (txq->type == SFXGE_TXQ_IP_TCP_UDP_CKSUM)
1558		tso_fini(txq);
1559
1560	/* Free the context arrays. */
1561	free(txq->pend_desc, M_SFXGE);
1562	nmaps = sc->txq_entries;
1563	while (nmaps-- != 0)
1564		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
1565	free(txq->stmp, M_SFXGE);
1566
1567	/* Release DMA memory mapping. */
1568	sfxge_dma_free(&txq->mem);
1569
1570	sc->txq[index] = NULL;
1571
1572	SFXGE_TXQ_LOCK_DESTROY(txq);
1573
1574	free(txq, M_SFXGE);
1575}
1576
/*
 * Estimate the maximum number of Tx descriptors required for a TSO packet.
 * With minimum MSS and maximum mbuf length we might need more (even more
 * than a ring-full of descriptors), but this should not happen in practice
 * except due to deliberate attack.  In that case we will truncate the output
 * at a packet boundary.
 */
1584static unsigned int
1585sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type)
1586{
1587	/* One descriptor for every input fragment */
1588	unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG;
1589
1590	/* VLAN tagging Tx option descriptor may be required */
1591	if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled)
1592		max_descs++;
1593
1594	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) {
1595		/*
1596		 * Plus header and payload descriptor for each output segment.
1597		 * Minus one since header fragment is already counted.
1598		 */
1599		max_descs += SFXGE_TSO_MAX_SEGS * 2 - 1;
1600
1601		/* FW assisted TSO requires one more descriptor per segment */
1602		if (sc->tso_fw_assisted)
1603			max_descs += SFXGE_TSO_MAX_SEGS;
1604	}
1605
1606	return (max_descs);
1607}
1608
1609static int
1610sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
1611	       enum sfxge_txq_type type, unsigned int evq_index)
1612{
1613	char name[16];
1614	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1615	struct sysctl_oid *txq_node;
1616	struct sfxge_txq *txq;
1617	struct sfxge_evq *evq;
1618	struct sfxge_tx_dpl *stdp;
1619	struct sysctl_oid *dpl_node;
1620	efsys_mem_t *esmp;
1621	unsigned int nmaps;
1622	int rc;
1623
1624	txq = malloc(sizeof(struct sfxge_txq), M_SFXGE, M_ZERO | M_WAITOK);
1625	txq->sc = sc;
1626	txq->entries = sc->txq_entries;
1627	txq->ptr_mask = txq->entries - 1;
1628
1629	sc->txq[txq_index] = txq;
1630	esmp = &txq->mem;
1631
1632	evq = sc->evq[evq_index];
1633
1634	/* Allocate and zero DMA space for the descriptor ring. */
1635	if ((rc = sfxge_dma_alloc(sc, EFX_TXQ_SIZE(sc->txq_entries), esmp)) != 0)
1636		return (rc);
1637
1638	/* Allocate buffer table entries. */
1639	sfxge_sram_buf_tbl_alloc(sc, EFX_TXQ_NBUFS(sc->txq_entries),
1640				 &txq->buf_base_id);
1641
1642	/* Create a DMA tag for packet mappings. */
1643	if (bus_dma_tag_create(sc->parent_dma_tag, 1, 0x1000,
1644	    MIN(0x3FFFFFFFFFFFUL, BUS_SPACE_MAXADDR), BUS_SPACE_MAXADDR, NULL,
1645	    NULL, 0x11000, SFXGE_TX_MAPPING_MAX_SEG, 0x1000, 0, NULL, NULL,
1646	    &txq->packet_dma_tag) != 0) {
1647		device_printf(sc->dev, "Couldn't allocate txq DMA tag\n");
1648		rc = ENOMEM;
1649		goto fail;
1650	}
1651
1652	/* Allocate pending descriptor array for batching writes. */
1653	txq->pend_desc = malloc(sizeof(efx_desc_t) * sc->txq_entries,
1654				M_SFXGE, M_ZERO | M_WAITOK);
1655
1656	/* Allocate and initialise mbuf DMA mapping array. */
1657	txq->stmp = malloc(sizeof(struct sfxge_tx_mapping) * sc->txq_entries,
1658	    M_SFXGE, M_ZERO | M_WAITOK);
1659	for (nmaps = 0; nmaps < sc->txq_entries; nmaps++) {
1660		rc = bus_dmamap_create(txq->packet_dma_tag, 0,
1661				       &txq->stmp[nmaps].map);
1662		if (rc != 0)
1663			goto fail2;
1664	}
1665
1666	snprintf(name, sizeof(name), "%u", txq_index);
1667	txq_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->txqs_node),
1668				   OID_AUTO, name, CTLFLAG_RD, NULL, "");
1669	if (txq_node == NULL) {
1670		rc = ENOMEM;
1671		goto fail_txq_node;
1672	}
1673
1674	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM &&
1675	    (rc = tso_init(txq)) != 0)
1676		goto fail3;
1677
1678	if (sfxge_tx_dpl_get_max <= 0) {
1679		log(LOG_ERR, "%s=%d must be greater than 0",
1680		    SFXGE_PARAM_TX_DPL_GET_MAX, sfxge_tx_dpl_get_max);
1681		rc = EINVAL;
1682		goto fail_tx_dpl_get_max;
1683	}
1684	if (sfxge_tx_dpl_get_non_tcp_max <= 0) {
1685		log(LOG_ERR, "%s=%d must be greater than 0",
1686		    SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX,
1687		    sfxge_tx_dpl_get_non_tcp_max);
1688		rc = EINVAL;
1689		goto fail_tx_dpl_get_max;
1690	}
1691	if (sfxge_tx_dpl_put_max < 0) {
		log(LOG_ERR, "%s=%d must be greater than or equal to 0",
1693		    SFXGE_PARAM_TX_DPL_PUT_MAX, sfxge_tx_dpl_put_max);
1694		rc = EINVAL;
1695		goto fail_tx_dpl_put_max;
1696	}
1697
1698	/* Initialize the deferred packet list. */
1699	stdp = &txq->dpl;
1700	stdp->std_put_max = sfxge_tx_dpl_put_max;
1701	stdp->std_get_max = sfxge_tx_dpl_get_max;
1702	stdp->std_get_non_tcp_max = sfxge_tx_dpl_get_non_tcp_max;
1703	stdp->std_getp = &stdp->std_get;
1704
1705	SFXGE_TXQ_LOCK_INIT(txq, device_get_nameunit(sc->dev), txq_index);
1706
1707	dpl_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO,
1708				   "dpl", CTLFLAG_RD, NULL,
1709				   "Deferred packet list statistics");
1710	if (dpl_node == NULL) {
1711		rc = ENOMEM;
1712		goto fail_dpl_node;
1713	}
1714
1715	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
1716			"get_count", CTLFLAG_RD | CTLFLAG_STATS,
1717			&stdp->std_get_count, 0, "");
1718	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
1719			"get_non_tcp_count", CTLFLAG_RD | CTLFLAG_STATS,
1720			&stdp->std_get_non_tcp_count, 0, "");
1721	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
1722			"get_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
1723			&stdp->std_get_hiwat, 0, "");
1724	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
1725			"put_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
1726			&stdp->std_put_hiwat, 0, "");
1727
1728	rc = sfxge_txq_stat_init(txq, txq_node);
1729	if (rc != 0)
1730		goto fail_txq_stat_init;
1731
1732	txq->type = type;
1733	txq->evq_index = evq_index;
1734	txq->txq_index = txq_index;
1735	txq->init_state = SFXGE_TXQ_INITIALIZED;
1736	txq->hw_vlan_tci = 0;
1737
1738	txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, type);
1739
1740	return (0);
1741
1742fail_txq_stat_init:
1743fail_dpl_node:
1744fail_tx_dpl_put_max:
1745fail_tx_dpl_get_max:
1746fail3:
1747fail_txq_node:
1748	free(txq->pend_desc, M_SFXGE);
1749fail2:
1750	while (nmaps-- != 0)
1751		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
1752	free(txq->stmp, M_SFXGE);
1753	bus_dma_tag_destroy(txq->packet_dma_tag);
1754
1755fail:
1756	sfxge_dma_free(esmp);
1757
1758	return (rc);
1759}
1760
1761static int
1762sfxge_tx_stat_handler(SYSCTL_HANDLER_ARGS)
1763{
1764	struct sfxge_softc *sc = arg1;
1765	unsigned int id = arg2;
1766	unsigned long sum;
1767	unsigned int index;
1768
1769	/* Sum across all TX queues */
1770	sum = 0;
1771	for (index = 0; index < sc->txq_count; index++)
1772		sum += *(unsigned long *)((caddr_t)sc->txq[index] +
1773					  sfxge_tx_stats[id].offset);
1774
1775	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
1776}
1777
1778static void
1779sfxge_tx_stat_init(struct sfxge_softc *sc)
1780{
1781	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1782	struct sysctl_oid_list *stat_list;
1783	unsigned int id;
1784
1785	stat_list = SYSCTL_CHILDREN(sc->stats_node);
1786
1787	for (id = 0; id < nitems(sfxge_tx_stats); id++) {
1788		SYSCTL_ADD_PROC(
1789			ctx, stat_list,
1790			OID_AUTO, sfxge_tx_stats[id].name,
1791			CTLTYPE_ULONG|CTLFLAG_RD,
1792			sc, id, sfxge_tx_stat_handler, "LU",
1793			"");
1794	}
1795}
1796
1797uint64_t
1798sfxge_tx_get_drops(struct sfxge_softc *sc)
1799{
1800	unsigned int index;
1801	uint64_t drops = 0;
1802	struct sfxge_txq *txq;
1803
1804	/* Sum across all TX queues */
1805	for (index = 0; index < sc->txq_count; index++) {
1806		txq = sc->txq[index];
		/*
		 * In theory, txq->put_overflow and txq->netdown_drops
		 * should be read with atomic operations and the others
		 * should be read under the txq lock, but these are just
		 * statistics.
		 */
1812		drops += txq->drops + txq->get_overflow +
1813			 txq->get_non_tcp_overflow +
1814			 txq->put_overflow + txq->netdown_drops +
1815			 txq->tso_pdrop_too_many + txq->tso_pdrop_no_rsrc;
1816	}
1817	return (drops);
1818}
1819
1820void
1821sfxge_tx_fini(struct sfxge_softc *sc)
1822{
1823	int index;
1824
1825	index = sc->txq_count;
1826	while (--index >= 0)
1827		sfxge_tx_qfini(sc, index);
1828
1829	sc->txq_count = 0;
1830}
1831
1832
1833int
1834sfxge_tx_init(struct sfxge_softc *sc)
1835{
1836	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
1837	struct sfxge_intr *intr;
1838	int index;
1839	int rc;
1840
1841	intr = &sc->intr;
1842
1843	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1844	    ("intr->state != SFXGE_INTR_INITIALIZED"));
1845
1846	sc->txq_count = SFXGE_TXQ_NTYPES - 1 + sc->intr.n_alloc;
1847
1848	sc->tso_fw_assisted = sfxge_tso_fw_assisted;
1849	if (sc->tso_fw_assisted)
1850		sc->tso_fw_assisted =
1851		    (encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) &&
1852		    (encp->enc_fw_assisted_tso_enabled);
1853
1854	sc->txqs_node = SYSCTL_ADD_NODE(
1855		device_get_sysctl_ctx(sc->dev),
1856		SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
1857		OID_AUTO, "txq", CTLFLAG_RD, NULL, "Tx queues");
1858	if (sc->txqs_node == NULL) {
1859		rc = ENOMEM;
1860		goto fail_txq_node;
1861	}
1862
1863	/* Initialize the transmit queues */
1864	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NON_CKSUM,
1865	    SFXGE_TXQ_NON_CKSUM, 0)) != 0)
1866		goto fail;
1867
1868	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_IP_CKSUM,
1869	    SFXGE_TXQ_IP_CKSUM, 0)) != 0)
1870		goto fail2;
1871
1872	for (index = 0;
1873	     index < sc->txq_count - SFXGE_TXQ_NTYPES + 1;
1874	     index++) {
1875		if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NTYPES - 1 + index,
1876		    SFXGE_TXQ_IP_TCP_UDP_CKSUM, index)) != 0)
1877			goto fail3;
1878	}
1879
1880	sfxge_tx_stat_init(sc);
1881
1882	return (0);
1883
1884fail3:
1885	while (--index >= 0)
1886		sfxge_tx_qfini(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);
1887
1888	sfxge_tx_qfini(sc, SFXGE_TXQ_IP_CKSUM);
1889
1890fail2:
1891	sfxge_tx_qfini(sc, SFXGE_TXQ_NON_CKSUM);
1892
1893fail:
1894fail_txq_node:
1895	sc->txq_count = 0;
1896	return (rc);
1897}
1898