sfxge_tx.c revision 342451
1/*-
2 * Copyright (c) 2010-2016 Solarflare Communications Inc.
3 * All rights reserved.
4 *
5 * This software was developed in part by Philip Paeps under contract for
6 * Solarflare Communications, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright notice,
12 *    this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 *    this list of conditions and the following disclaimer in the documentation
15 *    and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * The views and conclusions contained in the software and documentation are
30 * those of the authors and should not be interpreted as representing official
31 * policies, either expressed or implied, of the FreeBSD Project.
32 */
33
34/* Theory of operation:
35 *
36 * Tx queues allocation and mapping
37 *
38 * One Tx queue with enabled checksum offload is allocated per Rx channel
39 * (event queue).  Also 2 Tx queues (one without checksum offload and one
40 * with IP checksum offload only) are allocated and bound to event queue 0.
41 * sfxge_txq_type is used as Tx queue label.
42 *
43 * So, the mapping from event queue plus label to Tx queue index is:
44 *	if event queue index is 0, TxQ-index = TxQ-label (a value in [0..SFXGE_TXQ_NTYPES))
45 *	else TxQ-index = SFXGE_TXQ_NTYPES + EvQ-index - 1
46 * See sfxge_get_txq_by_label() in sfxge_ev.c
47 */
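/*
 * Worked example (added for illustration, not part of the original file;
 * it assumes SFXGE_TXQ_NTYPES == 3 and four event queues):
 *
 *	TxQ 0 (label SFXGE_TXQ_NON_CKSUM)        -> EvQ 0
 *	TxQ 1 (label SFXGE_TXQ_IP_CKSUM)         -> EvQ 0
 *	TxQ 2 (label SFXGE_TXQ_IP_TCP_UDP_CKSUM) -> EvQ 0
 *	TxQ 3 (label SFXGE_TXQ_IP_TCP_UDP_CKSUM) -> EvQ 1
 *	TxQ 4 (label SFXGE_TXQ_IP_TCP_UDP_CKSUM) -> EvQ 2
 *	TxQ 5 (label SFXGE_TXQ_IP_TCP_UDP_CKSUM) -> EvQ 3
 *
 * This matches sc->txq_count = SFXGE_TXQ_NTYPES - 1 + sc->intr.n_alloc
 * and the sfxge_tx_qinit() calls in sfxge_tx_init() below.
 */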
48
49#include <sys/cdefs.h>
50__FBSDID("$FreeBSD: stable/11/sys/dev/sfxge/sfxge_tx.c 342451 2018-12-25 07:33:45Z arybchik $");
51
52#include "opt_rss.h"
53
54#include <sys/param.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/smp.h>
58#include <sys/socket.h>
59#include <sys/sysctl.h>
60#include <sys/syslog.h>
61#include <sys/limits.h>
62
63#include <net/bpf.h>
64#include <net/ethernet.h>
65#include <net/if.h>
66#include <net/if_vlan_var.h>
67
68#include <netinet/in.h>
69#include <netinet/ip.h>
70#include <netinet/ip6.h>
71#include <netinet/tcp.h>
72
73#ifdef RSS
74#include <net/rss_config.h>
75#endif
76
77#include "common/efx.h"
78
79#include "sfxge.h"
80#include "sfxge_tx.h"
81
82
83#define	SFXGE_PARAM_TX_DPL_GET_MAX	SFXGE_PARAM(tx_dpl_get_max)
84static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT;
85TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_MAX, &sfxge_tx_dpl_get_max);
86SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_max, CTLFLAG_RDTUN,
87	   &sfxge_tx_dpl_get_max, 0,
88	   "Maximum total number of packets in the deferred packet get-list");
89
90#define	SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX \
91	SFXGE_PARAM(tx_dpl_get_non_tcp_max)
92static int sfxge_tx_dpl_get_non_tcp_max =
93	SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT;
94TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, &sfxge_tx_dpl_get_non_tcp_max);
95SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_non_tcp_max, CTLFLAG_RDTUN,
96	   &sfxge_tx_dpl_get_non_tcp_max, 0,
97	   "Maximum number of non-TCP packets in deferred packet get-list");
98
99#define	SFXGE_PARAM_TX_DPL_PUT_MAX	SFXGE_PARAM(tx_dpl_put_max)
100static int sfxge_tx_dpl_put_max = SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT;
101TUNABLE_INT(SFXGE_PARAM_TX_DPL_PUT_MAX, &sfxge_tx_dpl_put_max);
102SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN,
103	   &sfxge_tx_dpl_put_max, 0,
104	   "Maximum total number of packets in the deferred packet put-list");
105
106#define	SFXGE_PARAM_TSO_FW_ASSISTED	SFXGE_PARAM(tso_fw_assisted)
107static int sfxge_tso_fw_assisted = (SFXGE_FATSOV1 | SFXGE_FATSOV2);
108TUNABLE_INT(SFXGE_PARAM_TSO_FW_ASSISTED, &sfxge_tso_fw_assisted);
109SYSCTL_INT(_hw_sfxge, OID_AUTO, tso_fw_assisted, CTLFLAG_RDTUN,
110	   &sfxge_tso_fw_assisted, 0,
111	   "Bitmask of FW-assisted TSO variants allowed to be used if supported by NIC firmware");
112
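/*
 * Usage sketch (added for illustration, not part of the original file):
 * all of the above are boot-time tunables (CTLFLAG_RDTUN), so they are
 * normally set in /boot/loader.conf, e.g. with arbitrary example values:
 *
 *	hw.sfxge.tx_dpl_get_max=2048
 *	hw.sfxge.tx_dpl_put_max=512
 *	hw.sfxge.tso_fw_assisted=3
 *
 * The "hw.sfxge" prefix comes from the _hw_sfxge sysctl parent used in
 * the SYSCTL_INT() declarations; SFXGE_PARAM() is assumed to expand to
 * the same prefix for the TUNABLE_INT() names.
 */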
113
114static const struct {
115	const char *name;
116	size_t offset;
117} sfxge_tx_stats[] = {
118#define	SFXGE_TX_STAT(name, member) \
119	{ #name, offsetof(struct sfxge_txq, member) }
120	SFXGE_TX_STAT(tso_bursts, tso_bursts),
121	SFXGE_TX_STAT(tso_packets, tso_packets),
122	SFXGE_TX_STAT(tso_long_headers, tso_long_headers),
123	SFXGE_TX_STAT(tso_pdrop_too_many, tso_pdrop_too_many),
124	SFXGE_TX_STAT(tso_pdrop_no_rsrc, tso_pdrop_no_rsrc),
125	SFXGE_TX_STAT(tx_collapses, collapses),
126	SFXGE_TX_STAT(tx_drops, drops),
127	SFXGE_TX_STAT(tx_get_overflow, get_overflow),
128	SFXGE_TX_STAT(tx_get_non_tcp_overflow, get_non_tcp_overflow),
129	SFXGE_TX_STAT(tx_put_overflow, put_overflow),
130	SFXGE_TX_STAT(tx_netdown_drops, netdown_drops),
131};
132
133
134/* Forward declarations. */
135static void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
136static void sfxge_tx_qlist_post(struct sfxge_txq *txq);
137static void sfxge_tx_qunblock(struct sfxge_txq *txq);
138static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
139			      const bus_dma_segment_t *dma_seg, int n_dma_seg,
140			      int vlan_tagged);
141
142static int
143sfxge_tx_maybe_insert_tag(struct sfxge_txq *txq, struct mbuf *mbuf)
144{
145	uint16_t this_tag = ((mbuf->m_flags & M_VLANTAG) ?
146			     mbuf->m_pkthdr.ether_vtag :
147			     0);
148
149	if (this_tag == txq->hw_vlan_tci)
150		return (0);
151
152	efx_tx_qdesc_vlantci_create(txq->common,
153				    bswap16(this_tag),
154				    &txq->pend_desc[0]);
155	txq->n_pend_desc = 1;
156	txq->hw_vlan_tci = this_tag;
157	return (1);
158}
159
160static inline void
161sfxge_next_stmp(struct sfxge_txq *txq, struct sfxge_tx_mapping **pstmp)
162{
163	KASSERT((*pstmp)->flags == 0, ("stmp flags are not 0"));
164	if (__predict_false(*pstmp ==
165			    &txq->stmp[txq->ptr_mask]))
166		*pstmp = &txq->stmp[0];
167	else
168		(*pstmp)++;
169}
170
171
172void
173sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq)
174{
175	unsigned int completed;
176
177	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);
178
179	completed = txq->completed;
180	while (completed != txq->pending) {
181		struct sfxge_tx_mapping *stmp;
182		unsigned int id;
183
184		id = completed++ & txq->ptr_mask;
185
186		stmp = &txq->stmp[id];
187		if (stmp->flags & TX_BUF_UNMAP) {
188			bus_dmamap_unload(txq->packet_dma_tag, stmp->map);
189			if (stmp->flags & TX_BUF_MBUF) {
190				struct mbuf *m = stmp->u.mbuf;
191				do
192					m = m_free(m);
193				while (m != NULL);
194			} else {
195				free(stmp->u.heap_buf, M_SFXGE);
196			}
197			stmp->flags = 0;
198		}
199	}
200	txq->completed = completed;
201
202	/* Check whether we need to unblock the queue. */
203	mb();
204	if (txq->blocked) {
205		unsigned int level;
206
207		level = txq->added - txq->completed;
208		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries))
209			sfxge_tx_qunblock(txq);
210	}
211}
212
213static unsigned int
214sfxge_is_mbuf_non_tcp(struct mbuf *mbuf)
215{
216	/* Absence of TCP checksum flags does not necessarily mean the packet is
217	 * non-TCP, but the assumption holds if the user wants high throughput.
218	 */
219	return (!(mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)));
220}
221
222/*
223 * Reorder the put list and append it to the get list.
224 */
225static void
226sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq)
227{
228	struct sfxge_tx_dpl *stdp;
229	struct mbuf *mbuf, *get_next, **get_tailp;
230	volatile uintptr_t *putp;
231	uintptr_t put;
232	unsigned int count;
233	unsigned int non_tcp_count;
234
235	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
236
237	stdp = &txq->dpl;
238
239	/* Acquire the put list. */
240	putp = &stdp->std_put;
241	put = atomic_readandclear_ptr(putp);
242	mbuf = (void *)put;
243
244	if (mbuf == NULL)
245		return;
246
247	/* Reverse the put list. */
248	get_tailp = &mbuf->m_nextpkt;
249	get_next = NULL;
250
251	count = 0;
252	non_tcp_count = 0;
253	do {
254		struct mbuf *put_next;
255
256		non_tcp_count += sfxge_is_mbuf_non_tcp(mbuf);
257		put_next = mbuf->m_nextpkt;
258		mbuf->m_nextpkt = get_next;
259		get_next = mbuf;
260		mbuf = put_next;
261
262		count++;
263	} while (mbuf != NULL);
264
265	if (count > stdp->std_put_hiwat)
266		stdp->std_put_hiwat = count;
267
268	/* Append the reversed put list to the get list. */
269	KASSERT(*get_tailp == NULL, ("*get_tailp != NULL"));
270	*stdp->std_getp = get_next;
271	stdp->std_getp = get_tailp;
272	stdp->std_get_count += count;
273	stdp->std_get_non_tcp_count += non_tcp_count;
274}
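/*
 * Illustrative note (added, not part of the original file): the put list
 * is built by prepending at its head in sfxge_tx_qdpl_put_unlocked(), so
 * if packets A, B and C are deferred in that order the list reads
 * C -> B -> A.  The reversal above restores A -> B -> C before appending
 * to the get list, preserving transmit order.
 */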
275
276static void
277sfxge_tx_qreap(struct sfxge_txq *txq)
278{
279	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
280
281	txq->reaped = txq->completed;
282}
283
284static void
285sfxge_tx_qlist_post(struct sfxge_txq *txq)
286{
287	unsigned int old_added;
288	unsigned int block_level;
289	unsigned int level;
290	int rc;
291
292	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
293
294	KASSERT(txq->n_pend_desc != 0, ("txq->n_pend_desc == 0"));
295	KASSERT(txq->n_pend_desc <= txq->max_pkt_desc,
296		("txq->n_pend_desc too large"));
297	KASSERT(!txq->blocked, ("txq->blocked"));
298
299	old_added = txq->added;
300
301	/* Post the fragment list. */
302	rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, txq->n_pend_desc,
303			  txq->reaped, &txq->added);
304	KASSERT(rc == 0, ("efx_tx_qdesc_post() failed"));
305
306	/* If efx_tx_qdesc_post() had to refragment, our information about
307	 * buffers to free may be associated with the wrong
308	 * descriptors.
309	 */
310	KASSERT(txq->added - old_added == txq->n_pend_desc,
311		("efx_tx_qdesc_post() refragmented descriptors"));
312
313	level = txq->added - txq->reaped;
314	KASSERT(level <= txq->entries, ("overfilled TX queue"));
315
316	/* Clear the fragment list. */
317	txq->n_pend_desc = 0;
318
319	/*
320	 * Set the block level to ensure there is space to generate a
321	 * large number of descriptors for TSO.
322	 */
323	block_level = EFX_TXQ_LIMIT(txq->entries) - txq->max_pkt_desc;
324
325	/* Have we reached the block level? */
326	if (level < block_level)
327		return;
328
329	/* Reap, and check again */
330	sfxge_tx_qreap(txq);
331	level = txq->added - txq->reaped;
332	if (level < block_level)
333		return;
334
335	txq->blocked = 1;
336
337	/*
338	 * Avoid a race with completion interrupt handling that could leave
339	 * the queue blocked.
340	 */
341	mb();
342	sfxge_tx_qreap(txq);
343	level = txq->added - txq->reaped;
344	if (level < block_level) {
345		mb();
346		txq->blocked = 0;
347	}
348}
349
350static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)
351{
352	bus_dmamap_t *used_map;
353	bus_dmamap_t map;
354	bus_dma_segment_t dma_seg[SFXGE_TX_MAPPING_MAX_SEG];
355	unsigned int id;
356	struct sfxge_tx_mapping *stmp;
357	efx_desc_t *desc;
358	int n_dma_seg;
359	int rc;
360	int i;
361	int eop;
362	uint16_t hw_vlan_tci_prev;
363	int vlan_tagged;
364
365	KASSERT(!txq->blocked, ("txq->blocked"));
366
367#if SFXGE_TX_PARSE_EARLY
368	/*
369	 * If software TSO is used, we still need to copy the packet header,
370	 * even though it has already been parsed early, before enqueue.
371	 */
372	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) &&
373	    (txq->tso_fw_assisted == 0))
374		prefetch_read_many(mbuf->m_data);
375#else
376	/*
377	 * Prefetch packet header since we need to parse it and extract
378	 * IP ID, TCP sequence number and flags.
379	 */
380	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO)
381		prefetch_read_many(mbuf->m_data);
382#endif
383
384	if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED)) {
385		rc = EINTR;
386		goto reject;
387	}
388
389	/* Load the packet for DMA. */
390	id = txq->added & txq->ptr_mask;
391	stmp = &txq->stmp[id];
392	rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag, stmp->map,
393				     mbuf, dma_seg, &n_dma_seg, 0);
394	if (rc == EFBIG) {
395		/* Try again. */
396		struct mbuf *new_mbuf = m_collapse(mbuf, M_NOWAIT,
397						   SFXGE_TX_MAPPING_MAX_SEG);
398		if (new_mbuf == NULL)
399			goto reject;
400		++txq->collapses;
401		mbuf = new_mbuf;
402		rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag,
403					     stmp->map, mbuf,
404					     dma_seg, &n_dma_seg, 0);
405	}
406	if (rc != 0)
407		goto reject;
408
409	/* Make the packet visible to the hardware. */
410	bus_dmamap_sync(txq->packet_dma_tag, stmp->map, BUS_DMASYNC_PREWRITE);
411
412	used_map = &stmp->map;
413
414	hw_vlan_tci_prev = txq->hw_vlan_tci;
415
416	vlan_tagged = sfxge_tx_maybe_insert_tag(txq, mbuf);
417	if (vlan_tagged) {
418		sfxge_next_stmp(txq, &stmp);
419	}
420	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
421		rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg, vlan_tagged);
422		if (rc < 0)
423			goto reject_mapped;
424		stmp = &txq->stmp[(rc - 1) & txq->ptr_mask];
425	} else {
426		/* Add the mapping to the fragment list, and set flags
427		 * for the buffer.
428		 */
429
430		i = 0;
431		for (;;) {
432			desc = &txq->pend_desc[i + vlan_tagged];
433			eop = (i == n_dma_seg - 1);
434			efx_tx_qdesc_dma_create(txq->common,
435						dma_seg[i].ds_addr,
436						dma_seg[i].ds_len,
437						eop,
438						desc);
439			if (eop)
440				break;
441			i++;
442			sfxge_next_stmp(txq, &stmp);
443		}
444		txq->n_pend_desc = n_dma_seg + vlan_tagged;
445	}
446
447	/*
448	 * If the mapping required more than one descriptor
449	 * then we need to associate the DMA map with the last
450	 * descriptor, not the first.
451	 */
452	if (used_map != &stmp->map) {
453		map = stmp->map;
454		stmp->map = *used_map;
455		*used_map = map;
456	}
457
458	stmp->u.mbuf = mbuf;
459	stmp->flags = TX_BUF_UNMAP | TX_BUF_MBUF;
460
461	/* Post the fragment list. */
462	sfxge_tx_qlist_post(txq);
463
464	return (0);
465
466reject_mapped:
467	txq->hw_vlan_tci = hw_vlan_tci_prev;
468	bus_dmamap_unload(txq->packet_dma_tag, *used_map);
469reject:
470	/* Drop the packet on the floor. */
471	m_freem(mbuf);
472	++txq->drops;
473
474	return (rc);
475}
476
477/*
478 * Drain the deferred packet list into the transmit queue.
479 */
480static void
481sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
482{
483	struct sfxge_softc *sc;
484	struct sfxge_tx_dpl *stdp;
485	struct mbuf *mbuf, *next;
486	unsigned int count;
487	unsigned int non_tcp_count;
488	unsigned int pushed;
489	int rc;
490
491	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
492
493	sc = txq->sc;
494	stdp = &txq->dpl;
495	pushed = txq->added;
496
497	if (__predict_true(txq->init_state == SFXGE_TXQ_STARTED)) {
498		prefetch_read_many(sc->enp);
499		prefetch_read_many(txq->common);
500	}
501
502	mbuf = stdp->std_get;
503	count = stdp->std_get_count;
504	non_tcp_count = stdp->std_get_non_tcp_count;
505
506	if (count > stdp->std_get_hiwat)
507		stdp->std_get_hiwat = count;
508
509	while (count != 0) {
510		KASSERT(mbuf != NULL, ("mbuf == NULL"));
511
512		next = mbuf->m_nextpkt;
513		mbuf->m_nextpkt = NULL;
514
515		ETHER_BPF_MTAP(sc->ifnet, mbuf); /* packet capture */
516
517		if (next != NULL)
518			prefetch_read_many(next);
519
520		rc = sfxge_tx_queue_mbuf(txq, mbuf);
521		--count;
522		non_tcp_count -= sfxge_is_mbuf_non_tcp(mbuf);
523		mbuf = next;
524		if (rc != 0)
525			continue;
526
527		if (txq->blocked)
528			break;
529
530		/* Push the fragments to the hardware in batches. */
531		if (txq->added - pushed >= SFXGE_TX_BATCH) {
532			efx_tx_qpush(txq->common, txq->added, pushed);
533			pushed = txq->added;
534		}
535	}
536
537	if (count == 0) {
538		KASSERT(mbuf == NULL, ("mbuf != NULL"));
539		KASSERT(non_tcp_count == 0,
540			("inconsistent TCP/non-TCP detection"));
541		stdp->std_get = NULL;
542		stdp->std_get_count = 0;
543		stdp->std_get_non_tcp_count = 0;
544		stdp->std_getp = &stdp->std_get;
545	} else {
546		stdp->std_get = mbuf;
547		stdp->std_get_count = count;
548		stdp->std_get_non_tcp_count = non_tcp_count;
549	}
550
551	if (txq->added != pushed)
552		efx_tx_qpush(txq->common, txq->added, pushed);
553
554	KASSERT(txq->blocked || stdp->std_get_count == 0,
555		("queue unblocked but count is non-zero"));
556}
557
558#define	SFXGE_TX_QDPL_PENDING(_txq)	((_txq)->dpl.std_put != 0)
559
560/*
561 * Service the deferred packet list.
562 *
563 * NOTE: drops the txq mutex!
564 */
565static void
566sfxge_tx_qdpl_service(struct sfxge_txq *txq)
567{
568	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
569
570	do {
571		if (SFXGE_TX_QDPL_PENDING(txq))
572			sfxge_tx_qdpl_swizzle(txq);
573
574		if (!txq->blocked)
575			sfxge_tx_qdpl_drain(txq);
576
577		SFXGE_TXQ_UNLOCK(txq);
578	} while (SFXGE_TX_QDPL_PENDING(txq) &&
579		 SFXGE_TXQ_TRYLOCK(txq));
580}
581
582/*
583 * Put a packet on the deferred packet get-list.
584 */
585static int
586sfxge_tx_qdpl_put_locked(struct sfxge_txq *txq, struct mbuf *mbuf)
587{
588	struct sfxge_tx_dpl *stdp;
589
590	stdp = &txq->dpl;
591
592	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));
593
594	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
595
596	if (stdp->std_get_count >= stdp->std_get_max) {
597		txq->get_overflow++;
598		return (ENOBUFS);
599	}
600	if (sfxge_is_mbuf_non_tcp(mbuf)) {
601		if (stdp->std_get_non_tcp_count >=
602		    stdp->std_get_non_tcp_max) {
603			txq->get_non_tcp_overflow++;
604			return (ENOBUFS);
605		}
606		stdp->std_get_non_tcp_count++;
607	}
608
609	*(stdp->std_getp) = mbuf;
610	stdp->std_getp = &mbuf->m_nextpkt;
611	stdp->std_get_count++;
612
613	return (0);
614}
615
616/*
617 * Put a packet on the deferred packet put-list.
618 *
619 * We overload the csum_data field in the mbuf to keep track of the put-list
620 * length because there is no cheap race-free alternative.
621 */
622static int
623sfxge_tx_qdpl_put_unlocked(struct sfxge_txq *txq, struct mbuf *mbuf)
624{
625	struct sfxge_tx_dpl *stdp;
626	volatile uintptr_t *putp;
627	uintptr_t old;
628	uintptr_t new;
629	unsigned int put_count;
630
631	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));
632
633	SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq);
634
635	stdp = &txq->dpl;
636	putp = &stdp->std_put;
637	new = (uintptr_t)mbuf;
638
639	do {
640		old = *putp;
641		if (old != 0) {
642			struct mbuf *mp = (struct mbuf *)old;
643			put_count = mp->m_pkthdr.csum_data;
644		} else
645			put_count = 0;
646		if (put_count >= stdp->std_put_max) {
647			atomic_add_long(&txq->put_overflow, 1);
648			return (ENOBUFS);
649		}
650		mbuf->m_pkthdr.csum_data = put_count + 1;
651		mbuf->m_nextpkt = (void *)old;
652	} while (atomic_cmpset_ptr(putp, old, new) == 0);
653
654	return (0);
655}
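/*
 * Worked example (added, not part of the original file): starting from an
 * empty put list, the first deferred mbuf is stored with csum_data = 1;
 * each later mbuf reads the current head's csum_data and stores that
 * value + 1 in its own header before becoming the new head.  The head of
 * the list therefore always carries the current put-list length, which is
 * what the std_put_max check above compares against.
 */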
656
657/*
658 * Called from if_transmit - tries to grab the txq lock; on success the packet
659 * is appended to the get-list, otherwise it is pushed onto the put-list if there is space.
660 */
661static int
662sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
663{
664	int rc;
665
666	if (!SFXGE_LINK_UP(txq->sc)) {
667		atomic_add_long(&txq->netdown_drops, 1);
668		return (ENETDOWN);
669	}
670
671	/*
672	 * Try to grab the txq lock.  If we are able to get the lock,
673	 * the packet will be appended to the "get list" of the deferred
674	 * packet list.  Otherwise, it will be pushed on the "put list".
675	 */
676	if (SFXGE_TXQ_TRYLOCK(txq)) {
677		/* First swizzle put-list to get-list to keep order */
678		sfxge_tx_qdpl_swizzle(txq);
679
680		rc = sfxge_tx_qdpl_put_locked(txq, m);
681
682		/* Try to service the list. */
683		sfxge_tx_qdpl_service(txq);
684		/* Lock has been dropped. */
685	} else {
686		rc = sfxge_tx_qdpl_put_unlocked(txq, m);
687
688		/*
689		 * Try to grab the lock again.
690		 *
691		 * If we are able to get the lock, we need to process
692		 * the deferred packet list.  If we are not able to get
693		 * the lock, another thread is processing the list.
694		 */
695		if ((rc == 0) && SFXGE_TXQ_TRYLOCK(txq)) {
696			sfxge_tx_qdpl_service(txq);
697			/* Lock has been dropped. */
698		}
699	}
700
701	SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq);
702
703	return (rc);
704}
705
706static void
707sfxge_tx_qdpl_flush(struct sfxge_txq *txq)
708{
709	struct sfxge_tx_dpl *stdp = &txq->dpl;
710	struct mbuf *mbuf, *next;
711
712	SFXGE_TXQ_LOCK(txq);
713
714	sfxge_tx_qdpl_swizzle(txq);
715	for (mbuf = stdp->std_get; mbuf != NULL; mbuf = next) {
716		next = mbuf->m_nextpkt;
717		m_freem(mbuf);
718	}
719	stdp->std_get = NULL;
720	stdp->std_get_count = 0;
721	stdp->std_get_non_tcp_count = 0;
722	stdp->std_getp = &stdp->std_get;
723
724	SFXGE_TXQ_UNLOCK(txq);
725}
726
727void
728sfxge_if_qflush(struct ifnet *ifp)
729{
730	struct sfxge_softc *sc;
731	unsigned int i;
732
733	sc = ifp->if_softc;
734
735	for (i = 0; i < sc->txq_count; i++)
736		sfxge_tx_qdpl_flush(sc->txq[i]);
737}
738
739#if SFXGE_TX_PARSE_EARLY
740
741/* There is little space for user data in the mbuf pkthdr, so we
742 * use the l*hlen fields, which are not otherwise used by the driver,
743 * to store header offsets.
744 * The fields are 8-bit, but that is fine: no header may be longer than 255 bytes.
745 */
746
747
748#define TSO_MBUF_PROTO(_mbuf)    ((_mbuf)->m_pkthdr.PH_loc.sixteen[0])
749/* We abuse l5hlen here because PH_loc can hold only 64 bits of data */
750#define TSO_MBUF_FLAGS(_mbuf)    ((_mbuf)->m_pkthdr.l5hlen)
751#define TSO_MBUF_PACKETID(_mbuf) ((_mbuf)->m_pkthdr.PH_loc.sixteen[1])
752#define TSO_MBUF_SEQNUM(_mbuf)   ((_mbuf)->m_pkthdr.PH_loc.thirtytwo[1])
753
754static void sfxge_parse_tx_packet(struct mbuf *mbuf)
755{
756	struct ether_header *eh = mtod(mbuf, struct ether_header *);
757	const struct tcphdr *th;
758	struct tcphdr th_copy;
759
760	/* Find network protocol and header */
761	TSO_MBUF_PROTO(mbuf) = eh->ether_type;
762	if (TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_VLAN)) {
763		struct ether_vlan_header *veh =
764			mtod(mbuf, struct ether_vlan_header *);
765		TSO_MBUF_PROTO(mbuf) = veh->evl_proto;
766		mbuf->m_pkthdr.l2hlen = sizeof(*veh);
767	} else {
768		mbuf->m_pkthdr.l2hlen = sizeof(*eh);
769	}
770
771	/* Find TCP header */
772	if (TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_IP)) {
773		const struct ip *iph = (const struct ip *)mtodo(mbuf, mbuf->m_pkthdr.l2hlen);
774
775		KASSERT(iph->ip_p == IPPROTO_TCP,
776			("TSO required on non-TCP packet"));
777		mbuf->m_pkthdr.l3hlen = mbuf->m_pkthdr.l2hlen + 4 * iph->ip_hl;
778		TSO_MBUF_PACKETID(mbuf) = iph->ip_id;
779	} else {
780		KASSERT(TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_IPV6),
781			("TSO required on non-IP packet"));
782		KASSERT(((const struct ip6_hdr *)mtodo(mbuf, mbuf->m_pkthdr.l2hlen))->ip6_nxt ==
783			IPPROTO_TCP,
784			("TSO required on non-TCP packet"));
785		mbuf->m_pkthdr.l3hlen = mbuf->m_pkthdr.l2hlen + sizeof(struct ip6_hdr);
786		TSO_MBUF_PACKETID(mbuf) = 0;
787	}
788
789	KASSERT(mbuf->m_len >= mbuf->m_pkthdr.l3hlen,
790		("network header is fragmented in mbuf"));
791
792	/* We need TCP header including flags (window is the next) */
793	if (mbuf->m_len < mbuf->m_pkthdr.l3hlen + offsetof(struct tcphdr, th_win)) {
794		m_copydata(mbuf, mbuf->m_pkthdr.l3hlen, sizeof(th_copy),
795			   (caddr_t)&th_copy);
796		th = &th_copy;
797	} else {
798		th = (const struct tcphdr *)mtodo(mbuf, mbuf->m_pkthdr.l3hlen);
799	}
800
801	mbuf->m_pkthdr.l4hlen = mbuf->m_pkthdr.l3hlen + 4 * th->th_off;
802	TSO_MBUF_SEQNUM(mbuf) = ntohl(th->th_seq);
803
804	/* These flags must not be duplicated */
805	/*
806	 * RST should not be duplicated either, but the FreeBSD kernel
807	 * generates TSO packets with the RST flag set, so do not assert
808	 * its absence.
809	 */
810	KASSERT(!(th->th_flags & (TH_URG | TH_SYN)),
811		("incompatible TCP flag 0x%x on TSO packet",
812		 th->th_flags & (TH_URG | TH_SYN)));
813	TSO_MBUF_FLAGS(mbuf) = th->th_flags;
814}
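/*
 * Worked example (added, not part of the original file): for an untagged
 * IPv4/TCP frame with no IP or TCP options, the function above leaves
 * l2hlen = 14 (Ethernet header), l3hlen = 14 + 20 = 34 (offset of the TCP
 * header) and l4hlen = 34 + 20 = 54 (total header length, later used as
 * tso->header_len in tso_start()).
 */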
815#endif
816
817/*
818 * TX start -- called by the stack.
819 */
820int
821sfxge_if_transmit(struct ifnet *ifp, struct mbuf *m)
822{
823	struct sfxge_softc *sc;
824	struct sfxge_txq *txq;
825	int rc;
826
827	sc = (struct sfxge_softc *)ifp->if_softc;
828
829	/*
830	 * Transmit may be called when the interface is up from the kernel
831	 * point of view, but not yet up (bring-up in progress) from the
832	 * driver point of view, e.g. during link aggregation bring-up.
833	 * Transmit may also be called when the interface is up from the
834	 * driver point of view, but already down from the kernel point of
835	 * view, e.g. while interface shutdown is in progress.
836	 */
837	KASSERT((ifp->if_flags & IFF_UP) || (sc->if_flags & IFF_UP),
838		("interface not up"));
839
840	/* Pick the desired transmit queue. */
841	if (m->m_pkthdr.csum_flags &
842	    (CSUM_DELAY_DATA | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO)) {
843		int index = 0;
844
845#ifdef RSS
846		uint32_t bucket_id;
847
848		/*
849		 * Select a TX queue which matches the corresponding
850		 * RX queue for the hash in order to assign both
851		 * TX and RX parts of the flow to the same CPU
852		 */
853		if (rss_m2bucket(m, &bucket_id) == 0)
854			index = bucket_id % (sc->txq_count - (SFXGE_TXQ_NTYPES - 1));
855#else
856		/* check if flowid is set */
857		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
858			uint32_t hash = m->m_pkthdr.flowid;
859			uint32_t idx = hash % nitems(sc->rx_indir_table);
860
861			index = sc->rx_indir_table[idx];
862		}
863#endif
864#if SFXGE_TX_PARSE_EARLY
865		if (m->m_pkthdr.csum_flags & CSUM_TSO)
866			sfxge_parse_tx_packet(m);
867#endif
868		txq = sc->txq[SFXGE_TXQ_IP_TCP_UDP_CKSUM + index];
869	} else if (m->m_pkthdr.csum_flags & CSUM_DELAY_IP) {
870		txq = sc->txq[SFXGE_TXQ_IP_CKSUM];
871	} else {
872		txq = sc->txq[SFXGE_TXQ_NON_CKSUM];
873	}
874
875	rc = sfxge_tx_packet_add(txq, m);
876	if (rc != 0)
877		m_freem(m);
878
879	return (rc);
880}
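/*
 * Illustrative example (added, not part of the original file): a TCP
 * packet with CSUM_TSO set and a flowid that maps through
 * sc->rx_indir_table[] to, say, index 2 is queued on
 * sc->txq[SFXGE_TXQ_IP_TCP_UDP_CKSUM + 2], i.e. the checksum/TSO queue
 * bound to event queue 2, so the Tx and Rx halves of the flow are
 * serviced by the same event queue.
 */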
881
882/*
883 * Software "TSO".  Not quite as good as doing it in hardware, but
884 * still faster than segmenting in the stack.
885 */
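/*
 * Worked example (added for illustration, not part of the original file):
 * for a TSO mbuf with a 54-byte Ethernet+IPv4+TCP header, 4500 bytes of
 * TCP payload and tso_segsz (seg_size) of 1460, the burst is split into
 * four packets carrying 1460, 1460, 1460 and 120 payload bytes.  In the
 * software TSO path below, each packet gets a fresh copy of the header
 * with th_seq advanced by seg_size and, for IPv4, ip_len rewritten to
 * cover only that segment; the FW-assisted paths achieve the same result
 * with option descriptors.
 */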
886
887struct sfxge_tso_state {
888	/* Output position */
889	unsigned out_len;	/* Remaining length in current segment */
890	unsigned seqnum;	/* Current sequence number */
891	unsigned packet_space;	/* Remaining space in current packet */
892	unsigned segs_space;	/* Remaining number of DMA segments
893				   for the packet (FATSOv2 only) */
894
895	/* Input position */
896	uint64_t dma_addr;	/* DMA address of current position */
897	unsigned in_len;	/* Remaining length in current mbuf */
898
899	const struct mbuf *mbuf; /* Input mbuf (head of chain) */
900	u_short protocol;	/* Network protocol (after VLAN decap) */
901	ssize_t nh_off;		/* Offset of network header */
902	ssize_t tcph_off;	/* Offset of TCP header */
903	unsigned header_len;	/* Number of bytes of header */
904	unsigned seg_size;	/* TCP segment size */
905	int fw_assisted;	/* Use FW-assisted TSO */
906	u_short packet_id;	/* IPv4 packet ID from the original packet */
907	uint8_t tcp_flags;	/* TCP flags */
908	efx_desc_t header_desc; /* Precomputed header descriptor for
909				 * FW-assisted TSO */
910};
911
912#if !SFXGE_TX_PARSE_EARLY
913static const struct ip *tso_iph(const struct sfxge_tso_state *tso)
914{
915	KASSERT(tso->protocol == htons(ETHERTYPE_IP),
916		("tso_iph() in non-IPv4 state"));
917	return (const struct ip *)(tso->mbuf->m_data + tso->nh_off);
918}
919
920static __unused const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso)
921{
922	KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
923		("tso_ip6h() in non-IPv6 state"));
924	return (const struct ip6_hdr *)(tso->mbuf->m_data + tso->nh_off);
925}
926
927static const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso)
928{
929	return (const struct tcphdr *)(tso->mbuf->m_data + tso->tcph_off);
930}
931#endif
932
933
934/* Size of preallocated TSO header buffers.  Larger blocks must be
935 * allocated from the heap.
936 */
937#define	TSOH_STD_SIZE	128
938
939/* At most half the descriptors in the queue at any time will refer to
940 * a TSO header buffer, since they must always be followed by a
941 * payload descriptor referring to an mbuf.
942 */
943#define	TSOH_COUNT(_txq_entries)	((_txq_entries) / 2u)
944#define	TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
945#define	TSOH_PAGE_COUNT(_txq_entries)	\
946	howmany(TSOH_COUNT(_txq_entries), TSOH_PER_PAGE)
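/*
 * Worked example (added for illustration, not part of the original file;
 * assumes 4 KB pages): TSOH_PER_PAGE = 4096 / 128 = 32.  For a 1024-entry
 * Tx queue, TSOH_COUNT(1024) = 512 headers, so TSOH_PAGE_COUNT(1024) =
 * howmany(512, 32) = 16 preallocated header pages per queue.
 */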
947
948static int tso_init(struct sfxge_txq *txq)
949{
950	struct sfxge_softc *sc = txq->sc;
951	unsigned int tsoh_page_count = TSOH_PAGE_COUNT(sc->txq_entries);
952	int i, rc;
953
954	/* Allocate TSO header buffers */
955	txq->tsoh_buffer = malloc(tsoh_page_count * sizeof(txq->tsoh_buffer[0]),
956				  M_SFXGE, M_WAITOK);
957
958	for (i = 0; i < tsoh_page_count; i++) {
959		rc = sfxge_dma_alloc(sc, PAGE_SIZE, &txq->tsoh_buffer[i]);
960		if (rc != 0)
961			goto fail;
962	}
963
964	return (0);
965
966fail:
967	while (i-- > 0)
968		sfxge_dma_free(&txq->tsoh_buffer[i]);
969	free(txq->tsoh_buffer, M_SFXGE);
970	txq->tsoh_buffer = NULL;
971	return (rc);
972}
973
974static void tso_fini(struct sfxge_txq *txq)
975{
976	int i;
977
978	if (txq->tsoh_buffer != NULL) {
979		for (i = 0; i < TSOH_PAGE_COUNT(txq->sc->txq_entries); i++)
980			sfxge_dma_free(&txq->tsoh_buffer[i]);
981		free(txq->tsoh_buffer, M_SFXGE);
982	}
983}
984
985static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso,
986		      const bus_dma_segment_t *hdr_dma_seg,
987		      struct mbuf *mbuf)
988{
989	const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->sc->enp);
990#if !SFXGE_TX_PARSE_EARLY
991	struct ether_header *eh = mtod(mbuf, struct ether_header *);
992	const struct tcphdr *th;
993	struct tcphdr th_copy;
994#endif
995
996	tso->fw_assisted = txq->tso_fw_assisted;
997	tso->mbuf = mbuf;
998
999	/* Find network protocol and header */
1000#if !SFXGE_TX_PARSE_EARLY
1001	tso->protocol = eh->ether_type;
1002	if (tso->protocol == htons(ETHERTYPE_VLAN)) {
1003		struct ether_vlan_header *veh =
1004			mtod(mbuf, struct ether_vlan_header *);
1005		tso->protocol = veh->evl_proto;
1006		tso->nh_off = sizeof(*veh);
1007	} else {
1008		tso->nh_off = sizeof(*eh);
1009	}
1010#else
1011	tso->protocol = TSO_MBUF_PROTO(mbuf);
1012	tso->nh_off = mbuf->m_pkthdr.l2hlen;
1013	tso->tcph_off = mbuf->m_pkthdr.l3hlen;
1014	tso->packet_id = ntohs(TSO_MBUF_PACKETID(mbuf));
1015#endif
1016
1017#if !SFXGE_TX_PARSE_EARLY
1018	/* Find TCP header */
1019	if (tso->protocol == htons(ETHERTYPE_IP)) {
1020		KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP,
1021			("TSO required on non-TCP packet"));
1022		tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl;
1023		tso->packet_id = ntohs(tso_iph(tso)->ip_id);
1024	} else {
1025		KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
1026			("TSO required on non-IP packet"));
1027		KASSERT(tso_ip6h(tso)->ip6_nxt == IPPROTO_TCP,
1028			("TSO required on non-TCP packet"));
1029		tso->tcph_off = tso->nh_off + sizeof(struct ip6_hdr);
1030		tso->packet_id = 0;
1031	}
1032#endif
1033
1034
1035	if (tso->fw_assisted &&
1036	    __predict_false(tso->tcph_off >
1037			    encp->enc_tx_tso_tcp_header_offset_limit)) {
1038		tso->fw_assisted = 0;
1039	}
1040
1041
1042#if !SFXGE_TX_PARSE_EARLY
1043	KASSERT(mbuf->m_len >= tso->tcph_off,
1044		("network header is fragmented in mbuf"));
1045	/* We need TCP header including flags (window is the next) */
1046	if (mbuf->m_len < tso->tcph_off + offsetof(struct tcphdr, th_win)) {
1047		m_copydata(tso->mbuf, tso->tcph_off, sizeof(th_copy),
1048			   (caddr_t)&th_copy);
1049		th = &th_copy;
1050	} else {
1051		th = tso_tcph(tso);
1052	}
1053	tso->header_len = tso->tcph_off + 4 * th->th_off;
1054#else
1055	tso->header_len = mbuf->m_pkthdr.l4hlen;
1056#endif
1057	tso->seg_size = mbuf->m_pkthdr.tso_segsz;
1058
1059#if !SFXGE_TX_PARSE_EARLY
1060	tso->seqnum = ntohl(th->th_seq);
1061
1062	/* These flags must not be duplicated */
1063	/*
1064	 * RST should not be duplicated either, but the FreeBSD kernel
1065	 * generates TSO packets with the RST flag set, so do not assert
1066	 * its absence.
1067	 */
1068	KASSERT(!(th->th_flags & (TH_URG | TH_SYN)),
1069		("incompatible TCP flag 0x%x on TSO packet",
1070		 th->th_flags & (TH_URG | TH_SYN)));
1071	tso->tcp_flags = th->th_flags;
1072#else
1073	tso->seqnum = TSO_MBUF_SEQNUM(mbuf);
1074	tso->tcp_flags = TSO_MBUF_FLAGS(mbuf);
1075#endif
1076
1077	tso->out_len = mbuf->m_pkthdr.len - tso->header_len;
1078
1079	if (tso->fw_assisted) {
1080		if (hdr_dma_seg->ds_len >= tso->header_len)
1081			efx_tx_qdesc_dma_create(txq->common,
1082						hdr_dma_seg->ds_addr,
1083						tso->header_len,
1084						B_FALSE,
1085						&tso->header_desc);
1086		else
1087			tso->fw_assisted = 0;
1088	}
1089}
1090
1091/*
1092 * tso_fill_packet_with_fragment - form descriptors for the current fragment
1093 *
1094 * Form descriptors for the current fragment, until we reach the end of
1095 * the fragment or the end of the packet.  The descriptors are appended
1096 * to txq->pend_desc; the function returns nothing.
1097 */
1098static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
1099					  struct sfxge_tso_state *tso)
1100{
1101	efx_desc_t *desc;
1102	int n;
1103	uint64_t dma_addr = tso->dma_addr;
1104	boolean_t eop;
1105
1106	if (tso->in_len == 0 || tso->packet_space == 0)
1107		return;
1108
1109	KASSERT(tso->in_len > 0, ("TSO input length went negative"));
1110	KASSERT(tso->packet_space > 0, ("TSO packet space went negative"));
1111
1112	if (tso->fw_assisted & SFXGE_FATSOV2) {
1113		n = tso->in_len;
1114		tso->out_len -= n;
1115		tso->seqnum += n;
1116		tso->in_len = 0;
1117		if (n < tso->packet_space) {
1118			tso->packet_space -= n;
1119			tso->segs_space--;
1120		} else {
1121			tso->packet_space = tso->seg_size -
1122			    (n - tso->packet_space) % tso->seg_size;
1123			tso->segs_space =
1124			    EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1 -
1125			    (tso->packet_space != tso->seg_size);
1126		}
1127	} else {
1128		n = min(tso->in_len, tso->packet_space);
1129		tso->packet_space -= n;
1130		tso->out_len -= n;
1131		tso->dma_addr += n;
1132		tso->in_len -= n;
1133	}
1134
1135	/*
1136	 * It is OK to use bitwise OR below to avoid extra branching,
1137	 * since it is always safe to evaluate all of the conditions.
1138	 */
1139	eop = (tso->out_len == 0) | (tso->packet_space == 0) |
1140	    (tso->segs_space == 0);
1141
1142	desc = &txq->pend_desc[txq->n_pend_desc++];
1143	efx_tx_qdesc_dma_create(txq->common, dma_addr, n, eop, desc);
1144}
1145
1146/* Callback from bus_dmamap_load() for long TSO headers. */
1147static void tso_map_long_header(void *dma_addr_ret,
1148				bus_dma_segment_t *segs, int nseg,
1149				int error)
1150{
1151	*(uint64_t *)dma_addr_ret = ((__predict_true(error == 0) &&
1152				      __predict_true(nseg == 1)) ?
1153				     segs->ds_addr : 0);
1154}
1155
1156/*
1157 * tso_start_new_packet - generate a new header and prepare for the new packet
1158 *
1159 * Generate a new header and prepare for the new packet.  Return 0 on
1160 * success, or an error code if allocating the header failed.
1161 */
1162static int tso_start_new_packet(struct sfxge_txq *txq,
1163				struct sfxge_tso_state *tso,
1164				unsigned int *idp)
1165{
1166	unsigned int id = *idp;
1167	struct tcphdr *tsoh_th;
1168	unsigned ip_length;
1169	caddr_t header;
1170	uint64_t dma_addr;
1171	bus_dmamap_t map;
1172	efx_desc_t *desc;
1173	int rc;
1174
1175	if (tso->fw_assisted) {
1176		if (tso->fw_assisted & SFXGE_FATSOV2) {
1177			/* Add 2 FATSOv2 option descriptors */
1178			desc = &txq->pend_desc[txq->n_pend_desc];
1179			efx_tx_qdesc_tso2_create(txq->common,
1180						 tso->packet_id,
1181						 tso->seqnum,
1182						 tso->seg_size,
1183						 desc,
1184						 EFX_TX_FATSOV2_OPT_NDESCS);
1185			desc += EFX_TX_FATSOV2_OPT_NDESCS;
1186			txq->n_pend_desc += EFX_TX_FATSOV2_OPT_NDESCS;
1187			KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1188			id = (id + EFX_TX_FATSOV2_OPT_NDESCS) & txq->ptr_mask;
1189
1190			tso->segs_space =
1191			    EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1;
1192		} else {
1193			uint8_t tcp_flags = tso->tcp_flags;
1194
1195			if (tso->out_len > tso->seg_size)
1196				tcp_flags &= ~(TH_FIN | TH_PUSH);
1197
1198			/* Add FATSOv1 option descriptor */
1199			desc = &txq->pend_desc[txq->n_pend_desc++];
1200			efx_tx_qdesc_tso_create(txq->common,
1201						tso->packet_id,
1202						tso->seqnum,
1203						tcp_flags,
1204						desc++);
1205			KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1206			id = (id + 1) & txq->ptr_mask;
1207
1208			tso->seqnum += tso->seg_size;
1209			tso->segs_space = UINT_MAX;
1210		}
1211
1212		/* Header DMA descriptor */
1213		*desc = tso->header_desc;
1214		txq->n_pend_desc++;
1215		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1216		id = (id + 1) & txq->ptr_mask;
1217	} else {
1218		/* Allocate a DMA-mapped header buffer. */
1219		if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
1220			unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
1221			unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;
1222
1223			header = (txq->tsoh_buffer[page_index].esm_base +
1224				  buf_index * TSOH_STD_SIZE);
1225			dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
1226				    buf_index * TSOH_STD_SIZE);
1227			map = txq->tsoh_buffer[page_index].esm_map;
1228
1229			KASSERT(txq->stmp[id].flags == 0,
1230				("stmp flags are not 0"));
1231		} else {
1232			struct sfxge_tx_mapping *stmp = &txq->stmp[id];
1233
1234			/* We cannot use bus_dmamem_alloc() as that may sleep */
1235			header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
1236			if (__predict_false(!header))
1237				return (ENOMEM);
1238			rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
1239					     header, tso->header_len,
1240					     tso_map_long_header, &dma_addr,
1241					     BUS_DMA_NOWAIT);
1242			if (__predict_false(dma_addr == 0)) {
1243				if (rc == 0) {
1244					/* Succeeded but got >1 segment */
1245					bus_dmamap_unload(txq->packet_dma_tag,
1246							  stmp->map);
1247					rc = EINVAL;
1248				}
1249				free(header, M_SFXGE);
1250				return (rc);
1251			}
1252			map = stmp->map;
1253
1254			txq->tso_long_headers++;
1255			stmp->u.heap_buf = header;
1256			stmp->flags = TX_BUF_UNMAP;
1257		}
1258
1259		tsoh_th = (struct tcphdr *)(header + tso->tcph_off);
1260
1261		/* Copy and update the headers. */
1262		m_copydata(tso->mbuf, 0, tso->header_len, header);
1263
1264		tsoh_th->th_seq = htonl(tso->seqnum);
1265		tso->seqnum += tso->seg_size;
1266		if (tso->out_len > tso->seg_size) {
1267			/* This packet will not finish the TSO burst. */
1268			ip_length = tso->header_len - tso->nh_off + tso->seg_size;
1269			tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
1270		} else {
1271			/* This packet will be the last in the TSO burst. */
1272			ip_length = tso->header_len - tso->nh_off + tso->out_len;
1273		}
1274
1275		if (tso->protocol == htons(ETHERTYPE_IP)) {
1276			struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
1277			tsoh_iph->ip_len = htons(ip_length);
1278			/* XXX We should increment ip_id, but FreeBSD doesn't
1279			 * currently allocate extra IDs for multiple segments.
1280			 */
1281		} else {
1282			struct ip6_hdr *tsoh_iph =
1283				(struct ip6_hdr *)(header + tso->nh_off);
1284			tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
1285		}
1286
1287		/* Make the header visible to the hardware. */
1288		bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);
1289
1290		/* Form a descriptor for this header. */
1291		desc = &txq->pend_desc[txq->n_pend_desc++];
1292		efx_tx_qdesc_dma_create(txq->common,
1293					dma_addr,
1294					tso->header_len,
1295					0,
1296					desc);
1297		id = (id + 1) & txq->ptr_mask;
1298
1299		tso->segs_space = UINT_MAX;
1300	}
1301	tso->packet_space = tso->seg_size;
1302	txq->tso_packets++;
1303	*idp = id;
1304
1305	return (0);
1306}
1307
1308static int
1309sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
1310		   const bus_dma_segment_t *dma_seg, int n_dma_seg,
1311		   int vlan_tagged)
1312{
1313	struct sfxge_tso_state tso;
1314	unsigned int id;
1315	unsigned skipped = 0;
1316
1317	tso_start(txq, &tso, dma_seg, mbuf);
1318
1319	while (dma_seg->ds_len + skipped <= tso.header_len) {
1320		skipped += dma_seg->ds_len;
1321		--n_dma_seg;
1322		KASSERT(n_dma_seg, ("no payload found in TSO packet"));
1323		++dma_seg;
1324	}
1325	tso.in_len = dma_seg->ds_len - (tso.header_len - skipped);
1326	tso.dma_addr = dma_seg->ds_addr + (tso.header_len - skipped);
1327
1328	id = (txq->added + vlan_tagged) & txq->ptr_mask;
1329	if (__predict_false(tso_start_new_packet(txq, &tso, &id)))
1330		return (-1);
1331
1332	while (1) {
1333		tso_fill_packet_with_fragment(txq, &tso);
1334		/* Exactly one DMA descriptor is added */
1335		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
1336		id = (id + 1) & txq->ptr_mask;
1337
1338		/* Move onto the next fragment? */
1339		if (tso.in_len == 0) {
1340			--n_dma_seg;
1341			if (n_dma_seg == 0)
1342				break;
1343			++dma_seg;
1344			tso.in_len = dma_seg->ds_len;
1345			tso.dma_addr = dma_seg->ds_addr;
1346		}
1347
1348		/* End of packet? */
1349		if ((tso.packet_space == 0) | (tso.segs_space == 0)) {
1350			unsigned int n_fatso_opt_desc =
1351			    (tso.fw_assisted & SFXGE_FATSOV2) ?
1352			    EFX_TX_FATSOV2_OPT_NDESCS :
1353			    (tso.fw_assisted & SFXGE_FATSOV1) ? 1 : 0;
1354
1355			/* If the queue is now full due to tiny MSS,
1356			 * or we can't create another header, discard
1357			 * the remainder of the input mbuf but do not
1358			 * roll back the work we have done.
1359			 */
1360			if (txq->n_pend_desc + n_fatso_opt_desc +
1361			    1 /* header */ + n_dma_seg > txq->max_pkt_desc) {
1362				txq->tso_pdrop_too_many++;
1363				break;
1364			}
1365			if (__predict_false(tso_start_new_packet(txq, &tso,
1366								 &id))) {
1367				txq->tso_pdrop_no_rsrc++;
1368				break;
1369			}
1370		}
1371	}
1372
1373	txq->tso_bursts++;
1374	return (id);
1375}
1376
1377static void
1378sfxge_tx_qunblock(struct sfxge_txq *txq)
1379{
1380	struct sfxge_softc *sc;
1381	struct sfxge_evq *evq;
1382
1383	sc = txq->sc;
1384	evq = sc->evq[txq->evq_index];
1385
1386	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);
1387
1388	if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED))
1389		return;
1390
1391	SFXGE_TXQ_LOCK(txq);
1392
1393	if (txq->blocked) {
1394		unsigned int level;
1395
1396		level = txq->added - txq->completed;
1397		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries)) {
1398			/* reaped must be in sync with blocked */
1399			sfxge_tx_qreap(txq);
1400			txq->blocked = 0;
1401		}
1402	}
1403
1404	sfxge_tx_qdpl_service(txq);
1405	/* note: lock has been dropped */
1406}
1407
1408void
1409sfxge_tx_qflush_done(struct sfxge_txq *txq)
1410{
1411
1412	txq->flush_state = SFXGE_FLUSH_DONE;
1413}
1414
1415static void
1416sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index)
1417{
1418	struct sfxge_txq *txq;
1419	struct sfxge_evq *evq;
1420	unsigned int count;
1421
1422	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
1423
1424	txq = sc->txq[index];
1425	evq = sc->evq[txq->evq_index];
1426
1427	SFXGE_EVQ_LOCK(evq);
1428	SFXGE_TXQ_LOCK(txq);
1429
1430	KASSERT(txq->init_state == SFXGE_TXQ_STARTED,
1431	    ("txq->init_state != SFXGE_TXQ_STARTED"));
1432
1433	txq->init_state = SFXGE_TXQ_INITIALIZED;
1434
1435	if (txq->flush_state != SFXGE_FLUSH_DONE) {
1436		txq->flush_state = SFXGE_FLUSH_PENDING;
1437
1438		SFXGE_EVQ_UNLOCK(evq);
1439		SFXGE_TXQ_UNLOCK(txq);
1440
1441		/* Flush the transmit queue. */
1442		if (efx_tx_qflush(txq->common) != 0) {
1443			log(LOG_ERR, "%s: Flushing Tx queue %u failed\n",
1444			    device_get_nameunit(sc->dev), index);
1445			txq->flush_state = SFXGE_FLUSH_DONE;
1446		} else {
1447			count = 0;
1448			do {
1449				/* Spin for 100ms. */
1450				DELAY(100000);
1451				if (txq->flush_state != SFXGE_FLUSH_PENDING)
1452					break;
1453			} while (++count < 20);
1454		}
1455		SFXGE_EVQ_LOCK(evq);
1456		SFXGE_TXQ_LOCK(txq);
1457
1458		KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
1459		    ("txq->flush_state == SFXGE_FLUSH_FAILED"));
1460
1461		if (txq->flush_state != SFXGE_FLUSH_DONE) {
1462			/* Flush timeout */
1463			log(LOG_ERR, "%s: Cannot flush Tx queue %u\n",
1464			    device_get_nameunit(sc->dev), index);
1465			txq->flush_state = SFXGE_FLUSH_DONE;
1466		}
1467	}
1468
1469	txq->blocked = 0;
1470	txq->pending = txq->added;
1471
1472	sfxge_tx_qcomplete(txq, evq);
1473	KASSERT(txq->completed == txq->added,
1474	    ("txq->completed != txq->added"));
1475
1476	sfxge_tx_qreap(txq);
1477	KASSERT(txq->reaped == txq->completed,
1478	    ("txq->reaped != txq->completed"));
1479
1480	txq->added = 0;
1481	txq->pending = 0;
1482	txq->completed = 0;
1483	txq->reaped = 0;
1484
1485	/* Destroy the common code transmit queue. */
1486	efx_tx_qdestroy(txq->common);
1487	txq->common = NULL;
1488
1489	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
1490	    EFX_TXQ_NBUFS(sc->txq_entries));
1491
1492	SFXGE_EVQ_UNLOCK(evq);
1493	SFXGE_TXQ_UNLOCK(txq);
1494}
1495
1496/*
1497 * Estimate the maximum number of Tx descriptors required for a TSO packet.
1498 * With a minimum MSS and maximum mbuf length we might need more (possibly
1499 * even more than a ring-full of descriptors), but this should not happen
1500 * in practice except due to a deliberate attack.  In that case we will
1501 * truncate the output at a packet boundary.
1502 */
1503static unsigned int
1504sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type,
1505		      unsigned int tso_fw_assisted)
1506{
1507	/* One descriptor for every input fragment */
1508	unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG;
1509	unsigned int sw_tso_max_descs;
1510	unsigned int fa_tso_v1_max_descs = 0;
1511	unsigned int fa_tso_v2_max_descs = 0;
1512
1513	/* VLAN tagging Tx option descriptor may be required */
1514	if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled)
1515		max_descs++;
1516
1517	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) {
1518		/*
1519		 * Plus a header and a payload descriptor for each output segment,
1520		 * minus one since the header fragment is already counted.
1521		 * Even if FATSO is used, we should be ready to fall back
1522		 * to doing it in the driver.
1523		 */
1524		sw_tso_max_descs = SFXGE_TSO_MAX_SEGS * 2 - 1;
1525
1526		/* FW assisted TSOv1 requires one more descriptor per segment
1527		 * in comparison to SW TSO */
1528		if (tso_fw_assisted & SFXGE_FATSOV1)
1529			fa_tso_v1_max_descs =
1530			    sw_tso_max_descs + SFXGE_TSO_MAX_SEGS;
1531
1532		/* FW-assisted TSOv2 requires 3 extra descriptors (2 FATSO options
1533		 * plus a header) per superframe, limited by the number of DMA
1534		 * fetches per packet.  The first packet header is already counted.
1535		 */
1536		if (tso_fw_assisted & SFXGE_FATSOV2) {
1537			fa_tso_v2_max_descs =
1538			    howmany(SFXGE_TX_MAPPING_MAX_SEG,
1539				    EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1) *
1540			    (EFX_TX_FATSOV2_OPT_NDESCS + 1) - 1;
1541		}
1542
1543		max_descs += MAX(sw_tso_max_descs,
1544				 MAX(fa_tso_v1_max_descs, fa_tso_v2_max_descs));
1545	}
1546
1547	return (max_descs);
1548}
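/*
 * Worked example (added for illustration; the constants are hypothetical
 * values chosen only to show the arithmetic, not taken from sfxge_tx.h):
 * if SFXGE_TX_MAPPING_MAX_SEG were 16 and SFXGE_TSO_MAX_SEGS were 26,
 * then for a checksum/TSO queue without hardware VLAN insertion
 *	sw_tso_max_descs    = 26 * 2 - 1 = 51
 *	fa_tso_v1_max_descs = 51 + 26   = 77
 * and, with only FATSOv1 allowed, max_descs = 16 + 77 = 93.  This is the
 * per-packet reservation that sfxge_tx_qlist_post() uses for its block
 * level.
 */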
1549
1550static int
1551sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
1552{
1553	struct sfxge_txq *txq;
1554	efsys_mem_t *esmp;
1555	uint16_t flags;
1556	unsigned int tso_fw_assisted;
1557	struct sfxge_evq *evq;
1558	unsigned int desc_index;
1559	int rc;
1560
1561	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
1562
1563	txq = sc->txq[index];
1564	esmp = &txq->mem;
1565	evq = sc->evq[txq->evq_index];
1566
1567	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
1568	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));
1569	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
1570	    ("evq->init_state != SFXGE_EVQ_STARTED"));
1571
1572	/* Program the buffer table. */
1573	if ((rc = efx_sram_buf_tbl_set(sc->enp, txq->buf_base_id, esmp,
1574	    EFX_TXQ_NBUFS(sc->txq_entries))) != 0)
1575		return (rc);
1576
1577	/* Determine the kind of queue we are creating. */
1578	tso_fw_assisted = 0;
1579	switch (txq->type) {
1580	case SFXGE_TXQ_NON_CKSUM:
1581		flags = 0;
1582		break;
1583	case SFXGE_TXQ_IP_CKSUM:
1584		flags = EFX_TXQ_CKSUM_IPV4;
1585		break;
1586	case SFXGE_TXQ_IP_TCP_UDP_CKSUM:
1587		flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP;
1588		tso_fw_assisted = sc->tso_fw_assisted;
1589		if (tso_fw_assisted & SFXGE_FATSOV2)
1590			flags |= EFX_TXQ_FATSOV2;
1591		break;
1592	default:
1593		KASSERT(0, ("Impossible TX queue"));
1594		flags = 0;
1595		break;
1596	}
1597
1598	/* Create the common code transmit queue. */
1599	if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp,
1600	    sc->txq_entries, txq->buf_base_id, flags, evq->common,
1601	    &txq->common, &desc_index)) != 0) {
1602		/* Retry if no FATSOv2 resources, otherwise fail */
1603		if ((rc != ENOSPC) || (~flags & EFX_TXQ_FATSOV2))
1604			goto fail;
1605
1606		/* Looks like all FATSOv2 contexts are used */
1607		flags &= ~EFX_TXQ_FATSOV2;
1608		tso_fw_assisted &= ~SFXGE_FATSOV2;
1609		if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp,
1610		    sc->txq_entries, txq->buf_base_id, flags, evq->common,
1611		    &txq->common, &desc_index)) != 0)
1612			goto fail;
1613	}
1614
1615	/* Initialise queue descriptor indexes */
1616	txq->added = txq->pending = txq->completed = txq->reaped = desc_index;
1617
1618	SFXGE_TXQ_LOCK(txq);
1619
1620	/* Enable the transmit queue. */
1621	efx_tx_qenable(txq->common);
1622
1623	txq->init_state = SFXGE_TXQ_STARTED;
1624	txq->flush_state = SFXGE_FLUSH_REQUIRED;
1625	txq->tso_fw_assisted = tso_fw_assisted;
1626
1627	txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, txq->type,
1628						  tso_fw_assisted);
1629
1630	txq->hw_vlan_tci = 0;
1631
1632	SFXGE_TXQ_UNLOCK(txq);
1633
1634	return (0);
1635
1636fail:
1637	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
1638	    EFX_TXQ_NBUFS(sc->txq_entries));
1639	return (rc);
1640}
1641
1642void
1643sfxge_tx_stop(struct sfxge_softc *sc)
1644{
1645	int index;
1646
1647	index = sc->txq_count;
1648	while (--index >= 0)
1649		sfxge_tx_qstop(sc, index);
1650
1651	/* Tear down the transmit module */
1652	efx_tx_fini(sc->enp);
1653}
1654
1655int
1656sfxge_tx_start(struct sfxge_softc *sc)
1657{
1658	int index;
1659	int rc;
1660
1661	/* Initialize the common code transmit module. */
1662	if ((rc = efx_tx_init(sc->enp)) != 0)
1663		return (rc);
1664
1665	for (index = 0; index < sc->txq_count; index++) {
1666		if ((rc = sfxge_tx_qstart(sc, index)) != 0)
1667			goto fail;
1668	}
1669
1670	return (0);
1671
1672fail:
1673	while (--index >= 0)
1674		sfxge_tx_qstop(sc, index);
1675
1676	efx_tx_fini(sc->enp);
1677
1678	return (rc);
1679}
1680
1681static int
1682sfxge_txq_stat_init(struct sfxge_txq *txq, struct sysctl_oid *txq_node)
1683{
1684	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(txq->sc->dev);
1685	struct sysctl_oid *stat_node;
1686	unsigned int id;
1687
1688	stat_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO,
1689				    "stats", CTLFLAG_RD, NULL,
1690				    "Tx queue statistics");
1691	if (stat_node == NULL)
1692		return (ENOMEM);
1693
1694	for (id = 0; id < nitems(sfxge_tx_stats); id++) {
1695		SYSCTL_ADD_ULONG(
1696		    ctx, SYSCTL_CHILDREN(stat_node), OID_AUTO,
1697		    sfxge_tx_stats[id].name, CTLFLAG_RD | CTLFLAG_STATS,
1698		    (unsigned long *)((caddr_t)txq + sfxge_tx_stats[id].offset),
1699		    "");
1700	}
1701
1702	return (0);
1703}
1704
1705/**
1706 * Destroy a transmit queue.
1707 */
1708static void
1709sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index)
1710{
1711	struct sfxge_txq *txq;
1712	unsigned int nmaps;
1713
1714	txq = sc->txq[index];
1715
1716	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
1717	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));
1718
1719	if (txq->type == SFXGE_TXQ_IP_TCP_UDP_CKSUM)
1720		tso_fini(txq);
1721
1722	/* Free the context arrays. */
1723	free(txq->pend_desc, M_SFXGE);
1724	nmaps = sc->txq_entries;
1725	while (nmaps-- != 0)
1726		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
1727	free(txq->stmp, M_SFXGE);
1728
1729	/* Release DMA memory mapping. */
1730	sfxge_dma_free(&txq->mem);
1731
1732	sc->txq[index] = NULL;
1733
1734	SFXGE_TXQ_LOCK_DESTROY(txq);
1735
1736	free(txq, M_SFXGE);
1737}
1738
1739static int
1740sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
1741	       enum sfxge_txq_type type, unsigned int evq_index)
1742{
1743	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
1744	char name[16];
1745	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1746	struct sysctl_oid *txq_node;
1747	struct sfxge_txq *txq;
1748	struct sfxge_evq *evq;
1749	struct sfxge_tx_dpl *stdp;
1750	struct sysctl_oid *dpl_node;
1751	efsys_mem_t *esmp;
1752	unsigned int nmaps;
1753	int rc;
1754
1755	txq = malloc(sizeof(struct sfxge_txq), M_SFXGE, M_ZERO | M_WAITOK);
1756	txq->sc = sc;
1757	txq->entries = sc->txq_entries;
1758	txq->ptr_mask = txq->entries - 1;
1759
1760	sc->txq[txq_index] = txq;
1761	esmp = &txq->mem;
1762
1763	evq = sc->evq[evq_index];
1764
1765	/* Allocate and zero DMA space for the descriptor ring. */
1766	if ((rc = sfxge_dma_alloc(sc, EFX_TXQ_SIZE(sc->txq_entries), esmp)) != 0)
1767		return (rc);
1768
1769	/* Allocate buffer table entries. */
1770	sfxge_sram_buf_tbl_alloc(sc, EFX_TXQ_NBUFS(sc->txq_entries),
1771				 &txq->buf_base_id);
1772
1773	/* Create a DMA tag for packet mappings. */
1774	if (bus_dma_tag_create(sc->parent_dma_tag, 1,
1775	    encp->enc_tx_dma_desc_boundary,
1776	    MIN(0x3FFFFFFFFFFFUL, BUS_SPACE_MAXADDR), BUS_SPACE_MAXADDR, NULL,
1777	    NULL, 0x11000, SFXGE_TX_MAPPING_MAX_SEG,
1778	    encp->enc_tx_dma_desc_size_max, 0, NULL, NULL,
1779	    &txq->packet_dma_tag) != 0) {
1780		device_printf(sc->dev, "Couldn't allocate txq DMA tag\n");
1781		rc = ENOMEM;
1782		goto fail;
1783	}
1784
1785	/* Allocate pending descriptor array for batching writes. */
1786	txq->pend_desc = malloc(sizeof(efx_desc_t) * sc->txq_entries,
1787				M_SFXGE, M_ZERO | M_WAITOK);
1788
1789	/* Allocate and initialise mbuf DMA mapping array. */
1790	txq->stmp = malloc(sizeof(struct sfxge_tx_mapping) * sc->txq_entries,
1791	    M_SFXGE, M_ZERO | M_WAITOK);
1792	for (nmaps = 0; nmaps < sc->txq_entries; nmaps++) {
1793		rc = bus_dmamap_create(txq->packet_dma_tag, 0,
1794				       &txq->stmp[nmaps].map);
1795		if (rc != 0)
1796			goto fail2;
1797	}
1798
1799	snprintf(name, sizeof(name), "%u", txq_index);
1800	txq_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->txqs_node),
1801				   OID_AUTO, name, CTLFLAG_RD, NULL, "");
1802	if (txq_node == NULL) {
1803		rc = ENOMEM;
1804		goto fail_txq_node;
1805	}
1806
1807	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM &&
1808	    (rc = tso_init(txq)) != 0)
1809		goto fail3;
1810
1811	/* Initialize the deferred packet list. */
1812	stdp = &txq->dpl;
1813	stdp->std_put_max = sfxge_tx_dpl_put_max;
1814	stdp->std_get_max = sfxge_tx_dpl_get_max;
1815	stdp->std_get_non_tcp_max = sfxge_tx_dpl_get_non_tcp_max;
1816	stdp->std_getp = &stdp->std_get;
1817
1818	SFXGE_TXQ_LOCK_INIT(txq, device_get_nameunit(sc->dev), txq_index);
1819
1820	dpl_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO,
1821				   "dpl", CTLFLAG_RD, NULL,
1822				   "Deferred packet list statistics");
1823	if (dpl_node == NULL) {
1824		rc = ENOMEM;
1825		goto fail_dpl_node;
1826	}
1827
1828	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
1829			"get_count", CTLFLAG_RD | CTLFLAG_STATS,
1830			&stdp->std_get_count, 0, "");
1831	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
1832			"get_non_tcp_count", CTLFLAG_RD | CTLFLAG_STATS,
1833			&stdp->std_get_non_tcp_count, 0, "");
1834	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
1835			"get_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
1836			&stdp->std_get_hiwat, 0, "");
1837	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
1838			"put_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
1839			&stdp->std_put_hiwat, 0, "");
1840
1841	rc = sfxge_txq_stat_init(txq, txq_node);
1842	if (rc != 0)
1843		goto fail_txq_stat_init;
1844
1845	txq->type = type;
1846	txq->evq_index = evq_index;
1847	txq->init_state = SFXGE_TXQ_INITIALIZED;
1848
1849	return (0);
1850
1851fail_txq_stat_init:
1852fail_dpl_node:
1853fail3:
1854fail_txq_node:
1855	free(txq->pend_desc, M_SFXGE);
1856fail2:
1857	while (nmaps-- != 0)
1858		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
1859	free(txq->stmp, M_SFXGE);
1860	bus_dma_tag_destroy(txq->packet_dma_tag);
1861
1862fail:
1863	sfxge_dma_free(esmp);
1864
1865	return (rc);
1866}
1867
1868static int
1869sfxge_tx_stat_handler(SYSCTL_HANDLER_ARGS)
1870{
1871	struct sfxge_softc *sc = arg1;
1872	unsigned int id = arg2;
1873	unsigned long sum;
1874	unsigned int index;
1875
1876	/* Sum across all TX queues */
1877	sum = 0;
1878	for (index = 0; index < sc->txq_count; index++)
1879		sum += *(unsigned long *)((caddr_t)sc->txq[index] +
1880					  sfxge_tx_stats[id].offset);
1881
1882	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
1883}
1884
1885static void
1886sfxge_tx_stat_init(struct sfxge_softc *sc)
1887{
1888	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
1889	struct sysctl_oid_list *stat_list;
1890	unsigned int id;
1891
1892	stat_list = SYSCTL_CHILDREN(sc->stats_node);
1893
1894	for (id = 0; id < nitems(sfxge_tx_stats); id++) {
1895		SYSCTL_ADD_PROC(
1896			ctx, stat_list,
1897			OID_AUTO, sfxge_tx_stats[id].name,
1898			CTLTYPE_ULONG|CTLFLAG_RD,
1899			sc, id, sfxge_tx_stat_handler, "LU",
1900			"");
1901	}
1902}
1903
1904uint64_t
1905sfxge_tx_get_drops(struct sfxge_softc *sc)
1906{
1907	unsigned int index;
1908	uint64_t drops = 0;
1909	struct sfxge_txq *txq;
1910
1911	/* Sum across all TX queues */
1912	for (index = 0; index < sc->txq_count; index++) {
1913		txq = sc->txq[index];
1914		/*
1915		 * In theory, txq->put_overflow and txq->netdown_drops
1916		 * should be read atomically and the others should be read
1917		 * under the txq lock, but these are only statistics.
1918		 */
1919		drops += txq->drops + txq->get_overflow +
1920			 txq->get_non_tcp_overflow +
1921			 txq->put_overflow + txq->netdown_drops +
1922			 txq->tso_pdrop_too_many + txq->tso_pdrop_no_rsrc;
1923	}
1924	return (drops);
1925}
1926
1927void
1928sfxge_tx_fini(struct sfxge_softc *sc)
1929{
1930	int index;
1931
1932	index = sc->txq_count;
1933	while (--index >= 0)
1934		sfxge_tx_qfini(sc, index);
1935
1936	sc->txq_count = 0;
1937}
1938
1939
1940int
1941sfxge_tx_init(struct sfxge_softc *sc)
1942{
1943	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
1944	struct sfxge_intr *intr;
1945	int index;
1946	int rc;
1947
1948	intr = &sc->intr;
1949
1950	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
1951	    ("intr->state != SFXGE_INTR_INITIALIZED"));
1952
1953	if (sfxge_tx_dpl_get_max <= 0) {
1954		log(LOG_ERR, "%s=%d must be greater than 0",
1955		    SFXGE_PARAM_TX_DPL_GET_MAX, sfxge_tx_dpl_get_max);
1956		rc = EINVAL;
1957		goto fail_tx_dpl_get_max;
1958	}
1959	if (sfxge_tx_dpl_get_non_tcp_max <= 0) {
1960		log(LOG_ERR, "%s=%d must be greater than 0",
1961		    SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX,
1962		    sfxge_tx_dpl_get_non_tcp_max);
1963		rc = EINVAL;
1964		goto fail_tx_dpl_get_non_tcp_max;
1965	}
1966	if (sfxge_tx_dpl_put_max < 0) {
1967		log(LOG_ERR, "%s=%d must be greater than or equal to 0",
1968		    SFXGE_PARAM_TX_DPL_PUT_MAX, sfxge_tx_dpl_put_max);
1969		rc = EINVAL;
1970		goto fail_tx_dpl_put_max;
1971	}
1972
1973	sc->txq_count = SFXGE_TXQ_NTYPES - 1 + sc->intr.n_alloc;
1974
1975	sc->tso_fw_assisted = sfxge_tso_fw_assisted;
1976	if ((~encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) ||
1977	    (!encp->enc_fw_assisted_tso_enabled))
1978		sc->tso_fw_assisted &= ~SFXGE_FATSOV1;
1979	if ((~encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO_V2) ||
1980	    (!encp->enc_fw_assisted_tso_v2_enabled))
1981		sc->tso_fw_assisted &= ~SFXGE_FATSOV2;
1982
1983	sc->txqs_node = SYSCTL_ADD_NODE(
1984		device_get_sysctl_ctx(sc->dev),
1985		SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
1986		OID_AUTO, "txq", CTLFLAG_RD, NULL, "Tx queues");
1987	if (sc->txqs_node == NULL) {
1988		rc = ENOMEM;
1989		goto fail_txq_node;
1990	}
1991
1992	/* Initialize the transmit queues */
1993	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NON_CKSUM,
1994	    SFXGE_TXQ_NON_CKSUM, 0)) != 0)
1995		goto fail;
1996
1997	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_IP_CKSUM,
1998	    SFXGE_TXQ_IP_CKSUM, 0)) != 0)
1999		goto fail2;
2000
2001	for (index = 0;
2002	     index < sc->txq_count - SFXGE_TXQ_NTYPES + 1;
2003	     index++) {
2004		if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NTYPES - 1 + index,
2005		    SFXGE_TXQ_IP_TCP_UDP_CKSUM, index)) != 0)
2006			goto fail3;
2007	}
2008
2009	sfxge_tx_stat_init(sc);
2010
2011	return (0);
2012
2013fail3:
2014	while (--index >= 0)
2015		sfxge_tx_qfini(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);
2016
2017	sfxge_tx_qfini(sc, SFXGE_TXQ_IP_CKSUM);
2018
2019fail2:
2020	sfxge_tx_qfini(sc, SFXGE_TXQ_NON_CKSUM);
2021
2022fail:
2023fail_txq_node:
2024	sc->txq_count = 0;
2025fail_tx_dpl_put_max:
2026fail_tx_dpl_get_non_tcp_max:
2027fail_tx_dpl_get_max:
2028	return (rc);
2029}
2030