1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include "opt_rss.h"
34#include "ena.h"
35#include "ena_datapath.h"
36#ifdef DEV_NETMAP
37#include "ena_netmap.h"
38#endif /* DEV_NETMAP */
39
40/*********************************************************************
41 *  Static functions prototypes
42 *********************************************************************/
43
/*
 * Forward declarations for every file-local function, in definition order.
 */
static int	ena_tx_cleanup(struct ena_ring *);
static int	ena_rx_cleanup(struct ena_ring *);
static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
    struct mbuf *);
static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
    struct ena_com_rx_ctx *, uint16_t *);
static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
    struct mbuf *);
static void	ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool);
static int	ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
    struct mbuf **mbuf);
static int	ena_tx_map_mbuf(struct ena_ring *, struct ena_tx_buffer *,
    struct mbuf *, void **, uint16_t *);
static int	ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
static void	ena_start_xmit(struct ena_ring *);
58
59/*********************************************************************
60 *  Global functions
61 *********************************************************************/
62
63void
64ena_cleanup(void *arg, int pending)
65{
66	struct ena_que	*que = arg;
67	struct ena_adapter *adapter = que->adapter;
68	if_t ifp = adapter->ifp;
69	struct ena_ring *tx_ring;
70	struct ena_ring *rx_ring;
71	struct ena_com_io_cq* io_cq;
72	struct ena_eth_io_intr_reg intr_reg;
73	int qid, ena_qid;
74	int txc, rxc, i;
75
76	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
77		return;
78
79	ena_trace(NULL, ENA_DBG, "MSI-X TX/RX routine\n");
80
81	tx_ring = que->tx_ring;
82	rx_ring = que->rx_ring;
83	qid = que->id;
84	ena_qid = ENA_IO_TXQ_IDX(qid);
85	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
86
87	tx_ring->first_interrupt = true;
88	rx_ring->first_interrupt = true;
89
90	for (i = 0; i < CLEAN_BUDGET; ++i) {
91		rxc = ena_rx_cleanup(rx_ring);
92		txc = ena_tx_cleanup(tx_ring);
93
94		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
95			return;
96
97		if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
98		       break;
99	}
100
101	/* Signal that work is done and unmask interrupt */
102	ena_com_update_intr_reg(&intr_reg,
103	    RX_IRQ_INTERVAL,
104	    TX_IRQ_INTERVAL,
105	    true);
106	ena_com_unmask_intr(io_cq, &intr_reg);
107}
108
109void
110ena_deferred_mq_start(void *arg, int pending)
111{
112	struct ena_ring *tx_ring = (struct ena_ring *)arg;
113	struct ifnet *ifp = tx_ring->adapter->ifp;
114
115	while (!drbr_empty(ifp, tx_ring->br) &&
116	    tx_ring->running &&
117	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
118		ENA_RING_MTX_LOCK(tx_ring);
119		ena_start_xmit(tx_ring);
120		ENA_RING_MTX_UNLOCK(tx_ring);
121	}
122}
123
124int
125ena_mq_start(if_t ifp, struct mbuf *m)
126{
127	struct ena_adapter *adapter = ifp->if_softc;
128	struct ena_ring *tx_ring;
129	int ret, is_drbr_empty;
130	uint32_t i;
131
132	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
133		return (ENODEV);
134
135	/* Which queue to use */
136	/*
137	 * If everything is setup correctly, it should be the
138	 * same bucket that the current CPU we're on is.
139	 * It should improve performance.
140	 */
141	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
142		i = m->m_pkthdr.flowid % adapter->num_io_queues;
143	} else {
144		i = curcpu % adapter->num_io_queues;
145	}
146	tx_ring = &adapter->tx_ring[i];
147
148	/* Check if drbr is empty before putting packet */
149	is_drbr_empty = drbr_empty(ifp, tx_ring->br);
150	ret = drbr_enqueue(ifp, tx_ring->br, m);
151	if (unlikely(ret != 0)) {
152		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
153		return (ret);
154	}
155
156	if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
157		ena_start_xmit(tx_ring);
158		ENA_RING_MTX_UNLOCK(tx_ring);
159	} else {
160		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
161	}
162
163	return (0);
164}
165
166void
167ena_qflush(if_t ifp)
168{
169	struct ena_adapter *adapter = ifp->if_softc;
170	struct ena_ring *tx_ring = adapter->tx_ring;
171	int i;
172
173	for(i = 0; i < adapter->num_io_queues; ++i, ++tx_ring)
174		if (!drbr_empty(ifp, tx_ring->br)) {
175			ENA_RING_MTX_LOCK(tx_ring);
176			drbr_flush(ifp, tx_ring->br);
177			ENA_RING_MTX_UNLOCK(tx_ring);
178		}
179
180	if_qflush(ifp);
181}
182
183/*********************************************************************
184 *  Static functions
185 *********************************************************************/
186
187static inline int
188validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
189{
190	struct ena_adapter *adapter = tx_ring->adapter;
191	struct ena_tx_buffer *tx_info = NULL;
192
193	if (likely(req_id < tx_ring->ring_size)) {
194		tx_info = &tx_ring->tx_buffer_info[req_id];
195		if (tx_info->mbuf != NULL)
196			return (0);
197		device_printf(adapter->pdev,
198		    "tx_info doesn't have valid mbuf\n");
199	}
200
201	device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
202	counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
203
204	/* Trigger device reset */
205	ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
206
207	return (EFAULT);
208}
209
/**
 * ena_tx_cleanup - clear sent packets and corresponding descriptors
 * @tx_ring: ring for which we want to clean packets
 *
 * Once packets are sent, we ask the device in a loop for no longer used
 * descriptors. We find the related mbuf chain in a map (index in an array)
 * and free it, then update ring state.
 *
 * The loop stops when ena_com_tx_comp_req_id_get() returns non-zero, when a
 * request id fails validate_tx_req_id(), or after TX_BUDGET packets. Ring
 * state is committed every TX_COMMIT packets and once more after the loop
 * for any remainder.
 *
 * Afterwards, if the ring was stopped and the submission queue has at least
 * ENA_TX_RESUME_THRESH free entries, the ring is restarted and its enqueue
 * task is scheduled.
 *
 * Returns the number of packets cleaned (0 when netmap_tx_irq() does not
 * return NM_IRQ_PASS).
 **/
static int
ena_tx_cleanup(struct ena_ring *tx_ring)
{
	struct ena_adapter *adapter;
	struct ena_com_io_cq* io_cq;
	uint16_t next_to_clean;
	uint16_t req_id;
	uint16_t ena_qid;
	unsigned int total_done = 0;
	int rc;
	int commit = TX_COMMIT;
	int budget = TX_BUDGET;
	int work_done;
	bool above_thresh;

	adapter = tx_ring->que->adapter;
	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	next_to_clean = tx_ring->next_to_clean;

#ifdef DEV_NETMAP
	/* Nothing to do here unless netmap passes the interrupt on to us. */
	if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	do {
		struct ena_tx_buffer *tx_info;
		struct mbuf *mbuf;

		rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
		if (unlikely(rc != 0))
			break;

		/* A bad id has already triggered a reset; stop cleaning. */
		rc = validate_tx_req_id(tx_ring, req_id);
		if (unlikely(rc != 0))
			break;

		tx_info = &tx_ring->tx_buffer_info[req_id];

		mbuf = tx_info->mbuf;

		/* Detach the mbuf from the slot and clear its timestamp. */
		tx_info->mbuf = NULL;
		bintime_clear(&tx_info->timestamp);

		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(adapter->tx_buf_tag,
		    tx_info->dmamap);

		ena_trace(NULL, ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed\n",
		    tx_ring->qid, mbuf);

		m_freem(mbuf);

		/* Descriptors used by this packet, acked in bulk below. */
		total_done += tx_info->tx_descs;

		/* Return the request id to the free list at the cleaned slot. */
		tx_ring->free_tx_ids[next_to_clean] = req_id;
		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
		    tx_ring->ring_size);

		if (unlikely(--commit == 0)) {
			commit = TX_COMMIT;
			/* update ring state every TX_COMMIT descriptor */
			tx_ring->next_to_clean = next_to_clean;
			ena_com_comp_ack(
			    &adapter->ena_dev->io_sq_queues[ena_qid],
			    total_done);
			ena_com_update_dev_comp_head(io_cq);
			total_done = 0;
		}
	} while (likely(--budget));

	/*
	 * budget is decremented only for packets that were fully handled
	 * (the break paths skip the decrement), so this is the packet count.
	 */
	work_done = TX_BUDGET - budget;

	ena_trace(NULL, ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d\n",
	tx_ring->qid, work_done);

	/* If there is still something to commit update ring state */
	if (likely(commit != TX_COMMIT)) {
		tx_ring->next_to_clean = next_to_clean;
		ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
		    total_done);
		ena_com_update_dev_comp_head(io_cq);
	}

	/*
	 * Need to make the rings circular update visible to
	 * ena_xmit_mbuf() before checking for tx_ring->running.
	 */
	mb();

	/*
	 * Unlocked check first; the condition is evaluated again under the
	 * ring lock before the ring is actually restarted, since the transmit
	 * path changes tx_ring->running while holding that lock.
	 */
	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
	    ENA_TX_RESUME_THRESH);
	if (unlikely(!tx_ring->running && above_thresh)) {
		ENA_RING_MTX_LOCK(tx_ring);
		above_thresh =
		    ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
		    ENA_TX_RESUME_THRESH);
		if (!tx_ring->running && above_thresh) {
			tx_ring->running = true;
			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
			taskqueue_enqueue(tx_ring->enqueue_tq,
			    &tx_ring->enqueue_task);
		}
		ENA_RING_MTX_UNLOCK(tx_ring);
	}

	return (work_done);
}
330
331static void
332ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
333    struct mbuf *mbuf)
334{
335	struct ena_adapter *adapter = rx_ring->adapter;
336
337	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
338		mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
339
340#ifdef RSS
341		/*
342		 * Hardware and software RSS are in agreement only when both are
343		 * configured to Toeplitz algorithm.  This driver configures
344		 * that algorithm only when software RSS is enabled and uses it.
345		 */
346		if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ &&
347		    ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) {
348			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
349			return;
350		}
351#endif
352
353		if (ena_rx_ctx->frag &&
354		    (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
355			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
356			return;
357		}
358
359		switch (ena_rx_ctx->l3_proto) {
360		case ENA_ETH_IO_L3_PROTO_IPV4:
361			switch (ena_rx_ctx->l4_proto) {
362			case ENA_ETH_IO_L4_PROTO_TCP:
363				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
364				break;
365			case ENA_ETH_IO_L4_PROTO_UDP:
366				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
367				break;
368			default:
369				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
370			}
371			break;
372		case ENA_ETH_IO_L3_PROTO_IPV6:
373			switch (ena_rx_ctx->l4_proto) {
374			case ENA_ETH_IO_L4_PROTO_TCP:
375				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
376				break;
377			case ENA_ETH_IO_L4_PROTO_UDP:
378				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
379				break;
380			default:
381				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
382			}
383			break;
384		case ENA_ETH_IO_L3_PROTO_UNKNOWN:
385			M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
386			break;
387		default:
388			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
389		}
390	} else {
391		mbuf->m_pkthdr.flowid = rx_ring->qid;
392		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
393	}
394}
395
396/**
397 * ena_rx_mbuf - assemble mbuf from descriptors
398 * @rx_ring: ring for which we want to clean packets
399 * @ena_bufs: buffer info
400 * @ena_rx_ctx: metadata for this packet(s)
401 * @next_to_clean: ring pointer, will be updated only upon success
402 *
403 **/
404static struct mbuf*
405ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
406    struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
407{
408	struct mbuf *mbuf;
409	struct ena_rx_buffer *rx_info;
410	struct ena_adapter *adapter;
411	unsigned int descs = ena_rx_ctx->descs;
412	uint16_t ntc, len, req_id, buf = 0;
413
414	ntc = *next_to_clean;
415	adapter = rx_ring->adapter;
416
417	len = ena_bufs[buf].len;
418	req_id = ena_bufs[buf].req_id;
419	rx_info = &rx_ring->rx_buffer_info[req_id];
420	if (unlikely(rx_info->mbuf == NULL)) {
421		device_printf(adapter->pdev, "NULL mbuf in rx_info");
422		return (NULL);
423	}
424
425	ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx\n",
426	    rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
427
428	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
429	    BUS_DMASYNC_POSTREAD);
430	mbuf = rx_info->mbuf;
431	mbuf->m_flags |= M_PKTHDR;
432	mbuf->m_pkthdr.len = len;
433	mbuf->m_len = len;
434	// Only for the first segment the data starts at specific offset
435	mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset);
436	ena_trace(NULL, ENA_DBG | ENA_RXPTH,
437		"Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset);
438	mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
439
440	/* Fill mbuf with hash key and it's interpretation for optimization */
441	ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
442
443	ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d\n",
444	    mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
445
446	/* DMA address is not needed anymore, unmap it */
447	bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
448
449	rx_info->mbuf = NULL;
450	rx_ring->free_rx_ids[ntc] = req_id;
451	ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
452
453	/*
454	 * While we have more than 1 descriptors for one rcvd packet, append
455	 * other mbufs to the main one
456	 */
457	while (--descs) {
458		++buf;
459		len = ena_bufs[buf].len;
460		req_id = ena_bufs[buf].req_id;
461		rx_info = &rx_ring->rx_buffer_info[req_id];
462
463		if (unlikely(rx_info->mbuf == NULL)) {
464			device_printf(adapter->pdev, "NULL mbuf in rx_info");
465			/*
466			 * If one of the required mbufs was not allocated yet,
467			 * we can break there.
468			 * All earlier used descriptors will be reallocated
469			 * later and not used mbufs can be reused.
470			 * The next_to_clean pointer will not be updated in case
471			 * of an error, so caller should advance it manually
472			 * in error handling routine to keep it up to date
473			 * with hw ring.
474			 */
475			m_freem(mbuf);
476			return (NULL);
477		}
478
479		bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
480		    BUS_DMASYNC_POSTREAD);
481		if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
482			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
483			ena_trace(NULL, ENA_WARNING, "Failed to append Rx mbuf %p\n",
484			    mbuf);
485		}
486
487		ena_trace(NULL, ENA_DBG | ENA_RXPTH,
488		    "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len);
489
490		/* Free already appended mbuf, it won't be useful anymore */
491		bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
492		m_freem(rx_info->mbuf);
493		rx_info->mbuf = NULL;
494
495		rx_ring->free_rx_ids[ntc] = req_id;
496		ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
497	}
498
499	*next_to_clean = ntc;
500
501	return (mbuf);
502}
503
504/**
505 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
506 **/
507static inline void
508ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
509    struct mbuf *mbuf)
510{
511
512	/* if IP and error */
513	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
514	    ena_rx_ctx->l3_csum_err)) {
515		/* ipv4 checksum error */
516		mbuf->m_pkthdr.csum_flags = 0;
517		counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
518		ena_trace(NULL, ENA_DBG, "RX IPv4 header checksum error\n");
519		return;
520	}
521
522	/* if TCP/UDP */
523	if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
524	    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
525		if (ena_rx_ctx->l4_csum_err) {
526			/* TCP/UDP checksum error */
527			mbuf->m_pkthdr.csum_flags = 0;
528			counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
529			ena_trace(NULL, ENA_DBG, "RX L4 checksum error\n");
530		} else {
531			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
532			mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
533		}
534	}
535}
536
/**
 * ena_rx_cleanup - handle rx irq
 * @rx_ring: ring for which irq is being handled
 *
 * Pulls up to RX_BUDGET packets from the completion queue, builds an mbuf
 * for each one, fills in checksum flags when Rx checksum offload is enabled
 * and passes the packet to LRO or to the interface input routine. Afterwards
 * the ring is refilled when enough entries are free and LRO is flushed.
 *
 * Returns the number of packets processed. Returns 0 when netmap_rx_irq()
 * does not return NM_IRQ_PASS, and also when ena_com_rx_pkt() fails, in
 * which case a device reset is triggered.
 **/
static int
ena_rx_cleanup(struct ena_ring *rx_ring)
{
	struct ena_adapter *adapter;
	struct mbuf *mbuf;
	struct ena_com_rx_ctx ena_rx_ctx;
	struct ena_com_io_cq* io_cq;
	struct ena_com_io_sq* io_sq;
	enum ena_regs_reset_reason_types reset_reason;
	if_t ifp;
	uint16_t ena_qid;
	uint16_t next_to_clean;
	uint32_t refill_required;
	uint32_t refill_threshold;
	uint32_t do_if_input = 0;
	unsigned int qid;
	int rc, i;
	int budget = RX_BUDGET;
#ifdef DEV_NETMAP
	int done;
#endif /* DEV_NETMAP */

	adapter = rx_ring->que->adapter;
	ifp = adapter->ifp;
	qid = rx_ring->que->id;
	ena_qid = ENA_IO_RXQ_IDX(qid);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
	next_to_clean = rx_ring->next_to_clean;

#ifdef DEV_NETMAP
	/* Nothing to do here unless netmap passes the interrupt on to us. */
	if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	ena_trace(NULL, ENA_DBG, "rx: qid %d\n", qid);

	do {
		/* Reset the per-packet context before asking for a packet. */
		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
		ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
		ena_rx_ctx.descs = 0;
		ena_rx_ctx.pkt_offset = 0;

		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
		rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
		if (unlikely(rc != 0)) {
			/* Any failure here ends in a device reset. */
			if (rc == ENA_COM_NO_SPACE) {
				counter_u64_add(rx_ring->rx_stats.bad_desc_num,
				    1);
				reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
			} else {
				counter_u64_add(rx_ring->rx_stats.bad_req_id,
				    1);
				reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
			}
			ena_trigger_reset(adapter, reset_reason);
			return (0);
		}

		/* No descriptors were returned: nothing more to process. */
		if (unlikely(ena_rx_ctx.descs == 0))
			break;

		ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
		    "descs #: %d l3 proto %d l4 proto %d hash: %x\n",
		    rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
		    ena_rx_ctx.l4_proto, ena_rx_ctx.hash);

		/* Receive mbuf from the ring */
		mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
		    &ena_rx_ctx, &next_to_clean);
		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
		/* Exit if we failed to retrieve a buffer */
		if (unlikely(mbuf == NULL)) {
			/*
			 * ena_rx_mbuf() did not advance next_to_clean, so
			 * recycle the request ids of all descriptors of this
			 * packet and step over them here.
			 */
			for (i = 0; i < ena_rx_ctx.descs; ++i) {
				rx_ring->free_rx_ids[next_to_clean] =
				    rx_ring->ena_bufs[i].req_id;
				next_to_clean =
				    ENA_RX_RING_IDX_NEXT(next_to_clean,
				    rx_ring->ring_size);

			}
			break;
		}

		if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
		    ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
			ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
		}

		/* Byte counters are read before the mbuf is handed off. */
		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.bytes,
		    mbuf->m_pkthdr.len);
		counter_u64_add_protected(adapter->hw_stats.rx_bytes,
		    mbuf->m_pkthdr.len);
		counter_exit();
		/*
		 * LRO is only for IP/TCP packets and TCP checksum of the packet
		 * should be computed by hardware.
		 */
		do_if_input = 1;
		if (((ifp->if_capenable & IFCAP_LRO) != 0)  &&
		    ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
		    (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
			/*
			 * Send to the stack if:
			 *  - LRO not enabled, or
			 *  - no LRO resources, or
			 *  - lro enqueue fails
			 */
			if ((rx_ring->lro.lro_cnt != 0) &&
			    (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
					do_if_input = 0;
		}
		if (do_if_input != 0) {
			ena_trace(NULL, ENA_DBG | ENA_RXPTH,
			    "calling if_input() with mbuf %p\n", mbuf);
			(*ifp->if_input)(ifp, mbuf);
		}

		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
		counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
		counter_exit();
	} while (--budget);

	rx_ring->next_to_clean = next_to_clean;

	/*
	 * Refill only once the number of free entries exceeds the smaller of
	 * ring_size / ENA_RX_REFILL_THRESH_DIVIDER and
	 * ENA_RX_REFILL_THRESH_PACKET.
	 */
	refill_required = ena_com_free_q_entries(io_sq);
	refill_threshold = min_t(int,
	    rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
	    ENA_RX_REFILL_THRESH_PACKET);

	if (refill_required > refill_threshold) {
		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
		ena_refill_rx_bufs(rx_ring, refill_required);
	}

	tcp_lro_flush_all(&rx_ring->lro);

	/* budget is decremented only for packets that were fully processed. */
	return (RX_BUDGET - budget);
}
684
685static void
686ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
687    bool disable_meta_caching)
688{
689	struct ena_com_tx_meta *ena_meta;
690	struct ether_vlan_header *eh;
691	struct mbuf *mbuf_next;
692	u32 mss;
693	bool offload;
694	uint16_t etype;
695	int ehdrlen;
696	struct ip *ip;
697	int iphlen;
698	struct tcphdr *th;
699	int offset;
700
701	offload = false;
702	ena_meta = &ena_tx_ctx->ena_meta;
703	mss = mbuf->m_pkthdr.tso_segsz;
704
705	if (mss != 0)
706		offload = true;
707
708	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
709		offload = true;
710
711	if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
712		offload = true;
713
714	if (!offload) {
715		if (disable_meta_caching) {
716			memset(ena_meta, 0, sizeof(*ena_meta));
717			ena_tx_ctx->meta_valid = 1;
718		} else {
719			ena_tx_ctx->meta_valid = 0;
720		}
721		return;
722	}
723
724	/* Determine where frame payload starts. */
725	eh = mtod(mbuf, struct ether_vlan_header *);
726	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
727		etype = ntohs(eh->evl_proto);
728		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
729	} else {
730		etype = ntohs(eh->evl_encap_proto);
731		ehdrlen = ETHER_HDR_LEN;
732	}
733
734	mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
735	ip = (struct ip *)(mtodo(mbuf_next, offset));
736	iphlen = ip->ip_hl << 2;
737
738	mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
739	th = (struct tcphdr *)(mtodo(mbuf_next, offset));
740
741	if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
742		ena_tx_ctx->l3_csum_enable = 1;
743	}
744	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
745		ena_tx_ctx->tso_enable = 1;
746		ena_meta->l4_hdr_len = (th->th_off);
747	}
748
749	switch (etype) {
750	case ETHERTYPE_IP:
751		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
752		if ((ip->ip_off & htons(IP_DF)) != 0)
753			ena_tx_ctx->df = 1;
754		break;
755	case ETHERTYPE_IPV6:
756		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
757
758	default:
759		break;
760	}
761
762	if (ip->ip_p == IPPROTO_TCP) {
763		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
764		if ((mbuf->m_pkthdr.csum_flags &
765		    (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
766			ena_tx_ctx->l4_csum_enable = 1;
767		else
768			ena_tx_ctx->l4_csum_enable = 0;
769	} else if (ip->ip_p == IPPROTO_UDP) {
770		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
771		if ((mbuf->m_pkthdr.csum_flags &
772		    (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
773			ena_tx_ctx->l4_csum_enable = 1;
774		else
775			ena_tx_ctx->l4_csum_enable = 0;
776	} else {
777		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
778		ena_tx_ctx->l4_csum_enable = 0;
779	}
780
781	ena_meta->mss = mss;
782	ena_meta->l3_hdr_len = iphlen;
783	ena_meta->l3_hdr_offset = ehdrlen;
784	ena_tx_ctx->meta_valid = 1;
785}
786
787static int
788ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
789{
790	struct ena_adapter *adapter;
791	struct mbuf *collapsed_mbuf;
792	int num_frags;
793
794	adapter = tx_ring->adapter;
795	num_frags = ena_mbuf_count(*mbuf);
796
797	/* One segment must be reserved for configuration descriptor. */
798	if (num_frags < adapter->max_tx_sgl_size)
799		return (0);
800	counter_u64_add(tx_ring->tx_stats.collapse, 1);
801
802	collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
803	    adapter->max_tx_sgl_size - 1);
804	if (unlikely(collapsed_mbuf == NULL)) {
805		counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
806		return (ENOMEM);
807	}
808
809	/* If mbuf was collapsed succesfully, original mbuf is released. */
810	*mbuf = collapsed_mbuf;
811
812	return (0);
813}
814
815static int
816ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
817    struct mbuf *mbuf, void **push_hdr, u16 *header_len)
818{
819	struct ena_adapter *adapter = tx_ring->adapter;
820	struct ena_com_buf *ena_buf;
821	bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
822	size_t iseg = 0;
823	uint32_t mbuf_head_len;
824	uint16_t offset;
825	int rc, nsegs;
826
827	mbuf_head_len = mbuf->m_len;
828	tx_info->mbuf = mbuf;
829	ena_buf = tx_info->bufs;
830
831	/*
832	 * For easier maintaining of the DMA map, map the whole mbuf even if
833	 * the LLQ is used. The descriptors will be filled using the segments.
834	 */
835	rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf,
836	    segs, &nsegs, BUS_DMA_NOWAIT);
837	if (unlikely((rc != 0) || (nsegs == 0))) {
838		ena_trace(NULL, ENA_WARNING,
839		    "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
840		goto dma_error;
841	}
842
843	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
844		/*
845		 * When the device is LLQ mode, the driver will copy
846		 * the header into the device memory space.
847		 * the ena_com layer assumes the header is in a linear
848		 * memory space.
849		 * This assumption might be wrong since part of the header
850		 * can be in the fragmented buffers.
851		 * First check if header fits in the mbuf. If not, copy it to
852		 * separate buffer that will be holding linearized data.
853		 */
854		*header_len = min_t(uint32_t, mbuf->m_pkthdr.len, tx_ring->tx_max_header_size);
855
856		/* If header is in linear space, just point into mbuf's data. */
857		if (likely(*header_len <= mbuf_head_len)) {
858			*push_hdr = mbuf->m_data;
859		/*
860		 * Otherwise, copy whole portion of header from multiple mbufs
861		 * to intermediate buffer.
862		 */
863		} else {
864			m_copydata(mbuf, 0, *header_len, tx_ring->push_buf_intermediate_buf);
865			*push_hdr = tx_ring->push_buf_intermediate_buf;
866
867			counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
868		}
869
870		ena_trace(NULL, ENA_DBG | ENA_TXPTH,
871		    "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
872		    mbuf, *push_hdr, *header_len);
873
874		/* If packet is fitted in LLQ header, no need for DMA segments. */
875		if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) {
876			return (0);
877		} else {
878			offset = tx_ring->tx_max_header_size;
879			/*
880			 * As Header part is mapped to LLQ header, we can skip it and just
881			 * map the residuum of the mbuf to DMA Segments.
882			 */
883			while (offset > 0) {
884				if (offset >= segs[iseg].ds_len) {
885					offset -= segs[iseg].ds_len;
886				} else {
887					ena_buf->paddr = segs[iseg].ds_addr + offset;
888					ena_buf->len = segs[iseg].ds_len - offset;
889					ena_buf++;
890					tx_info->num_of_bufs++;
891					offset = 0;
892				}
893				iseg++;
894			}
895		}
896	} else {
897		*push_hdr = NULL;
898		/*
899		* header_len is just a hint for the device. Because FreeBSD is not
900		* giving us information about packet header length and it is not
901		* guaranteed that all packet headers will be in the 1st mbuf, setting
902		* header_len to 0 is making the device ignore this value and resolve
903		* header on it's own.
904		*/
905		*header_len = 0;
906	}
907
908	/* Map rest of the mbuf */
909	while (iseg < nsegs) {
910		ena_buf->paddr = segs[iseg].ds_addr;
911		ena_buf->len = segs[iseg].ds_len;
912		ena_buf++;
913		iseg++;
914		tx_info->num_of_bufs++;
915	}
916
917	return (0);
918
919dma_error:
920	counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
921	tx_info->mbuf = NULL;
922	return (rc);
923}
924
/**
 * ena_xmit_mbuf - prepare one packet for transmission on a Tx ring
 * @tx_ring: ring to send on
 * @mbuf: in/out, packet to send; replaced when the chain gets collapsed
 *
 * Collapses the chain if it has too many fragments, maps it for DMA, fills
 * the Tx context (buffers, pushed header, offload meta data) and hands it to
 * ena_com_prepare_tx(). On success the ring's next_to_use is advanced, and
 * the ring is stopped when the submission queue runs short of space. Apart
 * from the early doorbell below, the doorbell itself is left to the caller.
 *
 * Returns 0 on success, otherwise the error of the failing step. When
 * ena_com_prepare_tx() fails the DMA map is unloaded and tx_info->mbuf is
 * cleared; the mbuf itself is not freed here.
 **/
static int
ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
{
	struct ena_adapter *adapter;
	struct ena_tx_buffer *tx_info;
	struct ena_com_tx_ctx ena_tx_ctx;
	struct ena_com_dev *ena_dev;
	struct ena_com_io_sq* io_sq;
	void *push_hdr;
	uint16_t next_to_use;
	uint16_t req_id;
	uint16_t ena_qid;
	uint16_t header_len;
	int rc;
	int nb_hw_desc;

	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
	adapter = tx_ring->que->adapter;
	ena_dev = adapter->ena_dev;
	io_sq = &ena_dev->io_sq_queues[ena_qid];

	rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
	if (unlikely(rc != 0)) {
		ena_trace(NULL, ENA_WARNING,
		    "Failed to collapse mbuf! err: %d\n", rc);
		return (rc);
	}

	ena_trace(NULL, ENA_DBG | ENA_TXPTH, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);

	/* Take the free request id stored at the next_to_use slot. */
	next_to_use = tx_ring->next_to_use;
	req_id = tx_ring->free_tx_ids[next_to_use];
	tx_info = &tx_ring->tx_buffer_info[req_id];
	tx_info->num_of_bufs = 0;

	rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
	if (unlikely(rc != 0)) {
		ena_trace(NULL, ENA_WARNING, "Failed to map TX mbuf\n");
		return (rc);
	}
	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
	ena_tx_ctx.ena_bufs = tx_info->bufs;
	ena_tx_ctx.push_header = push_hdr;
	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
	ena_tx_ctx.req_id = req_id;
	ena_tx_ctx.header_len = header_len;

	/* Set flags and meta data */
	ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching);

	/*
	 * Ring the doorbell for the packets accumulated so far before
	 * queueing this one, when DB_THRESHOLD packets are pending or
	 * ena_com_is_doorbell_needed() asks for it.
	 */
	if (tx_ring->acum_pkts == DB_THRESHOLD ||
	    ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
		ena_trace(NULL, ENA_DBG | ENA_TXPTH,
		    "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
		    tx_ring->que->id);
		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
		tx_ring->acum_pkts = 0;
	}

	/* Prepare the packet's descriptors and send them to device */
	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
	if (unlikely(rc != 0)) {
		if (likely(rc == ENA_COM_NO_MEM)) {
			ena_trace(NULL, ENA_DBG | ENA_TXPTH,
			    "tx ring[%d] if out of space\n", tx_ring->que->id);
		} else {
			device_printf(adapter->pdev,
			    "failed to prepare tx bufs\n");
		}
		counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
		goto dma_error;
	}

	counter_enter();
	counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
	counter_u64_add_protected(tx_ring->tx_stats.bytes,
	    (*mbuf)->m_pkthdr.len);

	counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
	counter_u64_add_protected(adapter->hw_stats.tx_bytes,
	    (*mbuf)->m_pkthdr.len);
	counter_exit();

	/* Remembered so that Tx cleanup can ack the right descriptor count. */
	tx_info->tx_descs = nb_hw_desc;
	getbinuptime(&tx_info->timestamp);
	tx_info->print_once = true;

	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
	    tx_ring->ring_size);

	/* stop the queue when no more space available, the packet can have up
	 * to sgl_size + 2. one for the meta descriptor and one for header
	 * (if the header is larger than tx_max_header_size).
	 */
	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
	    adapter->max_tx_sgl_size + 2))) {
		ena_trace(NULL, ENA_DBG | ENA_TXPTH, "Stop queue %d\n",
		    tx_ring->que->id);

		tx_ring->running = false;
		counter_u64_add(tx_ring->tx_stats.queue_stop, 1);

		/* There is a rare condition where this function decides to
		 * stop the queue but meanwhile ena_tx_cleanup() updates
		 * the ring state and terminates.
		 * The queue will remain stopped forever.
		 * To solve this issue this function performs mb(), checks
		 * the wakeup condition and wakes up the queue if needed.
		 */
		mb();

		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
		    ENA_TX_RESUME_THRESH)) {
			tx_ring->running = true;
			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
		}
	}

	bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
	    BUS_DMASYNC_PREWRITE);

	return (0);

dma_error:
	/* Undo the mapping done by ena_tx_map_mbuf(). */
	tx_info->mbuf = NULL;
	bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);

	return (rc);
}
1055
1056static void
1057ena_start_xmit(struct ena_ring *tx_ring)
1058{
1059	struct mbuf *mbuf;
1060	struct ena_adapter *adapter = tx_ring->adapter;
1061	struct ena_com_io_sq* io_sq;
1062	int ena_qid;
1063	int ret = 0;
1064
1065	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1066		return;
1067
1068	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
1069		return;
1070
1071	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
1072	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
1073
1074	while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
1075		ena_trace(NULL, ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
1076		    " header csum flags %#jx\n",
1077		    mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
1078
1079		if (unlikely(!tx_ring->running)) {
1080			drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1081			break;
1082		}
1083
1084		if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
1085			if (ret == ENA_COM_NO_MEM) {
1086				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1087			} else if (ret == ENA_COM_NO_SPACE) {
1088				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1089			} else {
1090				m_freem(mbuf);
1091				drbr_advance(adapter->ifp, tx_ring->br);
1092			}
1093
1094			break;
1095		}
1096
1097		drbr_advance(adapter->ifp, tx_ring->br);
1098
1099		if (unlikely((if_getdrvflags(adapter->ifp) &
1100		    IFF_DRV_RUNNING) == 0))
1101			return;
1102
1103		tx_ring->acum_pkts++;
1104
1105		BPF_MTAP(adapter->ifp, mbuf);
1106	}
1107
1108	if (likely(tx_ring->acum_pkts != 0)) {
1109		/* Trigger the dma engine */
1110		ena_com_write_sq_doorbell(io_sq);
1111		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
1112		tx_ring->acum_pkts = 0;
1113	}
1114
1115	if (unlikely(!tx_ring->running))
1116		taskqueue_enqueue(tx_ring->que->cleanup_tq,
1117		    &tx_ring->que->cleanup_task);
1118}
1119