mlx5_en_tx.c revision 331590
1/*-
2 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c 331590 2018-03-26 21:04:47Z hselasky $
26 */
27
28#include "en.h"
29#include <machine/atomic.h>
30
31static inline bool
32mlx5e_do_send_cqe(struct mlx5e_sq *sq)
33{
34	sq->cev_counter++;
35	/* interleave the CQEs */
36	if (sq->cev_counter >= sq->cev_factor) {
37		sq->cev_counter = 0;
38		return (1);
39	}
40	return (0);
41}
42
43void
44mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
45{
46	u16 pi = sq->pc & sq->wq.sz_m1;
47	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
48
49	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
50
51	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
52	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
53	if (mlx5e_do_send_cqe(sq))
54		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
55	else
56		wqe->ctrl.fm_ce_se = 0;
57
58	/* Copy data for doorbell */
59	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
60
61	sq->mbuf[pi].mbuf = NULL;
62	sq->mbuf[pi].num_bytes = 0;
63	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
64	sq->pc += sq->mbuf[pi].num_wqebbs;
65}
66
#if (__FreeBSD_version >= 1100000)
/* Seed passed to m_ether_tcpip_hash() when selecting a channel */
static uint32_t mlx5e_hash_value;

/*
 * SYSINIT callback which obtains the hash seed. The argument is
 * not used.
 */
static void
mlx5e_hash_init(void *dummy)
{
	mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/*
 * Have the kernel invoke mlx5e_hash_init() once the random stack
 * has finished initializing.
 */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
#endif
79
80static struct mlx5e_sq *
81mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
82{
83	struct mlx5e_priv *priv = ifp->if_softc;
84	struct mlx5e_channel * volatile *ppch;
85	struct mlx5e_channel *pch;
86	u32 ch;
87	u32 tc;
88
89	ppch = priv->channel;
90
91	/* check if channels are successfully opened */
92	if (unlikely(ppch == NULL))
93		return (NULL);
94
95	/* obtain VLAN information if present */
96	if (mb->m_flags & M_VLANTAG) {
97		tc = (mb->m_pkthdr.ether_vtag >> 13);
98		if (tc >= priv->num_tc)
99			tc = priv->default_vlan_prio;
100	} else {
101		tc = priv->default_vlan_prio;
102	}
103
104	ch = priv->params.num_channels;
105
106	/* check if flowid is set */
107	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
108#ifdef RSS
109		u32 temp;
110
111		if (rss_hash2bucket(mb->m_pkthdr.flowid,
112		    M_HASHTYPE_GET(mb), &temp) == 0)
113			ch = temp % ch;
114		else
115#endif
116			ch = (mb->m_pkthdr.flowid % 128) % ch;
117	} else {
118#if (__FreeBSD_version >= 1100000)
119		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
120		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
121#else
122		/*
123		 * m_ether_tcpip_hash not present in stable, so just
124		 * throw unhashed mbufs on queue 0
125		 */
126		ch = 0;
127#endif
128	}
129
130	/* check if channel is allocated and not stopped */
131	pch = ppch[ch];
132	if (likely(pch != NULL && pch->sq[tc].stopped == 0))
133		return (&pch->sq[tc]);
134	return (NULL);
135}
136
137static inline u16
138mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct mbuf *mb)
139{
140	return (MIN(MLX5E_MAX_TX_INLINE, mb->m_len));
141}
142
143static int
144mlx5e_get_header_size(struct mbuf *mb)
145{
146	struct ether_vlan_header *eh;
147	struct tcphdr *th;
148	struct ip *ip;
149	int ip_hlen, tcp_hlen;
150	struct ip6_hdr *ip6;
151	uint16_t eth_type;
152	int eth_hdr_len;
153
154	eh = mtod(mb, struct ether_vlan_header *);
155	if (mb->m_len < ETHER_HDR_LEN)
156		return (0);
157	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
158		eth_type = ntohs(eh->evl_proto);
159		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
160	} else {
161		eth_type = ntohs(eh->evl_encap_proto);
162		eth_hdr_len = ETHER_HDR_LEN;
163	}
164	if (mb->m_len < eth_hdr_len)
165		return (0);
166	switch (eth_type) {
167	case ETHERTYPE_IP:
168		ip = (struct ip *)(mb->m_data + eth_hdr_len);
169		if (mb->m_len < eth_hdr_len + sizeof(*ip))
170			return (0);
171		if (ip->ip_p != IPPROTO_TCP)
172			return (0);
173		ip_hlen = ip->ip_hl << 2;
174		eth_hdr_len += ip_hlen;
175		break;
176	case ETHERTYPE_IPV6:
177		ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
178		if (mb->m_len < eth_hdr_len + sizeof(*ip6))
179			return (0);
180		if (ip6->ip6_nxt != IPPROTO_TCP)
181			return (0);
182		eth_hdr_len += sizeof(*ip6);
183		break;
184	default:
185		return (0);
186	}
187	if (mb->m_len < eth_hdr_len + sizeof(*th))
188		return (0);
189	th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
190	tcp_hlen = th->th_off << 2;
191	eth_hdr_len += tcp_hlen;
192	if (mb->m_len < eth_hdr_len)
193		return (0);
194	return (eth_hdr_len);
195}
196
197/*
198 * The return value is not going back to the stack because of
199 * the drbr
200 */
201static int
202mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
203{
204	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
205	struct mlx5_wqe_data_seg *dseg;
206	struct mlx5e_tx_wqe *wqe;
207	struct ifnet *ifp;
208	int nsegs;
209	int err;
210	int x;
211	struct mbuf *mb = *mbp;
212	u16 ds_cnt;
213	u16 ihs;
214	u16 pi;
215	u8 opcode;
216
217	/*
218	 * Return ENOBUFS if the queue is full, this may trigger reinsertion
219	 * of the mbuf into the drbr (see mlx5e_xmit_locked)
220	 */
221	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
222		return (ENOBUFS);
223	}
224
225	/* Align SQ edge with NOPs to avoid WQE wrap around */
226	pi = ((~sq->pc) & sq->wq.sz_m1);
227	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
228		/* Send one multi NOP message instead of many */
229		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
230		pi = ((~sq->pc) & sq->wq.sz_m1);
231		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))
232			return (ENOMEM);
233	}
234
235	/* Setup local variables */
236	pi = sq->pc & sq->wq.sz_m1;
237	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
238	ifp = sq->ifp;
239
240	memset(wqe, 0, sizeof(*wqe));
241
242	/* Send a copy of the frame to the BPF listener, if any */
243	if (ifp != NULL && ifp->if_bpf != NULL)
244		ETHER_BPF_MTAP(ifp, mb);
245
246	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
247		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
248	}
249	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
250		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
251	}
252	if (wqe->eth.cs_flags == 0) {
253		sq->stats.csum_offload_none++;
254	}
255	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
256		u32 payload_len;
257		u32 mss = mb->m_pkthdr.tso_segsz;
258		u32 num_pkts;
259
260		wqe->eth.mss = cpu_to_be16(mss);
261		opcode = MLX5_OPCODE_LSO;
262		ihs = mlx5e_get_header_size(mb);
263		payload_len = mb->m_pkthdr.len - ihs;
264		if (payload_len == 0)
265			num_pkts = 1;
266		else
267			num_pkts = DIV_ROUND_UP(payload_len, mss);
268		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs);
269
270		sq->stats.tso_packets++;
271		sq->stats.tso_bytes += payload_len;
272	} else {
273		opcode = MLX5_OPCODE_SEND;
274		ihs = mlx5e_get_inline_hdr_size(sq, mb);
275		sq->mbuf[pi].num_bytes = max_t (unsigned int,
276		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
277	}
278	if (mb->m_flags & M_VLANTAG) {
279		struct ether_vlan_header *eh =
280		    (struct ether_vlan_header *)wqe->eth.inline_hdr_start;
281
282		/* Range checks */
283		if (ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))
284			ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN);
285		else if (ihs < ETHER_HDR_LEN) {
286			err = EINVAL;
287			goto tx_drop;
288		}
289		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
290		m_adj(mb, ETHER_HDR_LEN);
291		/* Insert 4 bytes VLAN tag into data stream */
292		eh->evl_proto = eh->evl_encap_proto;
293		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
294		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
295		/* Copy rest of header data, if any */
296		m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
297		m_adj(mb, ihs - ETHER_HDR_LEN);
298		/* Extend header by 4 bytes */
299		ihs += ETHER_VLAN_ENCAP_LEN;
300	} else {
301		m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start);
302		m_adj(mb, ihs);
303	}
304
305	wqe->eth.inline_hdr_sz = cpu_to_be16(ihs);
306
307	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
308	if (likely(ihs > sizeof(wqe->eth.inline_hdr_start))) {
309		ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start),
310		    MLX5_SEND_WQE_DS);
311	}
312	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
313
314	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
315	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
316	if (err == EFBIG) {
317		/* Update statistics */
318		sq->stats.defragged++;
319		/* Too many mbuf fragments */
320		mb = m_defrag(*mbp, M_NOWAIT);
321		if (mb == NULL) {
322			mb = *mbp;
323			goto tx_drop;
324		}
325		/* Try again */
326		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
327		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
328	}
329	/* Catch errors */
330	if (err != 0)
331		goto tx_drop;
332
333	/* Make sure all mbuf data, if any, is written to RAM */
334	if (nsegs != 0) {
335		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
336		    BUS_DMASYNC_PREWRITE);
337	} else {
338		/* All data was inlined, free the mbuf. */
339		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
340		m_freem(mb);
341		mb = NULL;
342	}
343
344	for (x = 0; x != nsegs; x++) {
345		if (segs[x].ds_len == 0)
346			continue;
347		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
348		dseg->lkey = sq->mkey_be;
349		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
350		dseg++;
351	}
352
353	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
354
355	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
356	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
357	if (mlx5e_do_send_cqe(sq))
358		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
359	else
360		wqe->ctrl.fm_ce_se = 0;
361
362	/* Copy data for doorbell */
363	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
364
365	/* Store pointer to mbuf */
366	sq->mbuf[pi].mbuf = mb;
367	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
368	sq->pc += sq->mbuf[pi].num_wqebbs;
369
370	sq->stats.packets++;
371	*mbp = NULL;	/* safety clear */
372	return (0);
373
374tx_drop:
375	sq->stats.dropped++;
376	*mbp = NULL;
377	m_freem(mb);
378	return err;
379}
380
381static void
382mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
383{
384	u16 sqcc;
385
386	/*
387	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
388	 * otherwise a cq overrun may occur
389	 */
390	sqcc = sq->cc;
391
392	while (budget > 0) {
393		struct mlx5_cqe64 *cqe;
394		struct mbuf *mb;
395		u16 x;
396		u16 ci;
397
398		cqe = mlx5e_get_cqe(&sq->cq);
399		if (!cqe)
400			break;
401
402		mlx5_cqwq_pop(&sq->cq.wq);
403
404		/* update budget according to the event factor */
405		budget -= sq->cev_factor;
406
407		for (x = 0; x != sq->cev_factor; x++) {
408			ci = sqcc & sq->wq.sz_m1;
409			mb = sq->mbuf[ci].mbuf;
410			sq->mbuf[ci].mbuf = NULL;	/* Safety clear */
411
412			if (mb == NULL) {
413				if (sq->mbuf[ci].num_bytes == 0) {
414					/* NOP */
415					sq->stats.nop++;
416				}
417			} else {
418				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
419				    BUS_DMASYNC_POSTWRITE);
420				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);
421
422				/* Free transmitted mbuf */
423				m_freem(mb);
424			}
425			sqcc += sq->mbuf[ci].num_wqebbs;
426		}
427	}
428
429	mlx5_cqwq_update_db_record(&sq->cq.wq);
430
431	/* Ensure cq space is freed before enabling more cqes */
432	atomic_thread_fence_rel();
433
434	sq->cc = sqcc;
435
436	if (sq->sq_tq != NULL &&
437	    atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY))
438		taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
439}
440
441static int
442mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
443{
444	struct mbuf *next;
445	int err = 0;
446
447	if (likely(mb != NULL)) {
448		/*
449		 * If we can't insert mbuf into drbr, try to xmit anyway.
450		 * We keep the error we got so we could return that after xmit.
451		 */
452		err = drbr_enqueue(ifp, sq->br, mb);
453	}
454
455	/*
456	 * Check if the network interface is closed or if the SQ is
457	 * being stopped:
458	 */
459	if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
460	    sq->stopped != 0))
461		return (err);
462
463	/* Process the queue */
464	while ((next = drbr_peek(ifp, sq->br)) != NULL) {
465		if (mlx5e_sq_xmit(sq, &next) != 0) {
466			if (next == NULL) {
467				drbr_advance(ifp, sq->br);
468			} else {
469				drbr_putback(ifp, sq->br, next);
470				atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_FULL);
471			}
472			break;
473		}
474		drbr_advance(ifp, sq->br);
475	}
476	/* Check if we need to write the doorbell */
477	if (likely(sq->doorbell.d64 != 0)) {
478		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
479		sq->doorbell.d64 = 0;
480	}
481	/*
482	 * Check if we need to start the event timer which flushes the
483	 * transmit ring on timeout:
484	 */
485	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
486	    sq->cev_factor != 1)) {
487		/* start the timer */
488		mlx5e_sq_cev_timeout(sq);
489	} else {
490		/* don't send NOPs yet */
491		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
492	}
493	return (err);
494}
495
496static int
497mlx5e_xmit_locked_no_br(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
498{
499	int err = 0;
500
501	if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
502	    sq->stopped != 0)) {
503		m_freem(mb);
504		return (ENETDOWN);
505	}
506
507	/* Do transmit */
508	if (mlx5e_sq_xmit(sq, &mb) != 0) {
509		/* NOTE: m_freem() is NULL safe */
510		m_freem(mb);
511		err = ENOBUFS;
512	}
513
514	/* Check if we need to write the doorbell */
515	if (likely(sq->doorbell.d64 != 0)) {
516		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
517		sq->doorbell.d64 = 0;
518	}
519
520	/*
521	 * Check if we need to start the event timer which flushes the
522	 * transmit ring on timeout:
523	 */
524	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
525	    sq->cev_factor != 1)) {
526		/* start the timer */
527		mlx5e_sq_cev_timeout(sq);
528	} else {
529		/* don't send NOPs yet */
530		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
531	}
532	return (err);
533}
534
535int
536mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
537{
538	struct mlx5e_sq *sq;
539	int ret;
540
541	sq = mlx5e_select_queue(ifp, mb);
542	if (unlikely(sq == NULL)) {
543		/* Invalid send queue */
544		m_freem(mb);
545		return (ENXIO);
546	}
547
548	if (unlikely(sq->br == NULL)) {
549		/* rate limited traffic */
550		mtx_lock(&sq->lock);
551		ret = mlx5e_xmit_locked_no_br(ifp, sq, mb);
552		mtx_unlock(&sq->lock);
553	} else if (mtx_trylock(&sq->lock)) {
554		ret = mlx5e_xmit_locked(ifp, sq, mb);
555		mtx_unlock(&sq->lock);
556	} else {
557		ret = drbr_enqueue(ifp, sq->br, mb);
558		taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
559	}
560
561	return (ret);
562}
563
564void
565mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
566{
567	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);
568
569	mtx_lock(&sq->comp_lock);
570	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
571	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
572	mtx_unlock(&sq->comp_lock);
573}
574
575void
576mlx5e_tx_que(void *context, int pending)
577{
578	struct mlx5e_sq *sq = context;
579	struct ifnet *ifp = sq->ifp;
580
581	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
582		mtx_lock(&sq->lock);
583		if (!drbr_empty(ifp, sq->br))
584			mlx5e_xmit_locked(ifp, sq, NULL);
585		mtx_unlock(&sq->lock);
586	}
587}
588