/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c 292195 2015-12-14 10:18:04Z hselasky $
 */

#include "en.h"
#include <machine/in_cksum.h>

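/*
 * Allocate one receive mbuf for RX ring slot "ix", DMA map it and store
 * its bus address in the given work queue entry. If the slot already
 * has an mbuf attached nothing is done. Returns zero on success or a
 * negative errno when the mbuf cannot be allocated or does not map into
 * a single DMA segment.
 */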
static inline int
mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq,
    struct mlx5e_rx_wqe *wqe, u16 ix)
{
	bus_dma_segment_t segs[1];
	struct mbuf *mb;
	int nsegs;
	int err;

	if (rq->mbuf[ix].mbuf != NULL)
		return (0);

	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);
	if (unlikely(!mb))
		return (-ENOMEM);

	/* set initial mbuf length */
	mb->m_pkthdr.len = mb->m_len = rq->wqe_sz;

	/* get IP header aligned */
	m_adj(mb, MLX5E_NET_IP_ALIGN);

	err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err != 0)
		goto err_free_mbuf;
	if (unlikely(nsegs != 1)) {
		bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map);
		err = -ENOMEM;
		goto err_free_mbuf;
	}
	wqe->data.addr = cpu_to_be64(segs[0].ds_addr);

	rq->mbuf[ix].mbuf = mb;
	rq->mbuf[ix].data = mb->m_data;

	bus_dmamap_sync(rq->dma_tag, rq->mbuf[ix].dma_map,
	    BUS_DMASYNC_PREREAD);
	return (0);

err_free_mbuf:
	m_freem(mb);
	return (err);
}

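/*
 * Refill the RX work queue: keep allocating and posting receive WQEs
 * until the ring is full or an mbuf allocation fails, then make the new
 * WQEs visible to the hardware by updating the doorbell record. Does
 * nothing when the RQ is disabled.
 */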
static void
mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
	if (unlikely(rq->enabled == 0))
		return;

	while (!mlx5_wq_ll_is_full(&rq->wq)) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head);

		if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head)))
			break;

		mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index));
	}

	/* ensure wqes are visible to device before updating doorbell record */
	wmb();

	mlx5_wq_ll_update_db_record(&rq->wq);
}

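/*
 * Rewrite the headers of a hardware LRO aggregated packet so that it
 * looks like a single large TCP segment: update the TCP flags, ACK
 * number, window and (if present) the timestamp option from the CQE,
 * refresh the TTL/hop limit, and recompute the IPv4/IPv6 payload length
 * and the IPv4 header checksum.
 */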
static void
mlx5e_lro_update_hdr(struct mbuf *mb, struct mlx5_cqe64 *cqe)
{
	/* TODO: consider vlans, ip options, ... */
	struct ether_header *eh;
	uint16_t eh_type;
	struct ip6_hdr *ip6 = NULL;
	struct ip *ip4 = NULL;
	struct tcphdr *th;
	uint32_t *ts_ptr;

	eh = mtod(mb, struct ether_header *);
	eh_type = ntohs(eh->ether_type);

	u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
	int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
	    (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));

	/* TODO: consider vlan */
	u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETHER_HDR_LEN;

	switch (eh_type) {
	case ETHERTYPE_IP:
		ip4 = (struct ip *)(eh + 1);
		th = (struct tcphdr *)(ip4 + 1);
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(eh + 1);
		th = (struct tcphdr *)(ip6 + 1);
		break;
	default:
		return;
	}

	ts_ptr = (uint32_t *)(th + 1);

	if (get_cqe_lro_tcppsh(cqe))
		th->th_flags |= TH_PUSH;

	if (tcp_ack) {
		th->th_flags |= TH_ACK;
		th->th_ack = cqe->lro_ack_seq_num;
		th->th_win = cqe->lro_tcp_win;

		/*
		 * FreeBSD handles only a 32-bit aligned timestamp option
		 * placed right after the TCP header:
		 * +--------+--------+--------+--------+
		 * |   NOP  |  NOP   |  TSopt |   10   |
		 * +--------+--------+--------+--------+
		 * |          TSval   timestamp        |
		 * +--------+--------+--------+--------+
		 * |          TSecr   timestamp        |
		 * +--------+--------+--------+--------+
		 */
		if (get_cqe_lro_timestamp_valid(cqe) &&
		    (__predict_true(*ts_ptr) == ntohl(TCPOPT_NOP << 24 |
		    TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 |
		    TCPOLEN_TIMESTAMP))) {
			/*
			 * cqe->timestamp is 64 bits wide:
			 * [0-31]  - timestamp.
			 * [32-63] - timestamp echo reply.
			 */
			ts_ptr[1] = *(uint32_t *)&cqe->timestamp;
			ts_ptr[2] = *((uint32_t *)&cqe->timestamp + 1);
		}
	}
	if (ip4) {
		ip4->ip_ttl = cqe->lro_min_ttl;
		ip4->ip_len = cpu_to_be16(tot_len);
		ip4->ip_sum = 0;
		ip4->ip_sum = in_cksum(mb, ip4->ip_hl << 2);
	} else {
		ip6->ip6_hlim = cqe->lro_min_ttl;
		ip6->ip6_plen = cpu_to_be16(tot_len -
		    sizeof(struct ip6_hdr));
	}
	/* TODO: handle tcp checksum */
}

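/*
 * Finalize a received mbuf before it is handed to the network stack:
 * fix up LRO aggregated headers, set the packet length, flow id and RSS
 * hash type, record hardware checksum results and attach any stripped
 * VLAN tag.
 */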
static inline void
mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe,
    struct mlx5e_rq *rq, struct mbuf *mb,
    u32 cqe_bcnt)
{
	struct ifnet *ifp = rq->ifp;
	int lro_num_seg;	/* HW LRO session aggregated packets counter */

	lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
	if (lro_num_seg > 1) {
		mlx5e_lro_update_hdr(mb, cqe);
		rq->stats.lro_packets++;
		rq->stats.lro_bytes += cqe_bcnt;
	}

	mb->m_pkthdr.len = mb->m_len = cqe_bcnt;
	/* check if a Toeplitz hash was computed */
	if (cqe->rss_hash_type != 0) {
		mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result);
#ifdef RSS
		/* decode the RSS hash type */
		switch (cqe->rss_hash_type &
		    (CQE_RSS_DST_HTYPE_L4 | CQE_RSS_DST_HTYPE_IP)) {
		/* IPv4 */
		case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV4):
			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV4):
			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		case CQE_RSS_DST_HTYPE_IPV4:
			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV4);
			break;
		/* IPv6 */
		case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV6):
			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV6):
			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		case CQE_RSS_DST_HTYPE_IPV6:
			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV6);
			break;
		default:	/* Other */
			M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
			break;
		}
#else
		M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
#endif
	} else {
		mb->m_pkthdr.flowid = rq->ix;
		M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
	}
	mb->m_pkthdr.rcvif = ifp;

	if (likely(ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) &&
	    ((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) ==
	    (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) {
		mb->m_pkthdr.csum_flags =
		    CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		mb->m_pkthdr.csum_data = htons(0xffff);
	} else {
		rq->stats.csum_none++;
	}

	if (cqe_has_vlan(cqe)) {
		mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info);
		mb->m_flags |= M_VLANTAG;
	}
}

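/*
 * Process up to "budget" completions from the RX completion queue,
 * building an mbuf for each one and passing it to LRO or directly to
 * the interface input routine. Returns the number of CQEs consumed so
 * the caller can tell whether the budget was exhausted.
 */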
static int
mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
{
#ifndef HAVE_TURBO_LRO
	struct lro_entry *queued;
#endif
	int i;

	for (i = 0; i < budget; i++) {
		struct mlx5e_rx_wqe *wqe;
		struct mlx5_cqe64 *cqe;
		struct mbuf *mb;
		__be16 wqe_counter_be;
		u16 wqe_counter;
		u32 byte_cnt;

		cqe = mlx5e_get_cqe(&rq->cq);
		if (!cqe)
			break;

		wqe_counter_be = cqe->wqe_counter;
		wqe_counter = be16_to_cpu(wqe_counter_be);
		wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
		byte_cnt = be32_to_cpu(cqe->byte_cnt);

		bus_dmamap_sync(rq->dma_tag,
		    rq->mbuf[wqe_counter].dma_map,
		    BUS_DMASYNC_POSTREAD);

		if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
			rq->stats.wqe_err++;
			goto wq_ll_pop;
		}

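		/*
		 * Small frames are copied into a freshly allocated mbuf
		 * header so the DMA mapped receive mbuf can stay attached
		 * to its ring slot; larger frames are handed up the stack
		 * directly and their DMA map is unloaded.
		 */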
		if (MHLEN >= byte_cnt &&
		    (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) {
			bcopy(rq->mbuf[wqe_counter].data, mtod(mb, caddr_t),
			    byte_cnt);
		} else {
			mb = rq->mbuf[wqe_counter].mbuf;
			rq->mbuf[wqe_counter].mbuf = NULL;	/* safety clear */

			bus_dmamap_unload(rq->dma_tag,
			    rq->mbuf[wqe_counter].dma_map);
		}

		mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt);
		rq->stats.packets++;
#ifdef HAVE_TURBO_LRO
		if (mb->m_pkthdr.csum_flags == 0 ||
		    (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
		    rq->lro.mbuf == NULL) {
			/* normal input */
			rq->ifp->if_input(rq->ifp, mb);
		} else {
			tcp_tlro_rx(&rq->lro, mb);
		}
#else
		if (mb->m_pkthdr.csum_flags == 0 ||
		    (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
		    rq->lro.lro_cnt == 0 ||
		    tcp_lro_rx(&rq->lro, mb, 0) != 0) {
			rq->ifp->if_input(rq->ifp, mb);
		}
#endif
wq_ll_pop:
		mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
		    &wqe->next.next_wqe_index);
	}

	mlx5_cqwq_update_db_record(&rq->cq.wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();
#ifndef HAVE_TURBO_LRO
	while ((queued = SLIST_FIRST(&rq->lro.lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&rq->lro.lro_active, next);
		tcp_lro_flush(&rq->lro, queued);
	}
#endif
	return (i);
}

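/*
 * RX completion queue handler: drain the CQ in budget sized chunks
 * while refilling the RX ring in between, then post fresh WQEs, re-arm
 * the completion queue and, when turbo LRO is compiled in, flush it.
 */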
void
mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq)
{
	struct mlx5e_rq *rq = container_of(mcq, struct mlx5e_rq, cq.mcq);
	int i = 0;

#ifdef HAVE_PER_CQ_EVENT_PACKET
	struct mbuf *mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);

	if (mb != NULL) {
		/* this code is used for debugging purposes only */
		mb->m_pkthdr.len = mb->m_len = 15;
		memset(mb->m_data, 255, 14);
		mb->m_data[14] = rq->ix;
		mb->m_pkthdr.rcvif = rq->ifp;
		rq->ifp->if_input(rq->ifp, mb);
	}
#endif

	mtx_lock(&rq->mtx);

	/*
	 * Polling the entire CQ without posting new WQEs results in a
	 * lack of receive WQEs during heavy traffic scenarios.
	 */
	while (1) {
		if (mlx5e_poll_rx_cq(rq, MLX5E_RX_BUDGET_MAX) !=
		    MLX5E_RX_BUDGET_MAX)
			break;
		i += MLX5E_RX_BUDGET_MAX;
		if (i >= MLX5E_BUDGET_MAX)
			break;
		mlx5e_post_rx_wqes(rq);
	}
	mlx5e_post_rx_wqes(rq);
	mlx5e_cq_arm(&rq->cq);
#ifdef HAVE_TURBO_LRO
	tcp_tlro_flush(&rq->lro, 1);
#endif
	mtx_unlock(&rq->mtx);
}