/* SPDX-License-Identifier: BSD-3-Clause */
/*  Copyright (c) 2024, Intel Corporation
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 *   3. Neither the name of the Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived from
 *      this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file ice_common_txrx.h
 * @brief common Tx/Rx utility functions
 *
 * Contains common utility functions for the Tx/Rx hot path.
 *
 * These functions depend on the if_pkt_info_t structure. A suitable
 * implementation of this structure must be provided if these functions are to
 * be used without the iflib networking stack.
 */

#ifndef _ICE_COMMON_TXRX_H_
#define _ICE_COMMON_TXRX_H_

#include <netinet/udp.h>
#include <netinet/sctp.h>

/**
 * ice_tso_detect_sparse - detect TSO packets with too many segments
 * @pi: packet information
 *
 * Hardware can only transmit packets that use a maximum of 8 descriptors. For
 * TSO packets, hardware must be able to build each of the resulting split
 * packets using 8 or fewer descriptors. Additionally, the header must be
 * contained within at most 3 descriptors.
 *
 * To verify this, we walk the headers to find out how many descriptors the
 * headers require (usually 1). Then we ensure that, for each TSO segment, its
 * data plus the headers are contained within 8 or fewer descriptors.
 */
static inline int
ice_tso_detect_sparse(if_pkt_info_t pi)
{
	int count, curseg, i, hlen, segsz, seglen, tsolen, hdrs, maxsegs;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;

	curseg = hdrs = 0;

	hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	tsolen = pi->ipi_len - hlen;

	/* First, count the number of descriptors for the header.
	 * Additionally, make sure it does not span more than 3 segments.
	 */
	i = 0;
	curseg = segs[0].ds_len;
	while (hlen > 0) {
		hdrs++;
		if (hdrs > ICE_MAX_TSO_HDR_SEGS)
			return (1);
		if (curseg == 0) {
			i++;
			if (__predict_false(i == nsegs))
				return (1);

			curseg = segs[i].ds_len;
		}
		seglen = min(curseg, hlen);
		curseg -= seglen;
		hlen -= seglen;
	}

	maxsegs = ICE_MAX_TX_SEGS - hdrs;

	/* We must count the headers in order to verify that they occupy
	 * no more than 3 descriptors. However, we don't need to check the
	 * data if the total number of segments is small.
	 */
	if (nsegs <= maxsegs)
		return (0);

	count = 0;

	/* Now check the data to make sure that each TSO segment is made up of
	 * no more than maxsegs descriptors. This ensures that hardware will
	 * be capable of performing TSO offload.
	 */
	while (tsolen > 0) {
		segsz = pi->ipi_tso_segsz;
		while (segsz > 0 && tsolen != 0) {
			count++;
			if (count > maxsegs) {
				return (1);
			}
			if (curseg == 0) {
				i++;
				if (__predict_false(i == nsegs)) {
					return (1);
				}
				curseg = segs[i].ds_len;
			}
			seglen = min(curseg, segsz);
			segsz -= seglen;
			curseg -= seglen;
			tsolen -= seglen;
		}
		count = 0;
	}

	return (0);
}
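
/*
 * Hedged usage sketch for ice_tso_detect_sparse(): in an iflib-style
 * txd_encap routine, returning EFBIG is the conventional way to ask iflib
 * to collapse the mbuf chain and retry with fewer segments. The enclosing
 * function and its shape are hypothetical, not part of this file:
 *
 *	static int
 *	example_txd_encap(void *arg, if_pkt_info_t pi)
 *	{
 *		if (pi->ipi_csum_flags & CSUM_TSO) {
 *			// Too sparse for 8-descriptor TSO: have iflib defrag
 *			if (ice_tso_detect_sparse(pi))
 *				return (EFBIG);
 *		}
 *		// ... continue building descriptors ...
 *	}
 */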

/**
 * ice_tso_setup - Setup a context descriptor to prepare for a TSO packet
 * @txq: the Tx queue to use
 * @pi: the packet info to prepare for
 *
 * Setup a context descriptor in preparation for sending a Tx packet that
 * requires the TSO offload. Returns the index of the descriptor to use when
 * encapsulating the Tx packet data into descriptors.
 */
static inline int
ice_tso_setup(struct ice_tx_queue *txq, if_pkt_info_t pi)
{
	struct ice_tx_ctx_desc		*txd;
	u32				cmd, mss, type, tsolen;
	int				idx;
	u64				type_cmd_tso_mss;

	idx = pi->ipi_pidx;
	txd = (struct ice_tx_ctx_desc *)&txq->tx_base[idx];
	tsolen = pi->ipi_len - (pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen);

	type = ICE_TX_DESC_DTYPE_CTX;
	cmd = ICE_TX_CTX_DESC_TSO;
	/* TSO MSS must not be less than 64 */
	if (pi->ipi_tso_segsz < ICE_MIN_TSO_MSS) {
		txq->stats.mss_too_small++;
		pi->ipi_tso_segsz = ICE_MIN_TSO_MSS;
	}
	mss = pi->ipi_tso_segsz;

	type_cmd_tso_mss = ((u64)type << ICE_TXD_CTX_QW1_DTYPE_S) |
	    ((u64)cmd << ICE_TXD_CTX_QW1_CMD_S) |
	    ((u64)tsolen << ICE_TXD_CTX_QW1_TSO_LEN_S) |
	    ((u64)mss << ICE_TXD_CTX_QW1_MSS_S);
	txd->qw1 = htole64(type_cmd_tso_mss);

	txd->tunneling_params = htole32(0);
	txq->tso++;

	return ((idx + 1) & (txq->desc_count-1));
}
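
/*
 * Hedged usage sketch: ice_tso_setup() consumes one context descriptor and
 * returns the (wrapped) index of the next free descriptor, which is where
 * the caller should begin writing data descriptors. The control flow below
 * is illustrative only:
 *
 *	i = pi->ipi_pidx;
 *	if (pi->ipi_csum_flags & CSUM_TSO) {
 *		if (ice_tso_detect_sparse(pi))
 *			return (EFBIG);
 *		i = ice_tso_setup(txq, pi);	// one context descriptor used
 *	}
 *	// fill data descriptors starting at txq->tx_base[i]
 */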

/**
 * ice_tx_setup_offload - Setup register values for performing a Tx offload
 * @txq: The Tx queue, used to track checksum offload stats
 * @pi: the packet info to program for
 * @cmd: the cmd register value to update
 * @off: the off register value to update
 *
 * Based on the packet info provided, update the cmd and off values for
 * enabling Tx offloads. This depends on the packet type and which offloads
 * have been requested.
 *
 * We also track the total number of times that we've requested that hardware
 * offload a particular type of checksum, for debugging purposes.
 */
static inline void
ice_tx_setup_offload(struct ice_tx_queue *txq, if_pkt_info_t pi, u32 *cmd, u32 *off)
{
	u32 remaining_csum_flags = pi->ipi_csum_flags;

	switch (pi->ipi_etype) {
#ifdef INET
		case ETHERTYPE_IP:
			if (pi->ipi_csum_flags & ICE_CSUM_IP) {
				*cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
				txq->stats.cso[ICE_CSO_STAT_TX_IP4]++;
				remaining_csum_flags &= ~CSUM_IP;
			} else
				*cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
			break;
#endif
#ifdef INET6
		case ETHERTYPE_IPV6:
			*cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
			/*
			 * This indicates that the IIPT flag was set to the IPV6 value;
			 * there's no checksum for IPv6 packets.
			 */
			txq->stats.cso[ICE_CSO_STAT_TX_IP6]++;
			break;
#endif
		default:
			txq->stats.cso[ICE_CSO_STAT_TX_L3_ERR]++;
			break;
	}

	*off |= (pi->ipi_ehdrlen >> 1) << ICE_TX_DESC_LEN_MACLEN_S;
	*off |= (pi->ipi_ip_hlen >> 2) << ICE_TX_DESC_LEN_IPLEN_S;

	if (!(remaining_csum_flags & ~ICE_RX_CSUM_FLAGS))
		return;

	switch (pi->ipi_ipproto) {
		case IPPROTO_TCP:
			if (pi->ipi_csum_flags & ICE_CSUM_TCP) {
				*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
				*off |= (pi->ipi_tcp_hlen >> 2) <<
				    ICE_TX_DESC_LEN_L4_LEN_S;
				txq->stats.cso[ICE_CSO_STAT_TX_TCP]++;
			}
			break;
		case IPPROTO_UDP:
			if (pi->ipi_csum_flags & ICE_CSUM_UDP) {
				*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
				*off |= (sizeof(struct udphdr) >> 2) <<
				    ICE_TX_DESC_LEN_L4_LEN_S;
				txq->stats.cso[ICE_CSO_STAT_TX_UDP]++;
			}
			break;
		case IPPROTO_SCTP:
			if (pi->ipi_csum_flags & ICE_CSUM_SCTP) {
				*cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
				*off |= (sizeof(struct sctphdr) >> 2) <<
				    ICE_TX_DESC_LEN_L4_LEN_S;
				txq->stats.cso[ICE_CSO_STAT_TX_SCTP]++;
			}
			break;
		default:
			txq->stats.cso[ICE_CSO_STAT_TX_L4_ERR]++;
			break;
	}
}
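
/*
 * Hedged usage sketch: the cmd and off values built by ice_tx_setup_offload()
 * are folded into each data descriptor's qw1. The QW1 shift macros below are
 * assumed to come from the driver's descriptor definitions (ice_lan_tx_rx.h)
 * and seglen is a hypothetical per-segment length; treat this as illustrative:
 *
 *	u32 cmd = ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS, off = 0;
 *
 *	ice_tx_setup_offload(txq, pi, &cmd, &off);
 *	txd->qw1 = htole64(ICE_TX_DESC_DTYPE_DATA |
 *	    ((u64)cmd << ICE_TXD_QW1_CMD_S) |
 *	    ((u64)off << ICE_TXD_QW1_OFFSET_S) |
 *	    ((u64)seglen << ICE_TXD_QW1_TX_BUF_SZ_S));
 */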

/**
 * ice_rx_checksum - verify whether the hardware checksum is valid
 * @rxq: the Rx queue structure
 * @flags: checksum flags to update
 * @data: checksum data to update
 * @status0: descriptor status data
 * @ptype: packet type
 *
 * Determine whether the hardware indicated that the Rx checksum is valid. If
 * so, update the checksum flags and data, informing the stack of the status
 * of the checksum so that it does not spend time verifying it manually.
 */
static void
ice_rx_checksum(struct ice_rx_queue *rxq, uint32_t *flags, uint32_t *data,
		u16 status0, u16 ptype)
{
	const u16 l3_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
			      BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S));
	const u16 l4_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |
			      BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S));
	const u16 xsum_errors = (l3_error | l4_error |
				 BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S));
	struct ice_rx_ptype_decoded decoded;
	bool is_ipv4, is_ipv6;

	/* No L3 or L4 checksum was calculated */
	if (!(status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) {
		return;
	}

	decoded = ice_decode_rx_desc_ptype(ptype);
	*flags = 0;

	if (!(decoded.known && decoded.outer_ip))
		return;

	is_ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
	    (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
	is_ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
	    (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);

	/* No checksum errors were reported */
	if (!(status0 & xsum_errors)) {
		if (is_ipv4)
			*flags |= CSUM_L3_CALC | CSUM_L3_VALID;

		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
		case ICE_RX_PTYPE_INNER_PROT_UDP:
		case ICE_RX_PTYPE_INNER_PROT_SCTP:
			*flags |= CSUM_L4_CALC | CSUM_L4_VALID;
			*data |= htons(0xffff);
			break;
		default:
			break;
		}

		return;
	}

	/*
	 * Certain IPv6 extension headers impact the validity of L4 checksums.
	 * If one of these headers exists, hardware will set the IPV6EXADD bit
	 * in the descriptor. If the bit is set, then pretend that hardware
	 * didn't checksum this packet.
	 */
	if (is_ipv6 && (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))) {
		rxq->stats.cso[ICE_CSO_STAT_RX_IP6_ERR]++;
		return;
	}

	/*
	 * At this point, status0 must have at least one of the l3_error or
	 * l4_error bits set.
	 */

	if (status0 & l3_error) {
		if (is_ipv4) {
			rxq->stats.cso[ICE_CSO_STAT_RX_IP4_ERR]++;
			*flags |= CSUM_L3_CALC;
		} else {
			/* Hardware indicated L3 error but this isn't IPv4? */
			rxq->stats.cso[ICE_CSO_STAT_RX_L3_ERR]++;
		}
		/* don't bother reporting L4 errors if we got an L3 error */
		return;
	} else if (is_ipv4) {
		*flags |= CSUM_L3_CALC | CSUM_L3_VALID;
	}

	if (status0 & l4_error) {
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			rxq->stats.cso[ICE_CSO_STAT_RX_TCP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			rxq->stats.cso[ICE_CSO_STAT_RX_UDP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		case ICE_RX_PTYPE_INNER_PROT_SCTP:
			rxq->stats.cso[ICE_CSO_STAT_RX_SCTP_ERR]++;
			*flags |= CSUM_L4_CALC;
			break;
		default:
			/*
			 * Hardware indicated L4 error, but this isn't one of
			 * the expected protocols.
			 */
			rxq->stats.cso[ICE_CSO_STAT_RX_L4_ERR]++;
		}
	}
}
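
/*
 * Hedged usage sketch: a caller in an iflib rxd_pkt_get path would hand
 * ice_rx_checksum() the iri_csum_flags and iri_csum_data fields of the
 * if_rxd_info_t it is filling in, gated on RXCSUM being enabled. The ifp
 * and ri variables are hypothetical:
 *
 *	if (if_getcapenable(ifp) & IFCAP_RXCSUM)
 *		ice_rx_checksum(rxq, &ri->iri_csum_flags,
 *		    &ri->iri_csum_data, status0, ptype);
 */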

/**
 * ice_ptype_to_hash - Convert packet type to a hash value
 * @ptype: the packet type to convert
 *
 * Given the packet type, convert to a suitable hashtype to report to the
 * upper stack via the iri_rsstype value of the if_rxd_info_t structure.
 *
 * If the hash type is unknown, we'll report M_HASHTYPE_OPAQUE.
 */
static inline int
ice_ptype_to_hash(u16 ptype)
{
	struct ice_rx_ptype_decoded decoded;

	if (ptype >= ARRAY_SIZE(ice_ptype_lkup))
		return M_HASHTYPE_OPAQUE;

	decoded = ice_decode_rx_desc_ptype(ptype);

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE;

	if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV6;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case ICE_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case ICE_RX_PTYPE_INNER_PROT_UDP:
			return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}

	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE;
}
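
/*
 * Hedged usage sketch: the Rx path would pair the RSS hash read from the
 * descriptor with the hash type derived here. The ri variable and the
 * rss_hash_from_desc value are hypothetical placeholders:
 *
 *	ri->iri_flowid = le32toh(rss_hash_from_desc);
 *	ri->iri_rsstype = ice_ptype_to_hash(ptype);
 */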
#endif /* _ICE_COMMON_TXRX_H_ */