1/*
2 * Copyright (C) 2014-2015 Vincenzo Maffione
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *   1. Redistributions of source code must retain the above copyright
9 *      notice, this list of conditions and the following disclaimer.
10 *   2. Redistributions in binary form must reproduce the above copyright
11 *      notice, this list of conditions and the following disclaimer in the
12 *      documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/* $FreeBSD: stable/11/sys/dev/netmap/netmap_offloadings.c 344047 2019-02-12 09:26:05Z vmaffione $ */
28
29#if defined(__FreeBSD__)
30#include <sys/cdefs.h> /* prerequisite */
31
32#include <sys/types.h>
33#include <sys/errno.h>
34#include <sys/param.h>	/* defines used in kernel.h */
35#include <sys/kernel.h>	/* types used in module initialization */
36#include <sys/sockio.h>
37#include <sys/malloc.h>
38#include <sys/socketvar.h>	/* struct socket */
39#include <sys/socket.h> /* sockaddrs */
40#include <net/if.h>
41#include <net/if_var.h>
42#include <machine/bus.h>	/* bus_dmamap_* */
43#include <sys/endian.h>
44
45#elif defined(linux)
46
47#include "bsd_glue.h"
48
49#elif defined(__APPLE__)
50
51#warning OSX support is only partial
52#include "osx_glue.h"
53
54#else
55
56#error	Unsupported platform
57
58#endif /* unsupported */
59
60#include <net/netmap.h>
61#include <dev/netmap/netmap_kern.h>
62
63
64
65/* This routine is called by bdg_mismatch_datapath() when it finishes
66 * accumulating bytes for a segment, in order to fix some fields in the
67 * segment headers (which still contain the same content as the header
68 * of the original GSO packet). 'pkt' points to the beginning of the IP
69 * header of the segment, while 'len' is the length of the IP packet.
70 */
71static void
72gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
73		u_int idx, u_int segmented_bytes, u_int last_segment)
74{
75	struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
76	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
77	uint16_t *check = NULL;
78	uint8_t *check_data = NULL;
79
80	if (ipv4) {
81		/* Set the IPv4 "Total Length" field. */
82		iph->tot_len = htobe16(len);
83		nm_prdis("ip total length %u", be16toh(ip->tot_len));
84
85		/* Set the IPv4 "Identification" field. */
86		iph->id = htobe16(be16toh(iph->id) + idx);
87		nm_prdis("ip identification %u", be16toh(iph->id));
88
89		/* Compute and insert the IPv4 header checksum. */
90		iph->check = 0;
91		iph->check = nm_os_csum_ipv4(iph);
92		nm_prdis("IP csum %x", be16toh(iph->check));
93	} else {
94		/* Set the IPv6 "Payload Len" field. */
95		ip6h->payload_len = htobe16(len-iphlen);
96	}
97
98	if (tcp) {
99		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
100
101		/* Set the TCP sequence number. */
102		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
103		nm_prdis("tcp seq %u", be32toh(tcph->seq));
104
105		/* Zero the PSH and FIN TCP flags if this is not the last
106		   segment. */
107		if (!last_segment)
108			tcph->flags &= ~(0x8 | 0x1);
109		nm_prdis("last_segment %u", last_segment);
110
111		check = &tcph->check;
112		check_data = (uint8_t *)tcph;
113	} else { /* UDP */
114		struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
115
116		/* Set the UDP 'Length' field. */
117		udph->len = htobe16(len-iphlen);
118
119		check = &udph->check;
120		check_data = (uint8_t *)udph;
121	}
122
123	/* Compute and insert TCP/UDP checksum. */
124	*check = 0;
125	if (ipv4)
126		nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
127	else
128		nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
129
130	nm_prdis("TCP/UDP csum %x", be16toh(*check));
131}
132
133static inline int
134vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
135{
136	uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
137
138	return (
139		(gso_type != VIRTIO_NET_HDR_GSO_NONE &&
140		 gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
141		 gso_type != VIRTIO_NET_HDR_GSO_UDP &&
142		 gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
143		||
144		 (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
145			       | VIRTIO_NET_HDR_F_DATA_VALID))
146	       );
147}
148
149/* The VALE mismatch datapath implementation. */
150void
151bdg_mismatch_datapath(struct netmap_vp_adapter *na,
152		      struct netmap_vp_adapter *dst_na,
153		      const struct nm_bdg_fwd *ft_p,
154		      struct netmap_ring *dst_ring,
155		      u_int *j, u_int lim, u_int *howmany)
156{
157	struct netmap_slot *dst_slot = NULL;
158	struct nm_vnet_hdr *vh = NULL;
159	const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
160
161	/* Source and destination pointers. */
162	uint8_t *dst, *src;
163	size_t src_len, dst_len;
164
165	/* Indices and counters for the destination ring. */
166	u_int j_start = *j;
167	u_int j_cur = j_start;
168	u_int dst_slots = 0;
169
170	if (unlikely(ft_p == ft_end)) {
171		nm_prlim(1, "No source slots to process");
172		return;
173	}
174
175	/* Init source and dest pointers. */
176	src = ft_p->ft_buf;
177	src_len = ft_p->ft_len;
178	dst_slot = &dst_ring->slot[j_cur];
179	dst = NMB(&dst_na->up, dst_slot);
180	dst_len = src_len;
181
182	/* If the source port uses the offloadings, while destination doesn't,
183	 * we grab the source virtio-net header and do the offloadings here.
184	 */
185	if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
186		vh = (struct nm_vnet_hdr *)src;
187		/* Initial sanity check on the source virtio-net header. If
188		 * something seems wrong, just drop the packet. */
189		if (src_len < na->up.virt_hdr_len) {
190			nm_prlim(1, "Short src vnet header, dropping");
191			return;
192		}
193		if (unlikely(vnet_hdr_is_bad(vh))) {
194			nm_prlim(1, "Bad src vnet header, dropping");
195			return;
196		}
197	}
198
199	/* We are processing the first input slot and there is a mismatch
200	 * between source and destination virt_hdr_len (SHL and DHL).
201	 * When the a client is using virtio-net headers, the header length
202	 * can be:
203	 *    - 10: the header corresponds to the struct nm_vnet_hdr
204	 *    - 12: the first 10 bytes correspond to the struct
205	 *          virtio_net_hdr, and the last 2 bytes store the
206	 *          "mergeable buffers" info, which is an optional
207	 *	    hint that can be zeroed for compatibility
208	 *
209	 * The destination header is therefore built according to the
210	 * following table:
211	 *
212	 * SHL | DHL | destination header
213	 * -----------------------------
214	 *   0 |  10 | zero
215	 *   0 |  12 | zero
216	 *  10 |   0 | doesn't exist
217	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
218	 *  12 |   0 | doesn't exist
219	 *  12 |  10 | copied from the first 10 bytes of source header
220	 */
221	bzero(dst, dst_na->up.virt_hdr_len);
222	if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
223		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
224	/* Skip the virtio-net headers. */
225	src += na->up.virt_hdr_len;
226	src_len -= na->up.virt_hdr_len;
227	dst += dst_na->up.virt_hdr_len;
228	dst_len = dst_na->up.virt_hdr_len + src_len;
229
230	/* Here it could be dst_len == 0 (which implies src_len == 0),
231	 * so we avoid passing a zero length fragment.
232	 */
233	if (dst_len == 0) {
234		ft_p++;
235		src = ft_p->ft_buf;
236		src_len = ft_p->ft_len;
237		dst_len = src_len;
238	}
239
240	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
241		u_int gso_bytes = 0;
242		/* Length of the GSO packet header. */
243		u_int gso_hdr_len = 0;
244		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
245		uint8_t *gso_hdr = NULL;
246		/* Index of the current segment. */
247		u_int gso_idx = 0;
248		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
249		u_int segmented_bytes = 0;
250		/* Is this an IPv4 or IPv6 GSO packet? */
251		u_int ipv4 = 0;
252		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
253		u_int iphlen = 0;
254		/* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
255		u_int ethhlen = 14;
256		/* Is this a TCP or an UDP GSO packet? */
257		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
258				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
259
260		/* Segment the GSO packet contained into the input slots (frags). */
261		for (;;) {
262			size_t copy;
263
264			if (dst_slots >= *howmany) {
265				/* We still have work to do, but we've run out of
266				 * dst slots, so we have to drop the packet. */
267				nm_prdis(1, "Not enough slots, dropping GSO packet");
268				return;
269			}
270
271			/* Grab the GSO header if we don't have it. */
272			if (!gso_hdr) {
273				uint16_t ethertype;
274
275				gso_hdr = src;
276
277				/* Look at the 'Ethertype' field to see if this packet
278				 * is IPv4 or IPv6, taking into account VLAN
279				 * encapsulation. */
280				for (;;) {
281					if (src_len < ethhlen) {
282						nm_prlim(1, "Short GSO fragment [eth], dropping");
283						return;
284					}
285					ethertype = be16toh(*((uint16_t *)
286							    (gso_hdr + ethhlen - 2)));
287					if (ethertype != 0x8100) /* not 802.1q */
288						break;
289					ethhlen += 4;
290				}
291				switch (ethertype) {
292					case 0x0800:  /* IPv4 */
293					{
294						struct nm_iphdr *iph = (struct nm_iphdr *)
295									(gso_hdr + ethhlen);
296
297						if (src_len < ethhlen + 20) {
298							nm_prlim(1, "Short GSO fragment "
299							      "[IPv4], dropping");
300							return;
301						}
302						ipv4 = 1;
303						iphlen = 4 * (iph->version_ihl & 0x0F);
304						break;
305					}
306					case 0x86DD:  /* IPv6 */
307						ipv4 = 0;
308						iphlen = 40;
309						break;
310					default:
311						nm_prlim(1, "Unsupported ethertype, "
312						      "dropping GSO packet");
313						return;
314				}
315				nm_prdis(3, "type=%04x", ethertype);
316
317				if (src_len < ethhlen + iphlen) {
318					nm_prlim(1, "Short GSO fragment [IP], dropping");
319					return;
320				}
321
322				/* Compute gso_hdr_len. For TCP we need to read the
323				 * content of the 'Data Offset' field.
324				 */
325				if (tcp) {
326					struct nm_tcphdr *tcph = (struct nm_tcphdr *)
327								(gso_hdr + ethhlen + iphlen);
328
329					if (src_len < ethhlen + iphlen + 20) {
330						nm_prlim(1, "Short GSO fragment "
331								"[TCP], dropping");
332						return;
333					}
334					gso_hdr_len = ethhlen + iphlen +
335						      4 * (tcph->doff >> 4);
336				} else {
337					gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
338				}
339
340				if (src_len < gso_hdr_len) {
341					nm_prlim(1, "Short GSO fragment [TCP/UDP], dropping");
342					return;
343				}
344
345				nm_prdis(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
346								   dst_na->mfs);
347
348				/* Advance source pointers. */
349				src += gso_hdr_len;
350				src_len -= gso_hdr_len;
351				if (src_len == 0) {
352					ft_p++;
353					if (ft_p == ft_end)
354						break;
355					src = ft_p->ft_buf;
356					src_len = ft_p->ft_len;
357				}
358			}
359
360			/* Fill in the header of the current segment. */
361			if (gso_bytes == 0) {
362				memcpy(dst, gso_hdr, gso_hdr_len);
363				gso_bytes = gso_hdr_len;
364			}
365
366			/* Fill in data and update source and dest pointers. */
367			copy = src_len;
368			if (gso_bytes + copy > dst_na->mfs)
369				copy = dst_na->mfs - gso_bytes;
370			memcpy(dst + gso_bytes, src, copy);
371			gso_bytes += copy;
372			src += copy;
373			src_len -= copy;
374
375			/* A segment is complete or we have processed all the
376			   the GSO payload bytes. */
377			if (gso_bytes >= dst_na->mfs ||
378				(src_len == 0 && ft_p + 1 == ft_end)) {
379				/* After raw segmentation, we must fix some header
380				 * fields and compute checksums, in a protocol dependent
381				 * way. */
382				gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
383						ipv4, iphlen, tcp,
384						gso_idx, segmented_bytes,
385						src_len == 0 && ft_p + 1 == ft_end);
386
387				nm_prdis("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
388				dst_slot->len = gso_bytes;
389				dst_slot->flags = 0;
390				dst_slots++;
391				segmented_bytes += gso_bytes - gso_hdr_len;
392
393				gso_bytes = 0;
394				gso_idx++;
395
396				/* Next destination slot. */
397				j_cur = nm_next(j_cur, lim);
398				dst_slot = &dst_ring->slot[j_cur];
399				dst = NMB(&dst_na->up, dst_slot);
400			}
401
402			/* Next input slot. */
403			if (src_len == 0) {
404				ft_p++;
405				if (ft_p == ft_end)
406					break;
407				src = ft_p->ft_buf;
408				src_len = ft_p->ft_len;
409			}
410		}
411		nm_prdis(3, "%d bytes segmented", segmented_bytes);
412
413	} else {
414		/* Address of a checksum field into a destination slot. */
415		uint16_t *check = NULL;
416		/* Accumulator for an unfolded checksum. */
417		rawsum_t csum = 0;
418
419		/* Process a non-GSO packet. */
420
421		/* Init 'check' if necessary. */
422		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
423			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
424				nm_prerr("invalid checksum request");
425			else
426				check = (uint16_t *)(dst + vh->csum_start +
427						vh->csum_offset);
428		}
429
430		while (ft_p != ft_end) {
431			/* Init/update the packet checksum if needed. */
432			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
433				if (!dst_slots)
434					csum = nm_os_csum_raw(src + vh->csum_start,
435								src_len - vh->csum_start, 0);
436				else
437					csum = nm_os_csum_raw(src, src_len, csum);
438			}
439
440			/* Round to a multiple of 64 */
441			src_len = (src_len + 63) & ~63;
442
443			if (ft_p->ft_flags & NS_INDIRECT) {
444				if (copyin(src, dst, src_len)) {
445					/* Invalid user pointer, pretend len is 0. */
446					dst_len = 0;
447				}
448			} else {
449				memcpy(dst, src, (int)src_len);
450			}
451			dst_slot->len = dst_len;
452			dst_slots++;
453
454			/* Next destination slot. */
455			j_cur = nm_next(j_cur, lim);
456			dst_slot = &dst_ring->slot[j_cur];
457			dst = NMB(&dst_na->up, dst_slot);
458
459			/* Next source slot. */
460			ft_p++;
461			src = ft_p->ft_buf;
462			dst_len = src_len = ft_p->ft_len;
463		}
464
465		/* Finalize (fold) the checksum if needed. */
466		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
467			*check = nm_os_csum_fold(csum);
468		}
469		nm_prdis(3, "using %u dst_slots", dst_slots);
470
471		/* A second pass on the destination slots to set the slot flags,
472		 * using the right number of destination slots.
473		 */
474		while (j_start != j_cur) {
475			dst_slot = &dst_ring->slot[j_start];
476			dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
477			j_start = nm_next(j_start, lim);
478		}
479		/* Clear NS_MOREFRAG flag on last entry. */
480		dst_slot->flags = (dst_slots << 8);
481	}
482
483	/* Update howmany and j. This is to commit the use of
484	 * those slots in the destination ring. */
485	if (unlikely(dst_slots > *howmany)) {
486		nm_prerr("bug: slot allocation error");
487	}
488	*j = j_cur;
489	*howmany -= dst_slots;
490}
491