1262153Sluigi/*
2262153Sluigi * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
3262153Sluigi *
4262153Sluigi * Redistribution and use in source and binary forms, with or without
5262153Sluigi * modification, are permitted provided that the following conditions
6262153Sluigi * are met:
7262153Sluigi *   1. Redistributions of source code must retain the above copyright
8262153Sluigi *      notice, this list of conditions and the following disclaimer.
9262153Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10262153Sluigi *      notice, this list of conditions and the following disclaimer in the
11262153Sluigi *      documentation and/or other materials provided with the distribution.
12262153Sluigi *
13262153Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14262153Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15262153Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16262153Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17262153Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18262153Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19262153Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20262153Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21262153Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22262153Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23262153Sluigi * SUCH DAMAGE.
24262153Sluigi */
25262153Sluigi
26262153Sluigi/* $FreeBSD$ */
27262153Sluigi
28262153Sluigi#if defined(__FreeBSD__)
29262153Sluigi#include <sys/cdefs.h> /* prerequisite */
30262153Sluigi
31262153Sluigi#include <sys/types.h>
32262153Sluigi#include <sys/errno.h>
33262153Sluigi#include <sys/param.h>	/* defines used in kernel.h */
34262153Sluigi#include <sys/kernel.h>	/* types used in module initialization */
35262153Sluigi#include <sys/sockio.h>
36262153Sluigi#include <sys/socketvar.h>	/* struct socket */
37262153Sluigi#include <sys/socket.h> /* sockaddrs */
38262153Sluigi#include <net/if.h>
39262153Sluigi#include <net/if_var.h>
40262153Sluigi#include <machine/bus.h>	/* bus_dmamap_* */
41262153Sluigi#include <sys/endian.h>
42262153Sluigi
43262153Sluigi#elif defined(linux)
44262153Sluigi
45262153Sluigi#include "bsd_glue.h"
46262153Sluigi
47262153Sluigi#elif defined(__APPLE__)
48262153Sluigi
49262153Sluigi#warning OSX support is only partial
50262153Sluigi#include "osx_glue.h"
51262153Sluigi
52262153Sluigi#else
53262153Sluigi
54262153Sluigi#error	Unsupported platform
55262153Sluigi
56262153Sluigi#endif /* unsupported */
57262153Sluigi
58262153Sluigi#include <net/netmap.h>
59262153Sluigi#include <dev/netmap/netmap_kern.h>
60262153Sluigi
61262153Sluigi
62262153Sluigi
63262153Sluigi/* This routine is called by bdg_mismatch_datapath() when it finishes
64262153Sluigi * accumulating bytes for a segment, in order to fix some fields in the
65262153Sluigi * segment headers (which still contain the same content as the header
66262153Sluigi * of the original GSO packet). 'buf' points to the beginning (e.g.
67262153Sluigi * the ethernet header) of the segment, and 'len' is its length.
68262153Sluigi */
69262153Sluigistatic void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
70262153Sluigi			    u_int segmented_bytes, u_int last_segment,
71262153Sluigi			    u_int tcp, u_int iphlen)
72262153Sluigi{
73262153Sluigi	struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
74262153Sluigi	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
75262153Sluigi	uint16_t *check = NULL;
76262153Sluigi	uint8_t *check_data = NULL;
77262153Sluigi
78262153Sluigi	if (iphlen == 20) {
79262153Sluigi		/* Set the IPv4 "Total Length" field. */
80262153Sluigi		iph->tot_len = htobe16(len-14);
81262153Sluigi		ND("ip total length %u", be16toh(ip->tot_len));
82262153Sluigi
83262153Sluigi		/* Set the IPv4 "Identification" field. */
84262153Sluigi		iph->id = htobe16(be16toh(iph->id) + idx);
85262153Sluigi		ND("ip identification %u", be16toh(iph->id));
86262153Sluigi
87262153Sluigi		/* Compute and insert the IPv4 header checksum. */
88262153Sluigi		iph->check = 0;
89262153Sluigi		iph->check = nm_csum_ipv4(iph);
90262153Sluigi		ND("IP csum %x", be16toh(iph->check));
91262153Sluigi	} else {/* if (iphlen == 40) */
92262153Sluigi		/* Set the IPv6 "Payload Len" field. */
93262153Sluigi		ip6h->payload_len = htobe16(len-14-iphlen);
94262153Sluigi	}
95262153Sluigi
96262153Sluigi	if (tcp) {
97262153Sluigi		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
98262153Sluigi
99262153Sluigi		/* Set the TCP sequence number. */
100262153Sluigi		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
101262153Sluigi		ND("tcp seq %u", be32toh(tcph->seq));
102262153Sluigi
103262153Sluigi		/* Zero the PSH and FIN TCP flags if this is not the last
104262153Sluigi		   segment. */
105262153Sluigi		if (!last_segment)
106262153Sluigi			tcph->flags &= ~(0x8 | 0x1);
107262153Sluigi		ND("last_segment %u", last_segment);
108262153Sluigi
109262153Sluigi		check = &tcph->check;
110262153Sluigi		check_data = (uint8_t *)tcph;
111262153Sluigi	} else { /* UDP */
112262153Sluigi		struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
113262153Sluigi
114262153Sluigi		/* Set the UDP 'Length' field. */
115262153Sluigi		udph->len = htobe16(len-14-iphlen);
116262153Sluigi
117262153Sluigi		check = &udph->check;
118262153Sluigi		check_data = (uint8_t *)udph;
119262153Sluigi	}
120262153Sluigi
121262153Sluigi	/* Compute and insert TCP/UDP checksum. */
122262153Sluigi	*check = 0;
123262153Sluigi	if (iphlen == 20)
124262153Sluigi		nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
125262153Sluigi	else
126262153Sluigi		nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
127262153Sluigi
128262153Sluigi	ND("TCP/UDP csum %x", be16toh(*check));
129262153Sluigi}
130262153Sluigi
131262153Sluigi
132262153Sluigi/* The VALE mismatch datapath implementation. */
133262153Sluigivoid bdg_mismatch_datapath(struct netmap_vp_adapter *na,
134262153Sluigi			   struct netmap_vp_adapter *dst_na,
135262153Sluigi			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
136262153Sluigi			   u_int *j, u_int lim, u_int *howmany)
137262153Sluigi{
138262153Sluigi	struct netmap_slot *slot = NULL;
139262153Sluigi	struct nm_vnet_hdr *vh = NULL;
140262153Sluigi	/* Number of source slots to process. */
141262153Sluigi	u_int frags = ft_p->ft_frags;
142262153Sluigi	struct nm_bdg_fwd *ft_end = ft_p + frags;
143262153Sluigi
144262153Sluigi	/* Source and destination pointers. */
145262153Sluigi	uint8_t *dst, *src;
146262153Sluigi	size_t src_len, dst_len;
147262153Sluigi
148262153Sluigi	u_int j_start = *j;
149262153Sluigi	u_int dst_slots = 0;
150262153Sluigi
151262153Sluigi	/* If the source port uses the offloadings, while destination doesn't,
152262153Sluigi	 * we grab the source virtio-net header and do the offloadings here.
153262153Sluigi	 */
154262153Sluigi	if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
155262153Sluigi		vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
156262153Sluigi	}
157262153Sluigi
158262153Sluigi	/* Init source and dest pointers. */
159262153Sluigi	src = ft_p->ft_buf;
160262153Sluigi	src_len = ft_p->ft_len;
161262153Sluigi	slot = &ring->slot[*j];
162262153Sluigi	dst = BDG_NMB(&dst_na->up, slot);
163262153Sluigi	dst_len = src_len;
164262153Sluigi
165262153Sluigi	/* We are processing the first input slot and there is a mismatch
166262153Sluigi	 * between source and destination virt_hdr_len (SHL and DHL).
167262153Sluigi	 * When the a client is using virtio-net headers, the header length
168262153Sluigi	 * can be:
169262153Sluigi	 *    - 10: the header corresponds to the struct nm_vnet_hdr
170262153Sluigi	 *    - 12: the first 10 bytes correspond to the struct
171262153Sluigi	 *          virtio_net_hdr, and the last 2 bytes store the
172262153Sluigi	 *          "mergeable buffers" info, which is an optional
173262153Sluigi	 *	    hint that can be zeroed for compability
174262153Sluigi	 *
175262153Sluigi	 * The destination header is therefore built according to the
176262153Sluigi	 * following table:
177262153Sluigi	 *
178262153Sluigi	 * SHL | DHL | destination header
179262153Sluigi	 * -----------------------------
180262153Sluigi	 *   0 |  10 | zero
181262153Sluigi	 *   0 |  12 | zero
182262153Sluigi	 *  10 |   0 | doesn't exist
183262153Sluigi	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
184262153Sluigi	 *  12 |   0 | doesn't exist
185262153Sluigi	 *  12 |  10 | copied from the first 10 bytes of source header
186262153Sluigi	 */
187262153Sluigi	bzero(dst, dst_na->virt_hdr_len);
188262153Sluigi	if (na->virt_hdr_len && dst_na->virt_hdr_len)
189262153Sluigi		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
190262153Sluigi	/* Skip the virtio-net headers. */
191262153Sluigi	src += na->virt_hdr_len;
192262153Sluigi	src_len -= na->virt_hdr_len;
193262153Sluigi	dst += dst_na->virt_hdr_len;
194262153Sluigi	dst_len = dst_na->virt_hdr_len + src_len;
195262153Sluigi
196262153Sluigi	/* Here it could be dst_len == 0 (which implies src_len == 0),
197262153Sluigi	 * so we avoid passing a zero length fragment.
198262153Sluigi	 */
199262153Sluigi	if (dst_len == 0) {
200262153Sluigi		ft_p++;
201262153Sluigi		src = ft_p->ft_buf;
202262153Sluigi		src_len = ft_p->ft_len;
203262153Sluigi		dst_len = src_len;
204262153Sluigi	}
205262153Sluigi
206262153Sluigi	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
207262153Sluigi		u_int gso_bytes = 0;
208262153Sluigi		/* Length of the GSO packet header. */
209262153Sluigi		u_int gso_hdr_len = 0;
210262153Sluigi		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
211262153Sluigi		uint8_t *gso_hdr = NULL;
212262153Sluigi		/* Index of the current segment. */
213262153Sluigi		u_int gso_idx = 0;
214262153Sluigi		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
215262153Sluigi		u_int segmented_bytes = 0;
216262153Sluigi		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
217262153Sluigi		u_int iphlen = 0;
218262153Sluigi		/* Is this a TCP or an UDP GSO packet? */
219262153Sluigi		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
220262153Sluigi				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
221262153Sluigi
222262153Sluigi		/* Segment the GSO packet contained into the input slots (frags). */
223262153Sluigi		while (ft_p != ft_end) {
224262153Sluigi			size_t copy;
225262153Sluigi
226262153Sluigi			/* Grab the GSO header if we don't have it. */
227262153Sluigi			if (!gso_hdr) {
228262153Sluigi				uint16_t ethertype;
229262153Sluigi
230262153Sluigi				gso_hdr = src;
231262153Sluigi
232262153Sluigi				/* Look at the 'Ethertype' field to see if this packet
233262153Sluigi				 * is IPv4 or IPv6.
234262153Sluigi				 */
235262153Sluigi				ethertype = be16toh(*((uint16_t *)(gso_hdr  + 12)));
236262153Sluigi				if (ethertype == 0x0800)
237262153Sluigi					iphlen = 20;
238262153Sluigi				else /* if (ethertype == 0x86DD) */
239262153Sluigi					iphlen = 40;
240262153Sluigi				ND(3, "type=%04x", ethertype);
241262153Sluigi
242262153Sluigi				/* Compute gso_hdr_len. For TCP we need to read the
243262153Sluigi				 * content of the 'Data Offset' field.
244262153Sluigi				 */
245262153Sluigi				if (tcp) {
246262153Sluigi					struct nm_tcphdr *tcph =
247262153Sluigi						(struct nm_tcphdr *)&gso_hdr[14+iphlen];
248262153Sluigi
249262153Sluigi					gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
250262153Sluigi				} else
251262153Sluigi					gso_hdr_len = 14 + iphlen + 8; /* UDP */
252262153Sluigi
253262153Sluigi				ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
254262153Sluigi								dst_na->mfs);
255262153Sluigi
256262153Sluigi				/* Advance source pointers. */
257262153Sluigi				src += gso_hdr_len;
258262153Sluigi				src_len -= gso_hdr_len;
259262153Sluigi				if (src_len == 0) {
260262153Sluigi					ft_p++;
261262153Sluigi					if (ft_p == ft_end)
262262153Sluigi						break;
263262153Sluigi					src = ft_p->ft_buf;
264262153Sluigi					src_len = ft_p->ft_len;
265262153Sluigi					continue;
266262153Sluigi				}
267262153Sluigi			}
268262153Sluigi
269262153Sluigi			/* Fill in the header of the current segment. */
270262153Sluigi			if (gso_bytes == 0) {
271262153Sluigi				memcpy(dst, gso_hdr, gso_hdr_len);
272262153Sluigi				gso_bytes = gso_hdr_len;
273262153Sluigi			}
274262153Sluigi
275262153Sluigi			/* Fill in data and update source and dest pointers. */
276262153Sluigi			copy = src_len;
277262153Sluigi			if (gso_bytes + copy > dst_na->mfs)
278262153Sluigi				copy = dst_na->mfs - gso_bytes;
279262153Sluigi			memcpy(dst + gso_bytes, src, copy);
280262153Sluigi			gso_bytes += copy;
281262153Sluigi			src += copy;
282262153Sluigi			src_len -= copy;
283262153Sluigi
284262153Sluigi			/* A segment is complete or we have processed all the
285262153Sluigi			   the GSO payload bytes. */
286262153Sluigi			if (gso_bytes >= dst_na->mfs ||
287262153Sluigi				(src_len == 0 && ft_p + 1 == ft_end)) {
288262153Sluigi				/* After raw segmentation, we must fix some header
289262153Sluigi				 * fields and compute checksums, in a protocol dependent
290262153Sluigi				 * way. */
291262153Sluigi				gso_fix_segment(dst, gso_bytes, gso_idx,
292262153Sluigi						segmented_bytes,
293262153Sluigi						src_len == 0 && ft_p + 1 == ft_end,
294262153Sluigi						tcp, iphlen);
295262153Sluigi
296262153Sluigi				ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
297262153Sluigi				slot->len = gso_bytes;
298262153Sluigi				slot->flags = 0;
299262153Sluigi				segmented_bytes += gso_bytes - gso_hdr_len;
300262153Sluigi
301262153Sluigi				dst_slots++;
302262153Sluigi
303262153Sluigi				/* Next destination slot. */
304262153Sluigi				*j = nm_next(*j, lim);
305262153Sluigi				slot = &ring->slot[*j];
306262153Sluigi				dst = BDG_NMB(&dst_na->up, slot);
307262153Sluigi
308262153Sluigi				gso_bytes = 0;
309262153Sluigi				gso_idx++;
310262153Sluigi			}
311262153Sluigi
312262153Sluigi			/* Next input slot. */
313262153Sluigi			if (src_len == 0) {
314262153Sluigi				ft_p++;
315262153Sluigi				if (ft_p == ft_end)
316262153Sluigi					break;
317262153Sluigi				src = ft_p->ft_buf;
318262153Sluigi				src_len = ft_p->ft_len;
319262153Sluigi			}
320262153Sluigi		}
321262153Sluigi		ND(3, "%d bytes segmented", segmented_bytes);
322262153Sluigi
323262153Sluigi	} else {
324262153Sluigi		/* Address of a checksum field into a destination slot. */
325262153Sluigi		uint16_t *check = NULL;
326262153Sluigi		/* Accumulator for an unfolded checksum. */
327262153Sluigi		rawsum_t csum = 0;
328262153Sluigi
329262153Sluigi		/* Process a non-GSO packet. */
330262153Sluigi
331262153Sluigi		/* Init 'check' if necessary. */
332262153Sluigi		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
333262153Sluigi			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
334262153Sluigi				D("invalid checksum request");
335262153Sluigi			else
336262153Sluigi				check = (uint16_t *)(dst + vh->csum_start +
337262153Sluigi						vh->csum_offset);
338262153Sluigi		}
339262153Sluigi
340262153Sluigi		while (ft_p != ft_end) {
341262153Sluigi			/* Init/update the packet checksum if needed. */
342262153Sluigi			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
343262153Sluigi				if (!dst_slots)
344262153Sluigi					csum = nm_csum_raw(src + vh->csum_start,
345262153Sluigi								src_len - vh->csum_start, 0);
346262153Sluigi				else
347262153Sluigi					csum = nm_csum_raw(src, src_len, csum);
348262153Sluigi			}
349262153Sluigi
350262153Sluigi			/* Round to a multiple of 64 */
351262153Sluigi			src_len = (src_len + 63) & ~63;
352262153Sluigi
353262153Sluigi			if (ft_p->ft_flags & NS_INDIRECT) {
354262153Sluigi				if (copyin(src, dst, src_len)) {
355262153Sluigi					/* Invalid user pointer, pretend len is 0. */
356262153Sluigi					dst_len = 0;
357262153Sluigi				}
358262153Sluigi			} else {
359262153Sluigi				memcpy(dst, src, (int)src_len);
360262153Sluigi			}
361262153Sluigi			slot->len = dst_len;
362262153Sluigi
363262153Sluigi			dst_slots++;
364262153Sluigi
365262153Sluigi			/* Next destination slot. */
366262153Sluigi			*j = nm_next(*j, lim);
367262153Sluigi			slot = &ring->slot[*j];
368262153Sluigi			dst = BDG_NMB(&dst_na->up, slot);
369262153Sluigi
370262153Sluigi			/* Next source slot. */
371262153Sluigi			ft_p++;
372262153Sluigi			src = ft_p->ft_buf;
373262153Sluigi			dst_len = src_len = ft_p->ft_len;
374262153Sluigi
375262153Sluigi		}
376262153Sluigi
377262153Sluigi		/* Finalize (fold) the checksum if needed. */
378262153Sluigi		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
379262153Sluigi			*check = nm_csum_fold(csum);
380262153Sluigi		}
381262153Sluigi		ND(3, "using %u dst_slots", dst_slots);
382262153Sluigi
383262153Sluigi		/* A second pass on the desitations slots to set the slot flags,
384262153Sluigi		 * using the right number of destination slots.
385262153Sluigi		 */
386262153Sluigi		while (j_start != *j) {
387262153Sluigi			slot = &ring->slot[j_start];
388262153Sluigi			slot->flags = (dst_slots << 8)| NS_MOREFRAG;
389262153Sluigi			j_start = nm_next(j_start, lim);
390262153Sluigi		}
391262153Sluigi		/* Clear NS_MOREFRAG flag on last entry. */
392262153Sluigi		slot->flags = (dst_slots << 8);
393262153Sluigi	}
394262153Sluigi
395262153Sluigi	/* Update howmany. */
396262153Sluigi	if (unlikely(dst_slots > *howmany)) {
397262153Sluigi		dst_slots = *howmany;
398262153Sluigi		D("Slot allocation error: Should never happen");
399262153Sluigi	}
400262153Sluigi	*howmany -= dst_slots;
401262153Sluigi}
402