netmap_offloadings.c revision 270252
1298948Sadrian/*
2298948Sadrian * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
3298948Sadrian *
4298948Sadrian * Redistribution and use in source and binary forms, with or without
5298948Sadrian * modification, are permitted provided that the following conditions
6298948Sadrian * are met:
7298948Sadrian *   1. Redistributions of source code must retain the above copyright
8298948Sadrian *      notice, this list of conditions and the following disclaimer.
9298948Sadrian *   2. Redistributions in binary form must reproduce the above copyright
10298948Sadrian *      notice, this list of conditions and the following disclaimer in the
11298948Sadrian *      documentation and/or other materials provided with the distribution.
12298948Sadrian *
13298948Sadrian * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14298948Sadrian * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15298948Sadrian * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16298948Sadrian * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17298948Sadrian * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18298948Sadrian * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19298948Sadrian * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20298948Sadrian * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21298948Sadrian * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22298948Sadrian * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23298948Sadrian * SUCH DAMAGE.
24298948Sadrian */
25298948Sadrian
26298948Sadrian/* $FreeBSD: stable/10/sys/dev/netmap/netmap_offloadings.c 270252 2014-08-20 23:34:36Z luigi $ */
27298948Sadrian
28298948Sadrian#if defined(__FreeBSD__)
29298948Sadrian#include <sys/cdefs.h> /* prerequisite */
30298948Sadrian
31298948Sadrian#include <sys/types.h>
32298948Sadrian#include <sys/errno.h>
33299984Sadrian#include <sys/param.h>	/* defines used in kernel.h */
34299984Sadrian#include <sys/kernel.h>	/* types used in module initialization */
35299984Sadrian#include <sys/sockio.h>
36298948Sadrian#include <sys/socketvar.h>	/* struct socket */
37298948Sadrian#include <sys/socket.h> /* sockaddrs */
38298948Sadrian#include <net/if.h>
39298948Sadrian#include <net/if_var.h>
40298948Sadrian#include <machine/bus.h>	/* bus_dmamap_* */
41298948Sadrian#include <sys/endian.h>
42298948Sadrian
43298948Sadrian#elif defined(linux)
44298948Sadrian
45298948Sadrian#include "bsd_glue.h"
46298948Sadrian
47298948Sadrian#elif defined(__APPLE__)
48298948Sadrian
49298948Sadrian#warning OSX support is only partial
50298948Sadrian#include "osx_glue.h"
51298948Sadrian
52298948Sadrian#else
53298948Sadrian
54298948Sadrian#error	Unsupported platform
55298948Sadrian
56298948Sadrian#endif /* unsupported */
57298948Sadrian
58298948Sadrian#include <net/netmap.h>
59298948Sadrian#include <dev/netmap/netmap_kern.h>
60298948Sadrian
61298948Sadrian
62298948Sadrian
63298948Sadrian/* This routine is called by bdg_mismatch_datapath() when it finishes
64298948Sadrian * accumulating bytes for a segment, in order to fix some fields in the
65298948Sadrian * segment headers (which still contain the same content as the header
66298948Sadrian * of the original GSO packet). 'buf' points to the beginning (e.g.
67298948Sadrian * the ethernet header) of the segment, and 'len' is its length.
68298948Sadrian */
69298948Sadrianstatic void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
70298948Sadrian			    u_int segmented_bytes, u_int last_segment,
71298948Sadrian			    u_int tcp, u_int iphlen)
72298948Sadrian{
73298948Sadrian	struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
74298948Sadrian	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
75298948Sadrian	uint16_t *check = NULL;
76298948Sadrian	uint8_t *check_data = NULL;
77298948Sadrian
78298948Sadrian	if (iphlen == 20) {
79298948Sadrian		/* Set the IPv4 "Total Length" field. */
80298948Sadrian		iph->tot_len = htobe16(len-14);
81298948Sadrian		ND("ip total length %u", be16toh(ip->tot_len));
82298948Sadrian
83298948Sadrian		/* Set the IPv4 "Identification" field. */
84298948Sadrian		iph->id = htobe16(be16toh(iph->id) + idx);
85298948Sadrian		ND("ip identification %u", be16toh(iph->id));
86298948Sadrian
87298948Sadrian		/* Compute and insert the IPv4 header checksum. */
88298948Sadrian		iph->check = 0;
89298948Sadrian		iph->check = nm_csum_ipv4(iph);
90298948Sadrian		ND("IP csum %x", be16toh(iph->check));
91298948Sadrian	} else {/* if (iphlen == 40) */
92298948Sadrian		/* Set the IPv6 "Payload Len" field. */
93298948Sadrian		ip6h->payload_len = htobe16(len-14-iphlen);
94298948Sadrian	}
95298948Sadrian
96298948Sadrian	if (tcp) {
97298948Sadrian		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
98298948Sadrian
99298948Sadrian		/* Set the TCP sequence number. */
100298948Sadrian		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
101298948Sadrian		ND("tcp seq %u", be32toh(tcph->seq));
102298948Sadrian
103298948Sadrian		/* Zero the PSH and FIN TCP flags if this is not the last
104298948Sadrian		   segment. */
105298948Sadrian		if (!last_segment)
106298948Sadrian			tcph->flags &= ~(0x8 | 0x1);
107298948Sadrian		ND("last_segment %u", last_segment);
108298948Sadrian
109298948Sadrian		check = &tcph->check;
110298948Sadrian		check_data = (uint8_t *)tcph;
111298948Sadrian	} else { /* UDP */
112298948Sadrian		struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
113298948Sadrian
114298948Sadrian		/* Set the UDP 'Length' field. */
115298948Sadrian		udph->len = htobe16(len-14-iphlen);
116298948Sadrian
117298948Sadrian		check = &udph->check;
118298948Sadrian		check_data = (uint8_t *)udph;
119298948Sadrian	}
120298948Sadrian
121298948Sadrian	/* Compute and insert TCP/UDP checksum. */
122298948Sadrian	*check = 0;
123298948Sadrian	if (iphlen == 20)
124298948Sadrian		nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
125298948Sadrian	else
126298948Sadrian		nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
127298948Sadrian
128298948Sadrian	ND("TCP/UDP csum %x", be16toh(*check));
129298948Sadrian}
130298948Sadrian
131298948Sadrian
132298948Sadrian/* The VALE mismatch datapath implementation. */
133298948Sadrianvoid bdg_mismatch_datapath(struct netmap_vp_adapter *na,
134298948Sadrian			   struct netmap_vp_adapter *dst_na,
135298948Sadrian			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
136298948Sadrian			   u_int *j, u_int lim, u_int *howmany)
137298948Sadrian{
138298948Sadrian	struct netmap_slot *slot = NULL;
139298948Sadrian	struct nm_vnet_hdr *vh = NULL;
140298948Sadrian	/* Number of source slots to process. */
141298948Sadrian	u_int frags = ft_p->ft_frags;
142298948Sadrian	struct nm_bdg_fwd *ft_end = ft_p + frags;
143298948Sadrian
144298948Sadrian	/* Source and destination pointers. */
145298948Sadrian	uint8_t *dst, *src;
146298948Sadrian	size_t src_len, dst_len;
147298948Sadrian
148298948Sadrian	u_int j_start = *j;
149298948Sadrian	u_int dst_slots = 0;
150298948Sadrian
151298948Sadrian	/* If the source port uses the offloadings, while destination doesn't,
152298948Sadrian	 * we grab the source virtio-net header and do the offloadings here.
153298948Sadrian	 */
154298948Sadrian	if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
155298948Sadrian		vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
156298948Sadrian	}
157298948Sadrian
158298948Sadrian	/* Init source and dest pointers. */
159298948Sadrian	src = ft_p->ft_buf;
160298948Sadrian	src_len = ft_p->ft_len;
161298948Sadrian	slot = &ring->slot[*j];
162298948Sadrian	dst = NMB(&dst_na->up, slot);
163298948Sadrian	dst_len = src_len;
164298948Sadrian
165298948Sadrian	/* We are processing the first input slot and there is a mismatch
166298948Sadrian	 * between source and destination virt_hdr_len (SHL and DHL).
167298948Sadrian	 * When the a client is using virtio-net headers, the header length
168298948Sadrian	 * can be:
169298948Sadrian	 *    - 10: the header corresponds to the struct nm_vnet_hdr
170298948Sadrian	 *    - 12: the first 10 bytes correspond to the struct
171298948Sadrian	 *          virtio_net_hdr, and the last 2 bytes store the
172298948Sadrian	 *          "mergeable buffers" info, which is an optional
173298948Sadrian	 *	    hint that can be zeroed for compability
174298948Sadrian	 *
175298948Sadrian	 * The destination header is therefore built according to the
176298948Sadrian	 * following table:
177298948Sadrian	 *
178298948Sadrian	 * SHL | DHL | destination header
179298948Sadrian	 * -----------------------------
180298948Sadrian	 *   0 |  10 | zero
181298948Sadrian	 *   0 |  12 | zero
182298948Sadrian	 *  10 |   0 | doesn't exist
183298948Sadrian	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
184298948Sadrian	 *  12 |   0 | doesn't exist
185298948Sadrian	 *  12 |  10 | copied from the first 10 bytes of source header
186298948Sadrian	 */
187298948Sadrian	bzero(dst, dst_na->virt_hdr_len);
188298948Sadrian	if (na->virt_hdr_len && dst_na->virt_hdr_len)
189298948Sadrian		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
190298948Sadrian	/* Skip the virtio-net headers. */
191298948Sadrian	src += na->virt_hdr_len;
192298948Sadrian	src_len -= na->virt_hdr_len;
193298948Sadrian	dst += dst_na->virt_hdr_len;
194298948Sadrian	dst_len = dst_na->virt_hdr_len + src_len;
195298948Sadrian
196298948Sadrian	/* Here it could be dst_len == 0 (which implies src_len == 0),
197298948Sadrian	 * so we avoid passing a zero length fragment.
198298948Sadrian	 */
199298948Sadrian	if (dst_len == 0) {
200298948Sadrian		ft_p++;
201298948Sadrian		src = ft_p->ft_buf;
202298948Sadrian		src_len = ft_p->ft_len;
203298948Sadrian		dst_len = src_len;
204298948Sadrian	}
205298948Sadrian
206298948Sadrian	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
207298948Sadrian		u_int gso_bytes = 0;
208298948Sadrian		/* Length of the GSO packet header. */
209298948Sadrian		u_int gso_hdr_len = 0;
210298948Sadrian		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
211298948Sadrian		uint8_t *gso_hdr = NULL;
212298948Sadrian		/* Index of the current segment. */
213298948Sadrian		u_int gso_idx = 0;
214298948Sadrian		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
215298948Sadrian		u_int segmented_bytes = 0;
216298948Sadrian		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
217298948Sadrian		u_int iphlen = 0;
218298948Sadrian		/* Is this a TCP or an UDP GSO packet? */
219298948Sadrian		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
220298948Sadrian				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
221298948Sadrian
222298948Sadrian		/* Segment the GSO packet contained into the input slots (frags). */
223298948Sadrian		while (ft_p != ft_end) {
224298948Sadrian			size_t copy;
225298948Sadrian
226298948Sadrian			/* Grab the GSO header if we don't have it. */
227298948Sadrian			if (!gso_hdr) {
228298948Sadrian				uint16_t ethertype;
229298948Sadrian
230298948Sadrian				gso_hdr = src;
231298948Sadrian
232298948Sadrian				/* Look at the 'Ethertype' field to see if this packet
233298948Sadrian				 * is IPv4 or IPv6.
234298948Sadrian				 */
235298948Sadrian				ethertype = be16toh(*((uint16_t *)(gso_hdr  + 12)));
236298948Sadrian				if (ethertype == 0x0800)
237298948Sadrian					iphlen = 20;
238298948Sadrian				else /* if (ethertype == 0x86DD) */
239298948Sadrian					iphlen = 40;
240298948Sadrian				ND(3, "type=%04x", ethertype);
241298948Sadrian
242298948Sadrian				/* Compute gso_hdr_len. For TCP we need to read the
243298948Sadrian				 * content of the 'Data Offset' field.
244298948Sadrian				 */
245298948Sadrian				if (tcp) {
246298948Sadrian					struct nm_tcphdr *tcph =
247298948Sadrian						(struct nm_tcphdr *)&gso_hdr[14+iphlen];
248298948Sadrian
249298948Sadrian					gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
250298948Sadrian				} else
251298948Sadrian					gso_hdr_len = 14 + iphlen + 8; /* UDP */
252298948Sadrian
253298948Sadrian				ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
254298948Sadrian								dst_na->mfs);
255298948Sadrian
256298948Sadrian				/* Advance source pointers. */
257298948Sadrian				src += gso_hdr_len;
258298948Sadrian				src_len -= gso_hdr_len;
259298948Sadrian				if (src_len == 0) {
260298948Sadrian					ft_p++;
261298948Sadrian					if (ft_p == ft_end)
262298948Sadrian						break;
263298948Sadrian					src = ft_p->ft_buf;
264298948Sadrian					src_len = ft_p->ft_len;
265298948Sadrian					continue;
266298948Sadrian				}
267298948Sadrian			}
268298948Sadrian
269298948Sadrian			/* Fill in the header of the current segment. */
270298948Sadrian			if (gso_bytes == 0) {
271298948Sadrian				memcpy(dst, gso_hdr, gso_hdr_len);
272298948Sadrian				gso_bytes = gso_hdr_len;
273298948Sadrian			}
274298948Sadrian
275298948Sadrian			/* Fill in data and update source and dest pointers. */
276298948Sadrian			copy = src_len;
277298948Sadrian			if (gso_bytes + copy > dst_na->mfs)
278298948Sadrian				copy = dst_na->mfs - gso_bytes;
279298948Sadrian			memcpy(dst + gso_bytes, src, copy);
280298948Sadrian			gso_bytes += copy;
281298948Sadrian			src += copy;
282298948Sadrian			src_len -= copy;
283298948Sadrian
284298948Sadrian			/* A segment is complete or we have processed all the
285298948Sadrian			   the GSO payload bytes. */
286298948Sadrian			if (gso_bytes >= dst_na->mfs ||
287298948Sadrian				(src_len == 0 && ft_p + 1 == ft_end)) {
288298948Sadrian				/* After raw segmentation, we must fix some header
289298948Sadrian				 * fields and compute checksums, in a protocol dependent
290298948Sadrian				 * way. */
291298948Sadrian				gso_fix_segment(dst, gso_bytes, gso_idx,
292298948Sadrian						segmented_bytes,
293298948Sadrian						src_len == 0 && ft_p + 1 == ft_end,
294298948Sadrian						tcp, iphlen);
295298948Sadrian
296298948Sadrian				ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
297298948Sadrian				slot->len = gso_bytes;
298298948Sadrian				slot->flags = 0;
299298948Sadrian				segmented_bytes += gso_bytes - gso_hdr_len;
300298948Sadrian
301298948Sadrian				dst_slots++;
302298948Sadrian
303298948Sadrian				/* Next destination slot. */
304298948Sadrian				*j = nm_next(*j, lim);
305298948Sadrian				slot = &ring->slot[*j];
306298948Sadrian				dst = NMB(&dst_na->up, slot);
307298948Sadrian
308298948Sadrian				gso_bytes = 0;
309298948Sadrian				gso_idx++;
310298948Sadrian			}
311298948Sadrian
312298948Sadrian			/* Next input slot. */
313298948Sadrian			if (src_len == 0) {
314298948Sadrian				ft_p++;
315298948Sadrian				if (ft_p == ft_end)
316298948Sadrian					break;
317298948Sadrian				src = ft_p->ft_buf;
318298948Sadrian				src_len = ft_p->ft_len;
319298948Sadrian			}
320298948Sadrian		}
321298948Sadrian		ND(3, "%d bytes segmented", segmented_bytes);
322298948Sadrian
323298948Sadrian	} else {
324298948Sadrian		/* Address of a checksum field into a destination slot. */
325298948Sadrian		uint16_t *check = NULL;
326298948Sadrian		/* Accumulator for an unfolded checksum. */
327298948Sadrian		rawsum_t csum = 0;
328298948Sadrian
329298948Sadrian		/* Process a non-GSO packet. */
330298948Sadrian
331298948Sadrian		/* Init 'check' if necessary. */
332298948Sadrian		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
333298948Sadrian			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
334298948Sadrian				D("invalid checksum request");
335298948Sadrian			else
336298948Sadrian				check = (uint16_t *)(dst + vh->csum_start +
337298948Sadrian						vh->csum_offset);
338298948Sadrian		}
339298948Sadrian
340298948Sadrian		while (ft_p != ft_end) {
341298948Sadrian			/* Init/update the packet checksum if needed. */
342298948Sadrian			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
343298948Sadrian				if (!dst_slots)
344298948Sadrian					csum = nm_csum_raw(src + vh->csum_start,
345298948Sadrian								src_len - vh->csum_start, 0);
346298948Sadrian				else
347298948Sadrian					csum = nm_csum_raw(src, src_len, csum);
348298948Sadrian			}
349298948Sadrian
350298948Sadrian			/* Round to a multiple of 64 */
351298948Sadrian			src_len = (src_len + 63) & ~63;
352298948Sadrian
353298948Sadrian			if (ft_p->ft_flags & NS_INDIRECT) {
354298948Sadrian				if (copyin(src, dst, src_len)) {
355298948Sadrian					/* Invalid user pointer, pretend len is 0. */
356298948Sadrian					dst_len = 0;
357298948Sadrian				}
358298948Sadrian			} else {
359298948Sadrian				memcpy(dst, src, (int)src_len);
360298948Sadrian			}
361298948Sadrian			slot->len = dst_len;
362298948Sadrian
363298948Sadrian			dst_slots++;
364298948Sadrian
365298948Sadrian			/* Next destination slot. */
366298948Sadrian			*j = nm_next(*j, lim);
367298948Sadrian			slot = &ring->slot[*j];
368298948Sadrian			dst = NMB(&dst_na->up, slot);
369298948Sadrian
370298948Sadrian			/* Next source slot. */
371298948Sadrian			ft_p++;
372298948Sadrian			src = ft_p->ft_buf;
373298948Sadrian			dst_len = src_len = ft_p->ft_len;
374298948Sadrian
375298948Sadrian		}
376298948Sadrian
377298948Sadrian		/* Finalize (fold) the checksum if needed. */
378298948Sadrian		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
379298948Sadrian			*check = nm_csum_fold(csum);
380298948Sadrian		}
381298948Sadrian		ND(3, "using %u dst_slots", dst_slots);
382298948Sadrian
383298948Sadrian		/* A second pass on the desitations slots to set the slot flags,
384298948Sadrian		 * using the right number of destination slots.
385298948Sadrian		 */
386298948Sadrian		while (j_start != *j) {
387298948Sadrian			slot = &ring->slot[j_start];
388298948Sadrian			slot->flags = (dst_slots << 8)| NS_MOREFRAG;
389298948Sadrian			j_start = nm_next(j_start, lim);
390298948Sadrian		}
391298948Sadrian		/* Clear NS_MOREFRAG flag on last entry. */
392298948Sadrian		slot->flags = (dst_slots << 8);
393298948Sadrian	}
394298948Sadrian
395298948Sadrian	/* Update howmany. */
396298948Sadrian	if (unlikely(dst_slots > *howmany)) {
397298948Sadrian		dst_slots = *howmany;
398298948Sadrian		D("Slot allocation error: Should never happen");
399298948Sadrian	}
400298948Sadrian	*howmany -= dst_slots;
401298948Sadrian}
402298948Sadrian