netmap_offloadings.c revision 295126
1/*
2 * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *   1. Redistributions of source code must retain the above copyright
8 *      notice, this list of conditions and the following disclaimer.
9 *   2. Redistributions in binary form must reproduce the above copyright
10 *      notice, this list of conditions and the following disclaimer in the
11 *      documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/* $FreeBSD: head/sys/dev/netmap/netmap_offloadings.c 295126 2016-02-01 17:41:21Z glebius $ */
27
28#if defined(__FreeBSD__)
29#include <sys/cdefs.h> /* prerequisite */
30
31#include <sys/types.h>
32#include <sys/errno.h>
33#include <sys/param.h>	/* defines used in kernel.h */
34#include <sys/malloc.h>	/* types used in module initialization */
35#include <sys/kernel.h>	/* types used in module initialization */
36#include <sys/sockio.h>
37#include <sys/socketvar.h>	/* struct socket */
38#include <sys/socket.h> /* sockaddrs */
39#include <net/if.h>
40#include <net/if_var.h>
41#include <machine/bus.h>	/* bus_dmamap_* */
42#include <sys/endian.h>
43
44#elif defined(linux)
45
46#include "bsd_glue.h"
47
48#elif defined(__APPLE__)
49
50#warning OSX support is only partial
51#include "osx_glue.h"
52
53#else
54
55#error	Unsupported platform
56
57#endif /* unsupported */
58
59#include <net/netmap.h>
60#include <dev/netmap/netmap_kern.h>
61
62
63
64/* This routine is called by bdg_mismatch_datapath() when it finishes
65 * accumulating bytes for a segment, in order to fix some fields in the
66 * segment headers (which still contain the same content as the header
67 * of the original GSO packet). 'buf' points to the beginning (e.g.
68 * the ethernet header) of the segment, and 'len' is its length.
69 */
70static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
71			    u_int segmented_bytes, u_int last_segment,
72			    u_int tcp, u_int iphlen)
73{
74	struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
75	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
76	uint16_t *check = NULL;
77	uint8_t *check_data = NULL;
78
79	if (iphlen == 20) {
80		/* Set the IPv4 "Total Length" field. */
81		iph->tot_len = htobe16(len-14);
82		ND("ip total length %u", be16toh(ip->tot_len));
83
84		/* Set the IPv4 "Identification" field. */
85		iph->id = htobe16(be16toh(iph->id) + idx);
86		ND("ip identification %u", be16toh(iph->id));
87
88		/* Compute and insert the IPv4 header checksum. */
89		iph->check = 0;
90		iph->check = nm_csum_ipv4(iph);
91		ND("IP csum %x", be16toh(iph->check));
92	} else {/* if (iphlen == 40) */
93		/* Set the IPv6 "Payload Len" field. */
94		ip6h->payload_len = htobe16(len-14-iphlen);
95	}
96
97	if (tcp) {
98		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
99
100		/* Set the TCP sequence number. */
101		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
102		ND("tcp seq %u", be32toh(tcph->seq));
103
104		/* Zero the PSH and FIN TCP flags if this is not the last
105		   segment. */
106		if (!last_segment)
107			tcph->flags &= ~(0x8 | 0x1);
108		ND("last_segment %u", last_segment);
109
110		check = &tcph->check;
111		check_data = (uint8_t *)tcph;
112	} else { /* UDP */
113		struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
114
115		/* Set the UDP 'Length' field. */
116		udph->len = htobe16(len-14-iphlen);
117
118		check = &udph->check;
119		check_data = (uint8_t *)udph;
120	}
121
122	/* Compute and insert TCP/UDP checksum. */
123	*check = 0;
124	if (iphlen == 20)
125		nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
126	else
127		nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
128
129	ND("TCP/UDP csum %x", be16toh(*check));
130}
131
132
133/* The VALE mismatch datapath implementation. */
134void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
135			   struct netmap_vp_adapter *dst_na,
136			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
137			   u_int *j, u_int lim, u_int *howmany)
138{
139	struct netmap_slot *slot = NULL;
140	struct nm_vnet_hdr *vh = NULL;
141	/* Number of source slots to process. */
142	u_int frags = ft_p->ft_frags;
143	struct nm_bdg_fwd *ft_end = ft_p + frags;
144
145	/* Source and destination pointers. */
146	uint8_t *dst, *src;
147	size_t src_len, dst_len;
148
149	u_int j_start = *j;
150	u_int dst_slots = 0;
151
152	/* If the source port uses the offloadings, while destination doesn't,
153	 * we grab the source virtio-net header and do the offloadings here.
154	 */
155	if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
156		vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
157	}
158
159	/* Init source and dest pointers. */
160	src = ft_p->ft_buf;
161	src_len = ft_p->ft_len;
162	slot = &ring->slot[*j];
163	dst = NMB(&dst_na->up, slot);
164	dst_len = src_len;
165
166	/* We are processing the first input slot and there is a mismatch
167	 * between source and destination virt_hdr_len (SHL and DHL).
168	 * When the a client is using virtio-net headers, the header length
169	 * can be:
170	 *    - 10: the header corresponds to the struct nm_vnet_hdr
171	 *    - 12: the first 10 bytes correspond to the struct
172	 *          virtio_net_hdr, and the last 2 bytes store the
173	 *          "mergeable buffers" info, which is an optional
174	 *	    hint that can be zeroed for compability
175	 *
176	 * The destination header is therefore built according to the
177	 * following table:
178	 *
179	 * SHL | DHL | destination header
180	 * -----------------------------
181	 *   0 |  10 | zero
182	 *   0 |  12 | zero
183	 *  10 |   0 | doesn't exist
184	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
185	 *  12 |   0 | doesn't exist
186	 *  12 |  10 | copied from the first 10 bytes of source header
187	 */
188	bzero(dst, dst_na->virt_hdr_len);
189	if (na->virt_hdr_len && dst_na->virt_hdr_len)
190		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
191	/* Skip the virtio-net headers. */
192	src += na->virt_hdr_len;
193	src_len -= na->virt_hdr_len;
194	dst += dst_na->virt_hdr_len;
195	dst_len = dst_na->virt_hdr_len + src_len;
196
197	/* Here it could be dst_len == 0 (which implies src_len == 0),
198	 * so we avoid passing a zero length fragment.
199	 */
200	if (dst_len == 0) {
201		ft_p++;
202		src = ft_p->ft_buf;
203		src_len = ft_p->ft_len;
204		dst_len = src_len;
205	}
206
207	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
208		u_int gso_bytes = 0;
209		/* Length of the GSO packet header. */
210		u_int gso_hdr_len = 0;
211		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
212		uint8_t *gso_hdr = NULL;
213		/* Index of the current segment. */
214		u_int gso_idx = 0;
215		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
216		u_int segmented_bytes = 0;
217		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
218		u_int iphlen = 0;
219		/* Is this a TCP or an UDP GSO packet? */
220		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
221				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
222
223		/* Segment the GSO packet contained into the input slots (frags). */
224		while (ft_p != ft_end) {
225			size_t copy;
226
227			/* Grab the GSO header if we don't have it. */
228			if (!gso_hdr) {
229				uint16_t ethertype;
230
231				gso_hdr = src;
232
233				/* Look at the 'Ethertype' field to see if this packet
234				 * is IPv4 or IPv6.
235				 */
236				ethertype = be16toh(*((uint16_t *)(gso_hdr  + 12)));
237				if (ethertype == 0x0800)
238					iphlen = 20;
239				else /* if (ethertype == 0x86DD) */
240					iphlen = 40;
241				ND(3, "type=%04x", ethertype);
242
243				/* Compute gso_hdr_len. For TCP we need to read the
244				 * content of the 'Data Offset' field.
245				 */
246				if (tcp) {
247					struct nm_tcphdr *tcph =
248						(struct nm_tcphdr *)&gso_hdr[14+iphlen];
249
250					gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
251				} else
252					gso_hdr_len = 14 + iphlen + 8; /* UDP */
253
254				ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
255								dst_na->mfs);
256
257				/* Advance source pointers. */
258				src += gso_hdr_len;
259				src_len -= gso_hdr_len;
260				if (src_len == 0) {
261					ft_p++;
262					if (ft_p == ft_end)
263						break;
264					src = ft_p->ft_buf;
265					src_len = ft_p->ft_len;
266					continue;
267				}
268			}
269
270			/* Fill in the header of the current segment. */
271			if (gso_bytes == 0) {
272				memcpy(dst, gso_hdr, gso_hdr_len);
273				gso_bytes = gso_hdr_len;
274			}
275
276			/* Fill in data and update source and dest pointers. */
277			copy = src_len;
278			if (gso_bytes + copy > dst_na->mfs)
279				copy = dst_na->mfs - gso_bytes;
280			memcpy(dst + gso_bytes, src, copy);
281			gso_bytes += copy;
282			src += copy;
283			src_len -= copy;
284
285			/* A segment is complete or we have processed all the
286			   the GSO payload bytes. */
287			if (gso_bytes >= dst_na->mfs ||
288				(src_len == 0 && ft_p + 1 == ft_end)) {
289				/* After raw segmentation, we must fix some header
290				 * fields and compute checksums, in a protocol dependent
291				 * way. */
292				gso_fix_segment(dst, gso_bytes, gso_idx,
293						segmented_bytes,
294						src_len == 0 && ft_p + 1 == ft_end,
295						tcp, iphlen);
296
297				ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
298				slot->len = gso_bytes;
299				slot->flags = 0;
300				segmented_bytes += gso_bytes - gso_hdr_len;
301
302				dst_slots++;
303
304				/* Next destination slot. */
305				*j = nm_next(*j, lim);
306				slot = &ring->slot[*j];
307				dst = NMB(&dst_na->up, slot);
308
309				gso_bytes = 0;
310				gso_idx++;
311			}
312
313			/* Next input slot. */
314			if (src_len == 0) {
315				ft_p++;
316				if (ft_p == ft_end)
317					break;
318				src = ft_p->ft_buf;
319				src_len = ft_p->ft_len;
320			}
321		}
322		ND(3, "%d bytes segmented", segmented_bytes);
323
324	} else {
325		/* Address of a checksum field into a destination slot. */
326		uint16_t *check = NULL;
327		/* Accumulator for an unfolded checksum. */
328		rawsum_t csum = 0;
329
330		/* Process a non-GSO packet. */
331
332		/* Init 'check' if necessary. */
333		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
334			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
335				D("invalid checksum request");
336			else
337				check = (uint16_t *)(dst + vh->csum_start +
338						vh->csum_offset);
339		}
340
341		while (ft_p != ft_end) {
342			/* Init/update the packet checksum if needed. */
343			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
344				if (!dst_slots)
345					csum = nm_csum_raw(src + vh->csum_start,
346								src_len - vh->csum_start, 0);
347				else
348					csum = nm_csum_raw(src, src_len, csum);
349			}
350
351			/* Round to a multiple of 64 */
352			src_len = (src_len + 63) & ~63;
353
354			if (ft_p->ft_flags & NS_INDIRECT) {
355				if (copyin(src, dst, src_len)) {
356					/* Invalid user pointer, pretend len is 0. */
357					dst_len = 0;
358				}
359			} else {
360				memcpy(dst, src, (int)src_len);
361			}
362			slot->len = dst_len;
363
364			dst_slots++;
365
366			/* Next destination slot. */
367			*j = nm_next(*j, lim);
368			slot = &ring->slot[*j];
369			dst = NMB(&dst_na->up, slot);
370
371			/* Next source slot. */
372			ft_p++;
373			src = ft_p->ft_buf;
374			dst_len = src_len = ft_p->ft_len;
375
376		}
377
378		/* Finalize (fold) the checksum if needed. */
379		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
380			*check = nm_csum_fold(csum);
381		}
382		ND(3, "using %u dst_slots", dst_slots);
383
384		/* A second pass on the desitations slots to set the slot flags,
385		 * using the right number of destination slots.
386		 */
387		while (j_start != *j) {
388			slot = &ring->slot[j_start];
389			slot->flags = (dst_slots << 8)| NS_MOREFRAG;
390			j_start = nm_next(j_start, lim);
391		}
392		/* Clear NS_MOREFRAG flag on last entry. */
393		slot->flags = (dst_slots << 8);
394	}
395
396	/* Update howmany. */
397	if (unlikely(dst_slots > *howmany)) {
398		dst_slots = *howmany;
399		D("Slot allocation error: Should never happen");
400	}
401	*howmany -= dst_slots;
402}
403