netmap_offloadings.c revision 261909
11590Srgrimes/*
21590Srgrimes * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
31590Srgrimes *
41590Srgrimes * Redistribution and use in source and binary forms, with or without
51590Srgrimes * modification, are permitted provided that the following conditions
61590Srgrimes * are met:
71590Srgrimes *   1. Redistributions of source code must retain the above copyright
81590Srgrimes *      notice, this list of conditions and the following disclaimer.
91590Srgrimes *   2. Redistributions in binary form must reproduce the above copyright
101590Srgrimes *      notice, this list of conditions and the following disclaimer in the
111590Srgrimes *      documentation and/or other materials provided with the distribution.
121590Srgrimes *
131590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
141590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
151590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
161590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
171590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
181590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
191590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
201590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
211590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
221590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
231590Srgrimes * SUCH DAMAGE.
241590Srgrimes */
251590Srgrimes
261590Srgrimes/* $FreeBSD: head/sys/dev/netmap/netmap_offloadings.c 261909 2014-02-15 04:53:04Z luigi $ */
271590Srgrimes
281590Srgrimes#if defined(__FreeBSD__)
291590Srgrimes#include <sys/cdefs.h> /* prerequisite */
301590Srgrimes
3187217Smarkm#include <sys/types.h>
321590Srgrimes#include <sys/errno.h>
331590Srgrimes#include <sys/param.h>	/* defines used in kernel.h */
341590Srgrimes#include <sys/kernel.h>	/* types used in module initialization */
351590Srgrimes#include <sys/sockio.h>
361590Srgrimes#include <sys/socketvar.h>	/* struct socket */
3787217Smarkm#include <sys/socket.h> /* sockaddrs */
381590Srgrimes#include <net/if.h>
3999112Sobrien#include <net/if_var.h>
4099112Sobrien#include <machine/bus.h>	/* bus_dmamap_* */
411590Srgrimes#include <sys/endian.h>
4287217Smarkm
4398467Sjmallett#elif defined(linux)
441590Srgrimes
4587217Smarkm#include "bsd_glue.h"
461590Srgrimes
47#elif defined(__APPLE__)
48
49#warning OSX support is only partial
50#include "osx_glue.h"
51
52#else
53
54#error	Unsupported platform
55
56#endif /* unsupported */
57
58#include <net/netmap.h>
59#include <dev/netmap/netmap_kern.h>
60
61
62
63/* This routine is called by bdg_mismatch_datapath() when it finishes
64 * accumulating bytes for a segment, in order to fix some fields in the
65 * segment headers (which still contain the same content as the header
66 * of the original GSO packet). 'buf' points to the beginning (e.g.
67 * the ethernet header) of the segment, and 'len' is its length.
68 */
69static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
70			    u_int segmented_bytes, u_int last_segment,
71			    u_int tcp, u_int iphlen)
72{
73	struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
74	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
75	uint16_t *check = NULL;
76	uint8_t *check_data = NULL;
77
78	if (iphlen == 20) {
79		/* Set the IPv4 "Total Length" field. */
80		iph->tot_len = htobe16(len-14);
81		ND("ip total length %u", be16toh(ip->tot_len));
82
83		/* Set the IPv4 "Identification" field. */
84		iph->id = htobe16(be16toh(iph->id) + idx);
85		ND("ip identification %u", be16toh(iph->id));
86
87		/* Compute and insert the IPv4 header checksum. */
88		iph->check = 0;
89		iph->check = nm_csum_ipv4(iph);
90		ND("IP csum %x", be16toh(iph->check));
91	} else {/* if (iphlen == 40) */
92		/* Set the IPv6 "Payload Len" field. */
93		ip6h->payload_len = htobe16(len-14-iphlen);
94	}
95
96	if (tcp) {
97		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
98
99		/* Set the TCP sequence number. */
100		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
101		ND("tcp seq %u", be32toh(tcph->seq));
102
103		/* Zero the PSH and FIN TCP flags if this is not the last
104		   segment. */
105		if (!last_segment)
106			tcph->flags &= ~(0x8 | 0x1);
107		ND("last_segment %u", last_segment);
108
109		check = &tcph->check;
110		check_data = (uint8_t *)tcph;
111	} else { /* UDP */
112		struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
113
114		/* Set the UDP 'Length' field. */
115		udph->len = htobe16(len-14-iphlen);
116
117		check = &udph->check;
118		check_data = (uint8_t *)udph;
119	}
120
121	/* Compute and insert TCP/UDP checksum. */
122	*check = 0;
123	if (iphlen == 20)
124		nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
125	else
126		nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
127
128	ND("TCP/UDP csum %x", be16toh(*check));
129}
130
131
132/* The VALE mismatch datapath implementation. */
133void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
134			   struct netmap_vp_adapter *dst_na,
135			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
136			   u_int *j, u_int lim, u_int *howmany)
137{
138	struct netmap_slot *slot = NULL;
139	struct nm_vnet_hdr *vh = NULL;
140	/* Number of source slots to process. */
141	u_int frags = ft_p->ft_frags;
142	struct nm_bdg_fwd *ft_end = ft_p + frags;
143
144	/* Source and destination pointers. */
145	uint8_t *dst, *src;
146	size_t src_len, dst_len;
147
148	u_int j_start = *j;
149	u_int dst_slots = 0;
150
151	/* If the source port uses the offloadings, while destination doesn't,
152	 * we grab the source virtio-net header and do the offloadings here.
153	 */
154	if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
155		vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
156	}
157
158	/* Init source and dest pointers. */
159	src = ft_p->ft_buf;
160	src_len = ft_p->ft_len;
161	slot = &ring->slot[*j];
162	dst = BDG_NMB(&dst_na->up, slot);
163	dst_len = src_len;
164
165	/* We are processing the first input slot and there is a mismatch
166	 * between source and destination virt_hdr_len (SHL and DHL).
167	 * When the a client is using virtio-net headers, the header length
168	 * can be:
169	 *    - 10: the header corresponds to the struct nm_vnet_hdr
170	 *    - 12: the first 10 bytes correspond to the struct
171	 *          virtio_net_hdr, and the last 2 bytes store the
172	 *          "mergeable buffers" info, which is an optional
173	 *	    hint that can be zeroed for compability
174	 *
175	 * The destination header is therefore built according to the
176	 * following table:
177	 *
178	 * SHL | DHL | destination header
179	 * -----------------------------
180	 *   0 |  10 | zero
181	 *   0 |  12 | zero
182	 *  10 |   0 | doesn't exist
183	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
184	 *  12 |   0 | doesn't exist
185	 *  12 |  10 | copied from the first 10 bytes of source header
186	 */
187	bzero(dst, dst_na->virt_hdr_len);
188	if (na->virt_hdr_len && dst_na->virt_hdr_len)
189		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
190	/* Skip the virtio-net headers. */
191	src += na->virt_hdr_len;
192	src_len -= na->virt_hdr_len;
193	dst += dst_na->virt_hdr_len;
194	dst_len = dst_na->virt_hdr_len + src_len;
195
196	/* Here it could be dst_len == 0 (which implies src_len == 0),
197	 * so we avoid passing a zero length fragment.
198	 */
199	if (dst_len == 0) {
200		ft_p++;
201		src = ft_p->ft_buf;
202		src_len = ft_p->ft_len;
203		dst_len = src_len;
204	}
205
206	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
207		u_int gso_bytes = 0;
208		/* Length of the GSO packet header. */
209		u_int gso_hdr_len = 0;
210		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
211		uint8_t *gso_hdr = NULL;
212		/* Index of the current segment. */
213		u_int gso_idx = 0;
214		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
215		u_int segmented_bytes = 0;
216		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
217		u_int iphlen = 0;
218		/* Is this a TCP or an UDP GSO packet? */
219		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
220				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
221
222		/* Segment the GSO packet contained into the input slots (frags). */
223		while (ft_p != ft_end) {
224			size_t copy;
225
226			/* Grab the GSO header if we don't have it. */
227			if (!gso_hdr) {
228				uint16_t ethertype;
229
230				gso_hdr = src;
231
232				/* Look at the 'Ethertype' field to see if this packet
233				 * is IPv4 or IPv6.
234				 */
235				ethertype = be16toh(*((uint16_t *)(gso_hdr  + 12)));
236				if (ethertype == 0x0800)
237					iphlen = 20;
238				else /* if (ethertype == 0x86DD) */
239					iphlen = 40;
240				ND(3, "type=%04x", ethertype);
241
242				/* Compute gso_hdr_len. For TCP we need to read the
243				 * content of the 'Data Offset' field.
244				 */
245				if (tcp) {
246					struct nm_tcphdr *tcph =
247						(struct nm_tcphdr *)&gso_hdr[14+iphlen];
248
249					gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
250				} else
251					gso_hdr_len = 14 + iphlen + 8; /* UDP */
252
253				ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
254								dst_na->mfs);
255
256				/* Advance source pointers. */
257				src += gso_hdr_len;
258				src_len -= gso_hdr_len;
259				if (src_len == 0) {
260					ft_p++;
261					if (ft_p == ft_end)
262						break;
263					src = ft_p->ft_buf;
264					src_len = ft_p->ft_len;
265					continue;
266				}
267			}
268
269			/* Fill in the header of the current segment. */
270			if (gso_bytes == 0) {
271				memcpy(dst, gso_hdr, gso_hdr_len);
272				gso_bytes = gso_hdr_len;
273			}
274
275			/* Fill in data and update source and dest pointers. */
276			copy = src_len;
277			if (gso_bytes + copy > dst_na->mfs)
278				copy = dst_na->mfs - gso_bytes;
279			memcpy(dst + gso_bytes, src, copy);
280			gso_bytes += copy;
281			src += copy;
282			src_len -= copy;
283
284			/* A segment is complete or we have processed all the
285			   the GSO payload bytes. */
286			if (gso_bytes >= dst_na->mfs ||
287				(src_len == 0 && ft_p + 1 == ft_end)) {
288				/* After raw segmentation, we must fix some header
289				 * fields and compute checksums, in a protocol dependent
290				 * way. */
291				gso_fix_segment(dst, gso_bytes, gso_idx,
292						segmented_bytes,
293						src_len == 0 && ft_p + 1 == ft_end,
294						tcp, iphlen);
295
296				ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
297				slot->len = gso_bytes;
298				slot->flags = 0;
299				segmented_bytes += gso_bytes - gso_hdr_len;
300
301				dst_slots++;
302
303				/* Next destination slot. */
304				*j = nm_next(*j, lim);
305				slot = &ring->slot[*j];
306				dst = BDG_NMB(&dst_na->up, slot);
307
308				gso_bytes = 0;
309				gso_idx++;
310			}
311
312			/* Next input slot. */
313			if (src_len == 0) {
314				ft_p++;
315				if (ft_p == ft_end)
316					break;
317				src = ft_p->ft_buf;
318				src_len = ft_p->ft_len;
319			}
320		}
321		ND(3, "%d bytes segmented", segmented_bytes);
322
323	} else {
324		/* Address of a checksum field into a destination slot. */
325		uint16_t *check = NULL;
326		/* Accumulator for an unfolded checksum. */
327		rawsum_t csum = 0;
328
329		/* Process a non-GSO packet. */
330
331		/* Init 'check' if necessary. */
332		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
333			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
334				D("invalid checksum request");
335			else
336				check = (uint16_t *)(dst + vh->csum_start +
337						vh->csum_offset);
338		}
339
340		while (ft_p != ft_end) {
341			/* Init/update the packet checksum if needed. */
342			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
343				if (!dst_slots)
344					csum = nm_csum_raw(src + vh->csum_start,
345								src_len - vh->csum_start, 0);
346				else
347					csum = nm_csum_raw(src, src_len, csum);
348			}
349
350			/* Round to a multiple of 64 */
351			src_len = (src_len + 63) & ~63;
352
353			if (ft_p->ft_flags & NS_INDIRECT) {
354				if (copyin(src, dst, src_len)) {
355					/* Invalid user pointer, pretend len is 0. */
356					dst_len = 0;
357				}
358			} else {
359				memcpy(dst, src, (int)src_len);
360			}
361			slot->len = dst_len;
362
363			dst_slots++;
364
365			/* Next destination slot. */
366			*j = nm_next(*j, lim);
367			slot = &ring->slot[*j];
368			dst = BDG_NMB(&dst_na->up, slot);
369
370			/* Next source slot. */
371			ft_p++;
372			src = ft_p->ft_buf;
373			dst_len = src_len = ft_p->ft_len;
374
375		}
376
377		/* Finalize (fold) the checksum if needed. */
378		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
379			*check = nm_csum_fold(csum);
380		}
381		ND(3, "using %u dst_slots", dst_slots);
382
383		/* A second pass on the desitations slots to set the slot flags,
384		 * using the right number of destination slots.
385		 */
386		while (j_start != *j) {
387			slot = &ring->slot[j_start];
388			slot->flags = (dst_slots << 8)| NS_MOREFRAG;
389			j_start = nm_next(j_start, lim);
390		}
391		/* Clear NS_MOREFRAG flag on last entry. */
392		slot->flags = (dst_slots << 8);
393	}
394
395	/* Update howmany. */
396	if (unlikely(dst_slots > *howmany)) {
397		dst_slots = *howmany;
398		D("Slot allocation error: Should never happen");
399	}
400	*howmany -= dst_slots;
401}
402