1// SPDX-License-Identifier: GPL-2.0
2
3/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP.
4 *
5 * The test runs on two machines to exercise the NIC. For this reason it
6 * is not integrated in kselftests.
7 *
8 *     CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS))
9 *
10 * Rx:
11 *
12 * The sender sends packets with a known checksum field using PF_INET(6)
13 * SOCK_RAW sockets.
14 *
15 * good packet: $CMD [-t]
16 * bad packet:  $CMD [-t] -E
17 *
18 * The receiver reads UDP packets with a UDP socket. This is not an
19 * option for TCP packets ('-t'). Optionally insert an iptables filter
20 * to avoid these entering the real protocol stack.
21 *
22 * The receiver also reads all packets with a PF_PACKET socket, to
23 * observe whether both good and bad packets arrive on the host. And to
24 * read the optional TP_STATUS_CSUM_VALID bit. This requires setting
25 * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY.
26 *
27 * Tx:
28 *
29 * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx
30 * checksum offload.
31 *
 * The sender can send packets with a UDP socket.
33 *
34 * Optionally crafts a packet that sums up to zero to verify that the
35 * device writes negative zero 0xFFFF in this case to distinguish from
36 * 0x0000 (checksum disabled), as required by RFC 768. Hit this case
37 * by choosing a specific source port.
38 *
39 * good packet: $CMD -U
40 * zero csum:   $CMD -U -Z
41 *
42 * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR,
43 * to cover more protocols. PF_PACKET requires passing src and dst mac
44 * addresses.
45 *
 * good packet: $CMD -s $smac -d $dmac -P [-t]
47 *
 * Argument '-z' sends UDP packets with a 0x0000 checksum disabled field,
49 * to verify that the NIC passes these packets unmodified.
50 *
51 * Argument '-e' adds a transport mode encapsulation header between
 * network and transport header. This will fail for devices that parse
 * headers. Should work on devices that implement protocol agnostic tx
54 * checksum offload (NETIF_F_HW_CSUM).
55 *
56 * Argument '-r $SEED' optionally randomizes header, payload and length
57 * to increase coverage between packets sent. SEED 1 further chooses a
58 * different seed for each run (and logs this for reproducibility). It
59 * is advised to enable this for extra coverage in continuous testing.
60 */
61
62#define _GNU_SOURCE
63
64#include <arpa/inet.h>
65#include <asm/byteorder.h>
66#include <errno.h>
67#include <error.h>
68#include <linux/filter.h>
69#include <linux/if_packet.h>
70#include <linux/ipv6.h>
71#include <linux/virtio_net.h>
72#include <net/ethernet.h>
73#include <net/if.h>
74#include <netinet/if_ether.h>
75#include <netinet/in.h>
76#include <netinet/ip.h>
77#include <netinet/ip6.h>
78#include <netinet/tcp.h>
79#include <netinet/udp.h>
80#include <poll.h>
81#include <sched.h>
82#include <stdbool.h>
83#include <stddef.h>
84#include <stdint.h>
85#include <stdio.h>
86#include <stdlib.h>
87#include <string.h>
88#include <sys/socket.h>
89#include <sys/stat.h>
90#include <sys/time.h>
91#include <sys/types.h>
92#include <unistd.h>
93
94#include "kselftest.h"
95
/* Direction: both are on by default; -R turns tx off, -T turns rx off */
static bool cfg_do_rx = true;
static bool cfg_do_tx = true;

/* Protocol selection: -4/-6, -t/-u and -e */
static int cfg_family = PF_INET6;
static int cfg_proto = IPPROTO_UDP;
static bool cfg_encap;

/* Transmit path: -P sends with PF_PACKET, -U with a UDP socket */
static bool cfg_send_pfpacket;
static bool cfg_send_udp;

/* Link layer parameters: -i, -d and -s */
static char *cfg_ifname = "eth0";
static char *cfg_mac_dst;
static char *cfg_mac_src;

/* Transport ports; the encap source port marks the outer UDP header */
static uint16_t cfg_port_dst = 34000;
static uint16_t cfg_port_src = 33000;
static uint16_t cfg_port_src_encap = 33001;

/* Payload contents (-l, -r) and number of packets to send (-n) */
static int cfg_payload_char = 'a';
static int cfg_payload_len = 100;
static int cfg_num_pkt = 4;
static unsigned int cfg_random_seed;

/* Checksum variations */
static bool cfg_bad_csum;	/* -E: invert the checksum field */
static bool cfg_zero_disable;	/* skip checksum: set to zero (udp only) */
static bool cfg_zero_sum;	/* create packet that adds up to zero */

/* Receive side tuning */
static int cfg_rcvbuf = 1 << 22;	/* be able to queue large cfg_num_pkt */
static int cfg_timeout_ms = 2000;	/* -L takes seconds */

/* Source and destination addresses, filled in by parse_args() */
static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET};
static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET};
static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6};
static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6};
123
/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP.
 * Only nexthdr is ever written or read by this test.
 */
struct udp_encap_hdr {
	uint8_t nexthdr;
	uint8_t padding[3];
};

/* Upper bound on the payload that follows the transport header */
#define MAX_PAYLOAD_LEN 1024

/* Outer UDP header plus the demo encap header */
#define ENC_HEADER_LEN \
	(sizeof(struct udphdr) + sizeof(struct udp_encap_hdr))

/* Largest header stack built here: IPv6 + encap + TCP */
#define MAX_HEADER_LEN \
	(sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr))

/* Ipaddrs, for pseudo csum. Points at the saddr field of the network header
 * most recently built or parsed. Global var is ugly, pass through funcs was
 * worse.
 */
static void *iph_addr_p;
136
/* Return the current wall clock time from gettimeofday() in milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval now;
	unsigned long ms;

	gettimeofday(&now, NULL);

	ms = now.tv_sec * 1000UL;
	ms += now.tv_usec / 1000UL;

	return ms;
}
144
/* Accumulate @len bytes at @data into the running 32-bit sum @sum.
 *
 * The data is summed as 16-bit words in memory (network) order, without
 * folding the carries: the caller folds the result. A trailing odd byte is
 * treated as the first byte of a 16-bit word padded with a zero byte, so
 * that the result is correct on both little- and big-endian hosts.
 *
 * Words are loaded with memcpy() so that @data does not need to be 2-byte
 * aligned. @data is not dereferenced when @len is zero, so NULL is allowed
 * in that case.
 *
 * Returns the updated, unfolded sum.
 */
static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum)
{
	const size_t nwords = len / 2;
	size_t i;

	for (i = 0; i < nwords; i++) {
		uint16_t word;

		memcpy(&word, data + 2 * i, sizeof(word));
		sum += word;
	}

	if (len & 1) {
		uint16_t last = 0;

		/* odd byte occupies the first byte of a zero padded word */
		memcpy(&last, data + len - 1, 1);
		sum += last;
	}

	return sum;
}
158
/* Sum @len bytes at @data on top of @sum, fold the 32-bit result down to
 * 16 bits by adding the carries back in, and return its complement.
 */
static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
{
	uint32_t acc = checksum_nofold(data, len, sum);

	/* keep adding the upper half back until nothing is left there */
	while (acc >> 16)
		acc = (acc >> 16) + (acc & 0xFFFF);

	return ~acc;
}
168
169static uint16_t checksum(void *th, uint16_t proto, size_t len)
170{
171	uint32_t sum;
172	int alen;
173
174	alen = cfg_family == PF_INET6 ? 32 : 8;
175
176	sum = checksum_nofold(iph_addr_p, alen, 0);
177	sum += htons(proto);
178	sum += htons(len);
179
180	/* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */
181	if (cfg_do_tx && cfg_send_pfpacket)
182		return ~checksum_fold(NULL, 0, sum);
183	else
184		return checksum_fold(th, len, sum);
185}
186
187static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len)
188{
189	struct iphdr *iph = _iph;
190
191	memset(iph, 0, sizeof(*iph));
192
193	iph->version = 4;
194	iph->ihl = 5;
195	iph->ttl = 8;
196	iph->protocol = proto;
197	iph->saddr = cfg_saddr4.sin_addr.s_addr;
198	iph->daddr = cfg_daddr4.sin_addr.s_addr;
199	iph->tot_len = htons(sizeof(*iph) + len);
200	iph->check = checksum_fold(iph, sizeof(*iph), 0);
201
202	iph_addr_p = &iph->saddr;
203
204	return iph + 1;
205}
206
207static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len)
208{
209	struct ipv6hdr *ip6h = _ip6h;
210
211	memset(ip6h, 0, sizeof(*ip6h));
212
213	ip6h->version = 6;
214	ip6h->payload_len = htons(len);
215	ip6h->nexthdr = proto;
216	ip6h->hop_limit = 64;
217	ip6h->saddr = cfg_saddr6.sin6_addr;
218	ip6h->daddr = cfg_daddr6.sin6_addr;
219
220	iph_addr_p = &ip6h->saddr;
221
222	return ip6h + 1;
223}
224
225static void *build_packet_udp(void *_uh)
226{
227	struct udphdr *uh = _uh;
228
229	uh->source = htons(cfg_port_src);
230	uh->dest = htons(cfg_port_dst);
231	uh->len = htons(sizeof(*uh) + cfg_payload_len);
232	uh->check = 0;
233
234	/* choose source port so that uh->check adds up to zero */
235	if (cfg_zero_sum) {
236		uh->source = 0;
237		uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
238
239		fprintf(stderr, "tx: changing sport: %hu -> %hu\n",
240			cfg_port_src, ntohs(uh->source));
241		cfg_port_src = ntohs(uh->source);
242	}
243
244	if (cfg_zero_disable)
245		uh->check = 0;
246	else
247		uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
248
249	if (cfg_bad_csum)
250		uh->check = ~uh->check;
251
252	fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check);
253	return uh + 1;
254}
255
256static void *build_packet_tcp(void *_th)
257{
258	struct tcphdr *th = _th;
259
260	th->source = htons(cfg_port_src);
261	th->dest = htons(cfg_port_dst);
262	th->doff = 5;
263	th->check = 0;
264
265	th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len);
266
267	if (cfg_bad_csum)
268		th->check = ~th->check;
269
270	fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check);
271	return th + 1;
272}
273
274static char *build_packet_udp_encap(void *_uh)
275{
276	struct udphdr *uh = _uh;
277	struct udp_encap_hdr *eh = _uh + sizeof(*uh);
278
279	/* outer dst == inner dst, to simplify BPF filter
280	 * outer src != inner src, to demultiplex on recv
281	 */
282	uh->dest = htons(cfg_port_dst);
283	uh->source = htons(cfg_port_src_encap);
284	uh->check = 0;
285	uh->len = htons(sizeof(*uh) +
286			sizeof(*eh) +
287			sizeof(struct tcphdr) +
288			cfg_payload_len);
289
290	eh->nexthdr = IPPROTO_TCP;
291
292	return build_packet_tcp(eh + 1);
293}
294
295static char *build_packet(char *buf, int max_len, int *len)
296{
297	uint8_t proto;
298	char *off;
299	int tlen;
300
301	if (cfg_random_seed) {
302		int *buf32 = (void *)buf;
303		int i;
304
305		for (i = 0; i < (max_len / sizeof(int)); i++)
306			buf32[i] = rand();
307	} else {
308		memset(buf, cfg_payload_char, max_len);
309	}
310
311	if (cfg_proto == IPPROTO_UDP)
312		tlen = sizeof(struct udphdr) + cfg_payload_len;
313	else
314		tlen = sizeof(struct tcphdr) + cfg_payload_len;
315
316	if (cfg_encap) {
317		proto = IPPROTO_UDP;
318		tlen += ENC_HEADER_LEN;
319	} else {
320		proto = cfg_proto;
321	}
322
323	if (cfg_family == PF_INET)
324		off = build_packet_ipv4(buf, proto, tlen);
325	else
326		off = build_packet_ipv6(buf, proto, tlen);
327
328	if (cfg_encap)
329		off = build_packet_udp_encap(off);
330	else if (cfg_proto == IPPROTO_UDP)
331		off = build_packet_udp(off);
332	else
333		off = build_packet_tcp(off);
334
335	/* only pass the payload, but still compute headers for cfg_zero_sum */
336	if (cfg_send_udp) {
337		*len = cfg_payload_len;
338		return off;
339	}
340
341	*len = off - buf + cfg_payload_len;
342	return buf;
343}
344
345static int open_inet(int ipproto, int protocol)
346{
347	int fd;
348
349	fd = socket(cfg_family, ipproto, protocol);
350	if (fd == -1)
351		error(1, errno, "socket inet");
352
353	if (cfg_family == PF_INET6) {
354		/* may have been updated by cfg_zero_sum */
355		cfg_saddr6.sin6_port = htons(cfg_port_src);
356
357		if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6)))
358			error(1, errno, "bind dgram 6");
359		if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
360			error(1, errno, "connect dgram 6");
361	} else {
362		/* may have been updated by cfg_zero_sum */
363		cfg_saddr4.sin_port = htons(cfg_port_src);
364
365		if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4)))
366			error(1, errno, "bind dgram 4");
367		if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
368			error(1, errno, "connect dgram 4");
369	}
370
371	return fd;
372}
373
374static int open_packet(void)
375{
376	int fd, one = 1;
377
378	fd = socket(PF_PACKET, SOCK_RAW, 0);
379	if (fd == -1)
380		error(1, errno, "socket packet");
381
382	if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
383		error(1, errno, "setsockopt packet_vnet_ndr");
384
385	return fd;
386}
387
/* Write @len bytes at @buf to @fd. Exits if the write fails or is short. */
static void send_inet(int fd, const char *buf, int len)
{
	int written = write(fd, buf, len);

	if (written == -1)
		error(1, errno, "write");
	else if (written != len)
		error(1, 0, "write: %d", written);
}
398
/* Parse a colon separated hexadecimal mac address string @str into the six
 * bytes at @eth. Exits if fewer than six fields can be parsed.
 */
static void eth_str_to_addr(const char *str, unsigned char *eth)
{
	int nfields;

	nfields = sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
			 eth, eth + 1, eth + 2, eth + 3, eth + 4, eth + 5);
	if (nfields == 6)
		return;

	error(1, 0, "cannot parse mac addr %s", str);
}
405
406static void send_packet(int fd, const char *buf, int len)
407{
408	struct virtio_net_hdr vh = {0};
409	struct sockaddr_ll addr = {0};
410	struct msghdr msg = {0};
411	struct ethhdr eth;
412	struct iovec iov[3];
413	int ret;
414
415	addr.sll_family = AF_PACKET;
416	addr.sll_halen = ETH_ALEN;
417	addr.sll_ifindex = if_nametoindex(cfg_ifname);
418	if (!addr.sll_ifindex)
419		error(1, errno, "if_nametoindex %s", cfg_ifname);
420
421	vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
422	if (cfg_family == PF_INET6) {
423		vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
424		addr.sll_protocol = htons(ETH_P_IPV6);
425	} else {
426		vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr);
427		addr.sll_protocol = htons(ETH_P_IP);
428	}
429
430	if (cfg_encap)
431		vh.csum_start += ENC_HEADER_LEN;
432
433	if (cfg_proto == IPPROTO_TCP) {
434		vh.csum_offset = __builtin_offsetof(struct tcphdr, check);
435		vh.hdr_len = vh.csum_start + sizeof(struct tcphdr);
436	} else {
437		vh.csum_offset = __builtin_offsetof(struct udphdr, check);
438		vh.hdr_len = vh.csum_start + sizeof(struct udphdr);
439	}
440
441	eth_str_to_addr(cfg_mac_src, eth.h_source);
442	eth_str_to_addr(cfg_mac_dst, eth.h_dest);
443	eth.h_proto = addr.sll_protocol;
444
445	iov[0].iov_base = &vh;
446	iov[0].iov_len = sizeof(vh);
447
448	iov[1].iov_base = &eth;
449	iov[1].iov_len = sizeof(eth);
450
451	iov[2].iov_base = (void *)buf;
452	iov[2].iov_len = len;
453
454	msg.msg_iov = iov;
455	msg.msg_iovlen = ARRAY_SIZE(iov);
456
457	msg.msg_name = &addr;
458	msg.msg_namelen = sizeof(addr);
459
460	ret = sendmsg(fd, &msg, 0);
461	if (ret == -1)
462		error(1, errno, "sendmsg packet");
463	if (ret != sizeof(vh) + sizeof(eth) + len)
464		error(1, errno, "sendmsg packet: %u", ret);
465}
466
467static int recv_prepare_udp(void)
468{
469	int fd;
470
471	fd = socket(cfg_family, SOCK_DGRAM, 0);
472	if (fd == -1)
473		error(1, errno, "socket r");
474
475	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
476		       &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
477		error(1, errno, "setsockopt SO_RCVBUF r");
478
479	if (cfg_family == PF_INET6) {
480		if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
481			error(1, errno, "bind r");
482	} else {
483		if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
484			error(1, errno, "bind r");
485	}
486
487	return fd;
488}
489
490/* Filter out all traffic that is not cfg_proto with our destination port.
491 *
492 * Otherwise background noise may cause PF_PACKET receive queue overflow,
493 * dropping the expected packets and failing the test.
494 */
495static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport)
496{
497	struct sock_filter filter[] = {
498		BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
499		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
500		BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr),
501		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2),
502		BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
503		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0),
504		BPF_STMT(BPF_RET + BPF_K, 0),
505		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
506	};
507	struct sock_fprog prog = {};
508
509	prog.filter = filter;
510	prog.len = ARRAY_SIZE(filter);
511	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
512		error(1, errno, "setsockopt filter");
513}
514
515static void recv_prepare_packet_filter(int fd)
516{
517	const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
518
519	if (cfg_family == AF_INET)
520		__recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol),
521					     sizeof(struct iphdr) + off_dport);
522	else
523		__recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr),
524					     sizeof(struct ipv6hdr) + off_dport);
525}
526
527static void recv_prepare_packet_bind(int fd)
528{
529	struct sockaddr_ll laddr = {0};
530
531	laddr.sll_family = AF_PACKET;
532
533	if (cfg_family == PF_INET)
534		laddr.sll_protocol = htons(ETH_P_IP);
535	else
536		laddr.sll_protocol = htons(ETH_P_IPV6);
537
538	laddr.sll_ifindex = if_nametoindex(cfg_ifname);
539	if (!laddr.sll_ifindex)
540		error(1, 0, "if_nametoindex %s", cfg_ifname);
541
542	if (bind(fd, (void *)&laddr, sizeof(laddr)))
543		error(1, errno, "bind pf_packet");
544}
545
546static int recv_prepare_packet(void)
547{
548	int fd, one = 1;
549
550	fd = socket(PF_PACKET, SOCK_DGRAM, 0);
551	if (fd == -1)
552		error(1, errno, "socket p");
553
554	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
555		       &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
556		error(1, errno, "setsockopt SO_RCVBUF p");
557
558	/* enable auxdata to recv checksum status (valid vs unknown) */
559	if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one)))
560		error(1, errno, "setsockopt auxdata");
561
562	/* install filter to restrict packet flow to match */
563	recv_prepare_packet_filter(fd);
564
565	/* bind to address family to start packet flow */
566	recv_prepare_packet_bind(fd);
567
568	return fd;
569}
570
571static int recv_udp(int fd)
572{
573	static char buf[MAX_PAYLOAD_LEN];
574	int ret, count = 0;
575
576	while (1) {
577		ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
578		if (ret == -1 && errno == EAGAIN)
579			break;
580		if (ret == -1)
581			error(1, errno, "recv r");
582
583		fprintf(stderr, "rx: udp: len=%u\n", ret);
584		count++;
585	}
586
587	return count;
588}
589
590static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field)
591{
592	uint16_t csum;
593
594	csum = checksum(th, cfg_proto, len);
595
596	fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n",
597		sport, len, csum_field, csum);
598
599	/* csum must be zero unless cfg_bad_csum indicates bad csum */
600	if (csum && !cfg_bad_csum) {
601		fprintf(stderr, "pkt: bad csum\n");
602		return 1;
603	} else if (cfg_bad_csum && !csum) {
604		fprintf(stderr, "pkt: good csum, while bad expected\n");
605		return 1;
606	}
607
608	if (cfg_zero_sum && csum_field != 0xFFFF) {
609		fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field);
610		return 1;
611	}
612
613	return 0;
614}
615
616static int recv_verify_packet_tcp(void *th, int len)
617{
618	struct tcphdr *tcph = th;
619
620	if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst))
621		return -1;
622
623	return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check);
624}
625
626static int recv_verify_packet_udp_encap(void *th, int len)
627{
628	struct udp_encap_hdr *eh = th;
629
630	if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP)
631		return -1;
632
633	return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh));
634}
635
636static int recv_verify_packet_udp(void *th, int len)
637{
638	struct udphdr *udph = th;
639
640	if (len < sizeof(*udph))
641		return -1;
642
643	if (udph->dest != htons(cfg_port_dst))
644		return -1;
645
646	if (udph->source == htons(cfg_port_src_encap))
647		return recv_verify_packet_udp_encap(udph + 1,
648						    len - sizeof(*udph));
649
650	return recv_verify_csum(th, len, ntohs(udph->source), udph->check);
651}
652
653static int recv_verify_packet_ipv4(void *nh, int len)
654{
655	struct iphdr *iph = nh;
656	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
657
658	if (len < sizeof(*iph) || iph->protocol != proto)
659		return -1;
660
661	iph_addr_p = &iph->saddr;
662	if (proto == IPPROTO_TCP)
663		return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
664	else
665		return recv_verify_packet_udp(iph + 1, len - sizeof(*iph));
666}
667
668static int recv_verify_packet_ipv6(void *nh, int len)
669{
670	struct ipv6hdr *ip6h = nh;
671	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
672
673	if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
674		return -1;
675
676	iph_addr_p = &ip6h->saddr;
677
678	if (proto == IPPROTO_TCP)
679		return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
680	else
681		return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
682}
683
684/* return whether auxdata includes TP_STATUS_CSUM_VALID */
685static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
686{
687	struct tpacket_auxdata *aux = NULL;
688	struct cmsghdr *cm;
689
690	if (msg->msg_flags & MSG_CTRUNC)
691		error(1, 0, "cmsg: truncated");
692
693	for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
694		if (cm->cmsg_level != SOL_PACKET ||
695		    cm->cmsg_type != PACKET_AUXDATA)
696			error(1, 0, "cmsg: level=%d type=%d\n",
697			      cm->cmsg_level, cm->cmsg_type);
698
699		if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
700			error(1, 0, "cmsg: len=%lu expected=%lu",
701			      cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata)));
702
703		aux = (void *)CMSG_DATA(cm);
704	}
705
706	if (!aux)
707		error(1, 0, "cmsg: no auxdata");
708
709	return aux->tp_status;
710}
711
712static int recv_packet(int fd)
713{
714	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
715	unsigned long total = 0, bad_csums = 0, bad_validations = 0;
716	char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
717	struct pkt *buf = (void *)_buf;
718	struct msghdr msg = {0};
719	uint32_t tp_status;
720	struct iovec iov;
721	int len, ret;
722
723	iov.iov_base = _buf;
724	iov.iov_len = sizeof(_buf);
725
726	msg.msg_iov = &iov;
727	msg.msg_iovlen = 1;
728
729	msg.msg_control = ctrl;
730	msg.msg_controllen = sizeof(ctrl);
731
732	while (1) {
733		msg.msg_flags = 0;
734
735		len = recvmsg(fd, &msg, MSG_DONTWAIT);
736		if (len == -1 && errno == EAGAIN)
737			break;
738		if (len == -1)
739			error(1, errno, "recv p");
740
741		tp_status = recv_get_packet_csum_status(&msg);
742
743		/* GRO might coalesce randomized packets. Such GSO packets are
744		 * then reinitialized for csum offload (CHECKSUM_PARTIAL), with
745		 * a pseudo csum. Do not try to validate these checksums.
746		 */
747		if (tp_status & TP_STATUS_CSUMNOTREADY) {
748			fprintf(stderr, "cmsg: GSO packet has partial csum: skip\n");
749			continue;
750		}
751
752		if (cfg_family == PF_INET6)
753			ret = recv_verify_packet_ipv6(buf, len);
754		else
755			ret = recv_verify_packet_ipv4(buf, len);
756
757		if (ret == -1 /* skip: non-matching */)
758			continue;
759
760		total++;
761		if (ret == 1)
762			bad_csums++;
763
764		/* Fail if kernel returns valid for known bad csum.
765		 * Do not fail if kernel does not validate a good csum:
766		 * Absence of validation does not imply invalid.
767		 */
768		if (tp_status & TP_STATUS_CSUM_VALID && cfg_bad_csum) {
769			fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
770			bad_validations++;
771		}
772	}
773
774	if (bad_csums || bad_validations)
775		error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n",
776		      total, bad_csums, bad_validations);
777
778	return total;
779}
780
781static void parse_args(int argc, char *const argv[])
782{
783	const char *daddr = NULL, *saddr = NULL;
784	int c;
785
786	while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) {
787		switch (c) {
788		case '4':
789			cfg_family = PF_INET;
790			break;
791		case '6':
792			cfg_family = PF_INET6;
793			break;
794		case 'd':
795			cfg_mac_dst = optarg;
796			break;
797		case 'D':
798			daddr = optarg;
799			break;
800		case 'e':
801			cfg_encap = true;
802			break;
803		case 'E':
804			cfg_bad_csum = true;
805			break;
806		case 'i':
807			cfg_ifname = optarg;
808			break;
809		case 'l':
810			cfg_payload_len = strtol(optarg, NULL, 0);
811			break;
812		case 'L':
813			cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000;
814			break;
815		case 'n':
816			cfg_num_pkt = strtol(optarg, NULL, 0);
817			break;
818		case 'r':
819			cfg_random_seed = strtol(optarg, NULL, 0);
820			break;
821		case 'P':
822			cfg_send_pfpacket = true;
823			break;
824		case 'R':
825			/* only Rx: used with two machine tests */
826			cfg_do_tx = false;
827			break;
828		case 's':
829			cfg_mac_src = optarg;
830			break;
831		case 'S':
832			saddr = optarg;
833			break;
834		case 't':
835			cfg_proto = IPPROTO_TCP;
836			break;
837		case 'T':
838			/* only Tx: used with two machine tests */
839			cfg_do_rx = false;
840			break;
841		case 'u':
842			cfg_proto = IPPROTO_UDP;
843			break;
844		case 'U':
845			/* send using real udp socket,
846			 * to exercise tx checksum offload
847			 */
848			cfg_send_udp = true;
849			break;
850		case 'z':
851			cfg_zero_disable = true;
852			break;
853		case 'Z':
854			cfg_zero_sum = true;
855			break;
856		default:
857			error(1, 0, "unknown arg %c", c);
858		}
859	}
860
861	if (!daddr || !saddr)
862		error(1, 0, "Must pass -D <daddr> and -S <saddr>");
863
864	if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst))
865		error(1, 0, "Transmit with pf_packet requires mac addresses");
866
867	if (cfg_payload_len > MAX_PAYLOAD_LEN)
868		error(1, 0, "Payload length exceeds max");
869
870	if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable))
871		error(1, 0, "Only UDP supports zero csum");
872
873	if (cfg_zero_sum && !cfg_send_udp)
874		error(1, 0, "Zero checksum conversion requires -U for tx csum offload");
875	if (cfg_zero_sum && cfg_bad_csum)
876		error(1, 0, "Cannot combine zero checksum conversion and invalid checksum");
877	if (cfg_zero_sum && cfg_random_seed)
878		error(1, 0, "Cannot combine zero checksum conversion with randomization");
879
880	if (cfg_family == PF_INET6) {
881		cfg_saddr6.sin6_port = htons(cfg_port_src);
882		cfg_daddr6.sin6_port = htons(cfg_port_dst);
883
884		if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1)
885			error(1, errno, "Cannot parse ipv6 -D");
886		if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1)
887			error(1, errno, "Cannot parse ipv6 -S");
888	} else {
889		cfg_saddr4.sin_port = htons(cfg_port_src);
890		cfg_daddr4.sin_port = htons(cfg_port_dst);
891
892		if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1)
893			error(1, errno, "Cannot parse ipv4 -D");
894		if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1)
895			error(1, errno, "Cannot parse ipv4 -S");
896	}
897
898	if (cfg_do_tx && cfg_random_seed) {
899		/* special case: time-based seed */
900		if (cfg_random_seed == 1)
901			cfg_random_seed = (unsigned int)gettimeofday_ms();
902		srand(cfg_random_seed);
903		fprintf(stderr, "randomization seed: %u\n", cfg_random_seed);
904	}
905}
906
907static void do_tx(void)
908{
909	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
910	char *buf;
911	int fd, len, i;
912
913	buf = build_packet(_buf, sizeof(_buf), &len);
914
915	if (cfg_send_pfpacket)
916		fd = open_packet();
917	else if (cfg_send_udp)
918		fd = open_inet(SOCK_DGRAM, 0);
919	else
920		fd = open_inet(SOCK_RAW, IPPROTO_RAW);
921
922	for (i = 0; i < cfg_num_pkt; i++) {
923		if (cfg_send_pfpacket)
924			send_packet(fd, buf, len);
925		else
926			send_inet(fd, buf, len);
927
928		/* randomize each packet individually to increase coverage */
929		if (cfg_random_seed) {
930			cfg_payload_len = rand() % MAX_PAYLOAD_LEN;
931			buf = build_packet(_buf, sizeof(_buf), &len);
932		}
933	}
934
935	if (close(fd))
936		error(1, errno, "close tx");
937}
938
939static void do_rx(int fdp, int fdr)
940{
941	unsigned long count_udp = 0, count_pkt = 0;
942	long tleft, tstop;
943	struct pollfd pfd;
944
945	tstop = gettimeofday_ms() + cfg_timeout_ms;
946	tleft = cfg_timeout_ms;
947
948	do {
949		pfd.events = POLLIN;
950		pfd.fd = fdp;
951		if (poll(&pfd, 1, tleft) == -1)
952			error(1, errno, "poll");
953
954		if (pfd.revents & POLLIN)
955			count_pkt += recv_packet(fdp);
956
957		if (cfg_proto == IPPROTO_UDP)
958			count_udp += recv_udp(fdr);
959
960		tleft = tstop - gettimeofday_ms();
961	} while (tleft > 0);
962
963	if (close(fdr))
964		error(1, errno, "close r");
965	if (close(fdp))
966		error(1, errno, "close p");
967
968	if (count_pkt < cfg_num_pkt)
969		error(1, 0, "rx: missing packets at pf_packet: %lu < %u",
970		      count_pkt, cfg_num_pkt);
971
972	if (cfg_proto == IPPROTO_UDP) {
973		if (cfg_bad_csum && count_udp)
974			error(1, 0, "rx: unexpected packets at udp");
975		if (!cfg_bad_csum && !count_udp)
976			error(1, 0, "rx: missing packets at udp");
977	}
978}
979
980int main(int argc, char *const argv[])
981{
982	int fdp = -1, fdr = -1;		/* -1 to silence -Wmaybe-uninitialized */
983
984	parse_args(argc, argv);
985
986	/* open receive sockets before transmitting */
987	if (cfg_do_rx) {
988		fdp = recv_prepare_packet();
989		fdr = recv_prepare_udp();
990	}
991
992	if (cfg_do_tx)
993		do_tx();
994
995	if (cfg_do_rx)
996		do_rx(fdp, fdr);
997
998	fprintf(stderr, "OK\n");
999	return 0;
1000}
1001