1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29#include "opt_inet.h"
30#include "opt_inet6.h"
31
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/lock.h>
35#include <sys/rmlock.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/kernel.h>
39#include <sys/socket.h>
40#include <sys/sysctl.h>
41#include <net/vnet.h>
42
43#include <net/if.h>
44#include <net/if_var.h>
45
46#include <netinet/in.h>
47#include <netinet/in_fib.h>
48#include <netinet/ip.h>
49
50#include <netinet6/in6_fib.h>
51
52#include <net/route.h>
53#include <net/route/nhop.h>
54#include <net/route/route_ctl.h>
55#include <net/route/route_var.h>
56#include <net/route/fib_algo.h>
57
/* Approximate number of lookups done in one timed pass of the run_* tests. */
#define	CHUNK_SIZE	10000

/* Per-VNET array of IPv4 lookup keys; grown by one entry per add_addr(). */
VNET_DEFINE_STATIC(struct in_addr *, inet_addr_list);
#define	V_inet_addr_list	VNET(inet_addr_list)
/* Number of entries currently stored in V_inet_addr_list. */
VNET_DEFINE_STATIC(int, inet_list_size);
#define	V_inet_list_size	VNET(inet_list_size)

/* Per-VNET array of IPv6 lookup keys; grown by one entry per add_addr(). */
VNET_DEFINE_STATIC(struct in6_addr *, inet6_addr_list);
#define	V_inet6_addr_list	VNET(inet6_addr_list)
/* Number of entries currently stored in V_inet6_addr_list. */
VNET_DEFINE_STATIC(int, inet6_list_size);
#define	V_inet6_list_size	VNET(inet6_list_size)

/* Parent node for every sysctl knob registered by this file. */
SYSCTL_DECL(_net_route);
SYSCTL_NODE(_net_route, OID_AUTO, test, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Route algorithm lookups");
73
74static int
75add_addr(int family, char *addr_str)
76{
77
78	if (family == AF_INET) {
79		struct in_addr *paddr_old = V_inet_addr_list;
80		int size_old = V_inet_list_size;
81		struct in_addr addr;
82
83		if (inet_pton(AF_INET, addr_str, &addr) != 1)
84			return (EINVAL);
85
86		struct in_addr *paddr = mallocarray(size_old + 1,
87		    sizeof(struct in_addr), M_TEMP, M_ZERO | M_WAITOK);
88
89		if (paddr_old != NULL) {
90			memcpy(paddr, paddr_old, size_old * sizeof(struct in_addr));
91			free(paddr_old, M_TEMP);
92		}
93		paddr[size_old] = addr;
94
95		V_inet_addr_list = paddr;
96		V_inet_list_size = size_old + 1;
97		inet_ntop(AF_INET, &addr, addr_str, sizeof(addr_str));
98	} else if (family == AF_INET6) {
99		struct in6_addr *paddr_old = V_inet6_addr_list;
100		int size_old = V_inet6_list_size;
101		struct in6_addr addr6;
102
103		if (inet_pton(AF_INET6, addr_str, &addr6) != 1)
104			return (EINVAL);
105
106		struct in6_addr *paddr = mallocarray(size_old + 1,
107		    sizeof(struct in6_addr), M_TEMP, M_ZERO | M_WAITOK);
108
109		if (paddr_old != NULL) {
110			memcpy(paddr, paddr_old, size_old * sizeof(struct in6_addr));
111			free(paddr_old, M_TEMP);
112		}
113		paddr[size_old] = addr6;
114
115		V_inet6_addr_list = paddr;
116		V_inet6_list_size = size_old + 1;
117		inet_ntop(AF_INET6, &addr6, addr_str, sizeof(addr_str));
118	}
119
120	return (0);
121}
122
123static int
124add_addr_sysctl_handler(struct sysctl_oid *oidp, struct sysctl_req *req, int family)
125{
126	char addr_str[INET6_ADDRSTRLEN];
127	int error;
128
129	bzero(addr_str, sizeof(addr_str));
130
131	error = sysctl_handle_string(oidp, addr_str, sizeof(addr_str), req);
132	if (error != 0 || req->newptr == NULL)
133		return (error);
134
135	error = add_addr(family, addr_str);
136
137	return (0);
138}
139
140static int
141add_inet_addr_sysctl_handler(SYSCTL_HANDLER_ARGS)
142{
143
144	return (add_addr_sysctl_handler(oidp, req, AF_INET));
145}
146SYSCTL_PROC(_net_route_test, OID_AUTO, add_inet_addr,
147    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
148    add_inet_addr_sysctl_handler, "A", "Set");
149
150static int
151add_inet6_addr_sysctl_handler(SYSCTL_HANDLER_ARGS)
152{
153
154	return (add_addr_sysctl_handler(oidp, req, AF_INET6));
155}
156SYSCTL_PROC(_net_route_test, OID_AUTO, add_inet6_addr,
157    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
158    add_inet6_addr_sysctl_handler, "A", "Set");
159
160static uint64_t
161run_test_inet_one_pass(uint32_t fibnum)
162{
163	/* Assume epoch */
164	int sz = V_inet_list_size;
165	int tries = CHUNK_SIZE / sz;
166	const struct in_addr *a = V_inet_addr_list;
167	uint64_t count = 0;
168
169	for (int pass = 0; pass < tries; pass++) {
170		for (int i = 0; i < sz; i++) {
171			fib4_lookup(fibnum, a[i], 0, NHR_NONE, 0);
172			count++;
173		}
174	}
175	return (count);
176}
177
178static int
179run_test_inet(SYSCTL_HANDLER_ARGS)
180{
181	struct epoch_tracker et;
182
183	int count = 0;
184	int error = sysctl_handle_int(oidp, &count, 0, req);
185	if (error != 0)
186		return (error);
187
188	if (count == 0)
189		return (0);
190
191	if (V_inet_list_size <= 0)
192		return (ENOENT);
193
194	printf("run: %d packets vnet %p\n", count, curvnet);
195	if (count < CHUNK_SIZE)
196		count = CHUNK_SIZE;
197
198	struct timespec ts_pre, ts_post;
199	int64_t pass_diff, total_diff = 0;
200	uint64_t pass_packets, total_packets = 0;
201	uint32_t fibnum = curthread->td_proc->p_fibnum;
202
203	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
204		NET_EPOCH_ENTER(et);
205		nanouptime(&ts_pre);
206		pass_packets = run_test_inet_one_pass(fibnum);
207		nanouptime(&ts_post);
208		NET_EPOCH_EXIT(et);
209
210		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
211		    (ts_post.tv_nsec - ts_pre.tv_nsec);
212		total_diff += pass_diff;
213		total_packets += pass_packets;
214	}
215
216	printf("%zu packets in %zu nanoseconds, %zu pps\n",
217	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
218
219	return (0);
220}
221SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet,
222    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
223    0, 0, run_test_inet, "I", "Execute fib4_lookup test");
224
225static uint64_t
226run_test_inet6_one_pass(uint32_t fibnum)
227{
228	/* Assume epoch */
229	int sz = V_inet6_list_size;
230	int tries = CHUNK_SIZE / sz;
231	const struct in6_addr *a = V_inet6_addr_list;
232	uint64_t count = 0;
233
234	for (int pass = 0; pass < tries; pass++) {
235		for (int i = 0; i < sz; i++) {
236			fib6_lookup(fibnum, &a[i], 0, NHR_NONE, 0);
237			count++;
238		}
239	}
240	return (count);
241}
242
243static int
244run_test_inet6(SYSCTL_HANDLER_ARGS)
245{
246	struct epoch_tracker et;
247
248	int count = 0;
249	int error = sysctl_handle_int(oidp, &count, 0, req);
250	if (error != 0)
251		return (error);
252
253	if (count == 0)
254		return (0);
255
256	if (V_inet6_list_size <= 0)
257		return (ENOENT);
258
259	printf("run: %d packets vnet %p\n", count, curvnet);
260	if (count < CHUNK_SIZE)
261		count = CHUNK_SIZE;
262
263	struct timespec ts_pre, ts_post;
264	int64_t pass_diff, total_diff = 0;
265	uint64_t pass_packets, total_packets = 0;
266	uint32_t fibnum = curthread->td_proc->p_fibnum;
267
268	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
269		NET_EPOCH_ENTER(et);
270		nanouptime(&ts_pre);
271		pass_packets = run_test_inet6_one_pass(fibnum);
272		nanouptime(&ts_post);
273		NET_EPOCH_EXIT(et);
274
275		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
276		    (ts_post.tv_nsec - ts_pre.tv_nsec);
277		total_diff += pass_diff;
278		total_packets += pass_packets;
279	}
280
281	printf("%zu packets in %zu nanoseconds, %zu pps\n",
282	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
283
284	return (0);
285}
286SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet6,
287    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
288    0, 0, run_test_inet6, "I", "Execute fib6_lookup test");
289
290static bool
291cmp_dst(uint32_t fibnum, struct in_addr a)
292{
293	struct nhop_object *nh_fib;
294	struct rtentry *rt;
295	struct route_nhop_data rnd = {};
296
297	nh_fib = fib4_lookup(fibnum, a, 0, NHR_NONE, 0);
298	rt = fib4_lookup_rt(fibnum, a, 0, NHR_NONE, &rnd);
299
300	if (nh_fib == NULL && rt == NULL) {
301		return (true);
302	} else if (nh_fib == nhop_select(rnd.rnd_nhop, 0)) {
303		return (true);
304	}
305
306	struct in_addr dst;
307	int plen;
308	uint32_t scopeid;
309	char key_str[INET_ADDRSTRLEN], dst_str[INET_ADDRSTRLEN];
310
311	inet_ntop(AF_INET, &a, key_str, sizeof(key_str));
312	if (rnd.rnd_nhop == NULL) {
313		printf("[RT BUG] lookup for %s: RIB: ENOENT FIB: nh=%u\n",
314		    key_str, nhop_get_idx(nh_fib));
315	} else {
316		rt_get_inet_prefix_plen(rt, &dst, &plen, &scopeid);
317		inet_ntop(AF_INET, &dst, dst_str, sizeof(dst_str));
318		printf("[RT BUG] lookup for %s: RIB: %s/%d,nh=%u FIB: nh=%u\n",
319		    key_str, dst_str, plen,
320		    nhop_get_idx(nhop_select(rnd.rnd_nhop, 0)),
321		    nh_fib ? nhop_get_idx(nh_fib) : 0);
322	}
323
324	return (false);
325}
326
327static bool
328cmp_dst6(uint32_t fibnum, const struct in6_addr *a)
329{
330	struct nhop_object *nh_fib;
331	struct rtentry *rt;
332	struct route_nhop_data rnd = {};
333
334	nh_fib = fib6_lookup(fibnum, a, 0, NHR_NONE, 0);
335	rt = fib6_lookup_rt(fibnum, a, 0, NHR_NONE, &rnd);
336
337	if (nh_fib == NULL && rt == NULL) {
338		return (true);
339	} else if (nh_fib == nhop_select(rnd.rnd_nhop, 0)) {
340		return (true);
341	}
342
343	struct in6_addr dst;
344	int plen;
345	uint32_t scopeid;
346	char key_str[INET6_ADDRSTRLEN], dst_str[INET6_ADDRSTRLEN];
347
348	inet_ntop(AF_INET6, a, key_str, sizeof(key_str));
349	if (rnd.rnd_nhop == NULL) {
350		printf("[RT BUG] lookup for %s: RIB: ENOENT FIB: nh=%u\n",
351		    key_str, nhop_get_idx(nh_fib));
352	} else {
353		rt_get_inet6_prefix_plen(rt, &dst, &plen, &scopeid);
354		inet_ntop(AF_INET6, &dst, dst_str, sizeof(dst_str));
355		printf("[RT BUG] lookup for %s: RIB: %s/%d,nh=%u FIB: nh=%u\n",
356		    key_str, dst_str, plen,
357		    nhop_get_idx(nhop_select(rnd.rnd_nhop, 0)),
358		    nh_fib ? nhop_get_idx(nh_fib) : 0);
359	}
360
361	return (false);
362}
363
364/* Random lookups: correctness verification */
365static uint64_t
366run_test_inet_one_pass_random(uint32_t fibnum)
367{
368	/* Assume epoch */
369	struct in_addr a[64];
370	int sz = 64;
371	uint64_t count = 0;
372
373	for (int pass = 0; pass < CHUNK_SIZE / sz; pass++) {
374		arc4random_buf(a, sizeof(a));
375		for (int i = 0; i < sz; i++) {
376			if (!cmp_dst(fibnum, a[i]))
377				return (0);
378			count++;
379		}
380	}
381	return (count);
382}
383
384static int
385run_test_inet_random(SYSCTL_HANDLER_ARGS)
386{
387	struct epoch_tracker et;
388
389	int count = 0;
390	int error = sysctl_handle_int(oidp, &count, 0, req);
391	if (error != 0)
392		return (error);
393
394	if (count == 0)
395		return (0);
396
397	if (count < CHUNK_SIZE)
398		count = CHUNK_SIZE;
399
400	struct timespec ts_pre, ts_post;
401	int64_t pass_diff, total_diff = 1;
402	uint64_t pass_packets, total_packets = 0;
403	uint32_t fibnum = curthread->td_proc->p_fibnum;
404
405	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
406		NET_EPOCH_ENTER(et);
407		nanouptime(&ts_pre);
408		pass_packets = run_test_inet_one_pass_random(fibnum);
409		nanouptime(&ts_post);
410		NET_EPOCH_EXIT(et);
411
412		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
413		    (ts_post.tv_nsec - ts_pre.tv_nsec);
414		total_diff += pass_diff;
415		total_packets += pass_packets;
416
417		if (pass_packets == 0)
418			break;
419	}
420
421	/* Signal error to userland */
422	if (pass_packets == 0)
423		return (EINVAL);
424
425	printf("%zu packets in %zu nanoseconds, %zu pps\n",
426	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
427
428	return (0);
429}
430SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet_random,
431    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
432    0, 0, run_test_inet_random, "I", "Execute fib4_lookup random check tests");
433
434
/* State shared between prepare_list() and its add_prefix() walk callback. */
struct inet_array {
	uint32_t alloc_items;	/* number of entries allocated in arr */
	uint32_t num_items;	/* number of entries filled in so far */
	uint32_t rnh_prefixes;	/* routes seen by the walk callback */
	int error;		/* 0, or ENOSPC once arr ran out of room */
	struct in_addr *arr;	/* addresses to look up */
};
442
443/*
444 * For each prefix, add the following records to the lookup array:
445 * * prefix-1, prefix, prefix + 1, prefix_end, prefix_end + 1
446 */
447static int
448add_prefix(struct rtentry *rt, void *_data)
449{
450	struct inet_array *pa = (struct inet_array *)_data;
451	struct in_addr addr;
452	int plen;
453	uint32_t scopeid, haddr;
454
455	pa->rnh_prefixes++;
456
457	if (pa->num_items + 5 >= pa->alloc_items) {
458		if (pa->error == 0)
459			pa->error = ENOSPC;
460		return (0);
461	}
462
463	rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
464
465	pa->arr[pa->num_items++] = addr;
466	haddr = ntohl(addr.s_addr);
467	if (haddr > 0) {
468		pa->arr[pa->num_items++].s_addr = htonl(haddr - 1);
469		pa->arr[pa->num_items++].s_addr = htonl(haddr + 1);
470		/* assume mask != 0 */
471		uint32_t mlen = (1 << (32 - plen)) - 1;
472		pa->arr[pa->num_items++].s_addr = htonl(haddr + mlen);
473		/* can overflow, but who cares */
474		pa->arr[pa->num_items++].s_addr = htonl(haddr + mlen + 1);
475	}
476
477	return (0);
478}
479
480static bool
481prepare_list(uint32_t fibnum, struct inet_array *pa)
482{
483	struct rib_head *rh;
484
485	rh = rt_tables_get_rnh(fibnum, AF_INET);
486
487	uint32_t num_prefixes = rh->rnh_prefixes;
488	bzero(pa, sizeof(struct inet_array));
489	pa->alloc_items = (num_prefixes + 10) * 5;
490	pa->arr = mallocarray(pa->alloc_items, sizeof(struct in_addr),
491	    M_TEMP, M_ZERO | M_WAITOK);
492
493	rib_walk(fibnum, AF_INET, false, add_prefix, pa);
494
495	if (pa->error != 0) {
496		printf("prefixes: old: %u, current: %u, walked: %u, allocated: %u\n",
497		    num_prefixes, rh->rnh_prefixes, pa->rnh_prefixes, pa->alloc_items);
498	}
499
500	return (pa->error == 0);
501}
502
503static int
504run_test_inet_scan(SYSCTL_HANDLER_ARGS)
505{
506	struct epoch_tracker et;
507
508	int count = 0;
509	int error = sysctl_handle_int(oidp, &count, 0, req);
510	if (error != 0)
511		return (error);
512
513	if (count == 0)
514		return (0);
515
516	struct inet_array pa = {};
517	uint32_t fibnum = curthread->td_proc->p_fibnum;
518
519	if (!prepare_list(fibnum, &pa))
520		return (pa.error);
521
522	struct timespec ts_pre, ts_post;
523	int64_t total_diff = 1;
524	uint64_t total_packets = 0;
525	int failure_count = 0;
526
527	NET_EPOCH_ENTER(et);
528	nanouptime(&ts_pre);
529	for (int i = 0; i < pa.num_items; i++) {
530		if (!cmp_dst(fibnum, pa.arr[i])) {
531			failure_count++;
532		}
533		total_packets++;
534	}
535	nanouptime(&ts_post);
536	NET_EPOCH_EXIT(et);
537
538	if (pa.arr != NULL)
539		free(pa.arr, M_TEMP);
540
541	/* Signal error to userland */
542	if (failure_count > 0) {
543		printf("[RT ERROR] total failures: %d\n", failure_count);
544		return (EINVAL);
545	}
546
547	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
548	    (ts_post.tv_nsec - ts_pre.tv_nsec);
549	printf("%zu packets in %zu nanoseconds, %zu pps\n",
550	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
551
552	return (0);
553}
554SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet_scan,
555    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
556    0, 0, run_test_inet_scan, "I", "Execute fib4_lookup scan tests");
557
/* State shared between prepare_list6() and its add_prefix6() walk callback. */
struct inet6_array {
	uint32_t alloc_items;	/* number of entries allocated in arr */
	uint32_t num_items;	/* number of entries filled in so far */
	uint32_t rnh_prefixes;	/* routes seen by the walk callback */
	int error;		/* 0, or ENOSPC once arr ran out of room */
	struct in6_addr *arr;	/* addresses to look up */
};
565
/*
 * Add inc to *v with 32-bit wrap-around.
 *
 * Returns true when the sum fits in 32 bits and false when it wrapped,
 * i.e. when a carry has to be propagated to the next more significant
 * word.  A sum of exactly UINT32_MAX fits and does not produce a carry.
 */
static bool
safe_add(uint32_t *v, uint32_t inc)
{
	const uint32_t before = *v;

	/* Unsigned addition wraps modulo 2^32; a smaller result means carry. */
	*v = before + inc;
	return (*v >= before);
}
577
/*
 * Subtract inc from *v with 32-bit wrap-around.
 *
 * Returns true when no borrow was needed (*v was at least inc) and false
 * when the result wrapped below zero.
 */
static bool
safe_dec(uint32_t *v, uint32_t inc)
{
	const uint32_t before = *v;

	/* Unsigned subtraction wraps modulo 2^32 on its own. */
	*v = before - inc;
	return (before >= inc);
}
589
590static void
591inc_prefix6(struct in6_addr *addr, int inc)
592{
593	for (int i = 0; i < 4; i++) {
594		uint32_t v = ntohl(addr->s6_addr32[3 - i]);
595		bool ret = safe_add(&v, inc);
596		addr->s6_addr32[3 - i] = htonl(v);
597		if (ret)
598			return;
599		inc = 1;
600	}
601}
602
603static void
604dec_prefix6(struct in6_addr *addr, int dec)
605{
606	for (int i = 0; i < 4; i++) {
607		uint32_t v = ntohl(addr->s6_addr32[3 - i]);
608		bool ret = safe_dec(&v, dec);
609		addr->s6_addr32[3 - i] = htonl(v);
610		if (ret)
611			return;
612		dec = 1;
613	}
614}
615
/*
 * Store the IPv6 netmask for a prefix length of `mask' bits in *addr6,
 * in network byte order.
 *
 * The whole address is written: the leading `mask' bits are set and all
 * remaining bits are cleared, so the caller does not need to initialize
 * *addr6 first.  Prefix lengths above 128 are treated as 128.
 */
static void
ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
{
	uint32_t *cp = (uint32_t *)addr6;

	if (mask > 128)
		mask = 128;

	/* Clear everything so words past the prefix are well defined. */
	memset(addr6, 0, sizeof(*addr6));

	for (; mask >= 32; mask -= 32)
		*cp++ = 0xFFFFFFFF;
	/* 1..31 bits left: unsigned shift, never by the full type width. */
	if (mask > 0)
		*cp = htonl(~(((uint32_t)1 << (32 - mask)) - 1));
}
626
627/*
628 * For each prefix, add the following records to the lookup array:
629 * * prefix-1, prefix, prefix + 1, prefix_end, prefix_end + 1
630 */
631static int
632add_prefix6(struct rtentry *rt, void *_data)
633{
634	struct inet6_array *pa = (struct inet6_array *)_data;
635	struct in6_addr addr, naddr;
636	int plen;
637	uint32_t scopeid;
638
639	pa->rnh_prefixes++;
640
641	if (pa->num_items + 5 >= pa->alloc_items) {
642		if (pa->error == 0)
643			pa->error = ENOSPC;
644		return (0);
645	}
646
647	rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
648
649	pa->arr[pa->num_items++] = addr;
650	if (!IN6_ARE_ADDR_EQUAL(&addr, &in6addr_any)) {
651		naddr = addr;
652		dec_prefix6(&naddr, 1);
653		pa->arr[pa->num_items++] = naddr;
654		naddr = addr;
655		inc_prefix6(&naddr, 1);
656		pa->arr[pa->num_items++] = naddr;
657
658		/* assume mask != 0 */
659		struct in6_addr mask6;
660		ipv6_writemask(&mask6, plen);
661		naddr = addr;
662		for (int i = 0; i < 3; i++)
663			naddr.s6_addr32[i] = htonl(ntohl(naddr.s6_addr32[i]) | ~ntohl(mask6.s6_addr32[i]));
664
665		pa->arr[pa->num_items++] = naddr;
666		inc_prefix6(&naddr, 1);
667		pa->arr[pa->num_items++] = naddr;
668	}
669
670	return (0);
671}
672
673static bool
674prepare_list6(uint32_t fibnum, struct inet6_array *pa)
675{
676	struct rib_head *rh;
677
678	rh = rt_tables_get_rnh(fibnum, AF_INET6);
679
680	uint32_t num_prefixes = rh->rnh_prefixes;
681	bzero(pa, sizeof(struct inet6_array));
682	pa->alloc_items = (num_prefixes + 10) * 5;
683	pa->arr = mallocarray(pa->alloc_items, sizeof(struct in6_addr),
684	    M_TEMP, M_ZERO | M_WAITOK);
685
686	rib_walk(fibnum, AF_INET6, false, add_prefix6, pa);
687
688	if (pa->error != 0) {
689		printf("prefixes: old: %u, current: %u, walked: %u, allocated: %u\n",
690		    num_prefixes, rh->rnh_prefixes, pa->rnh_prefixes, pa->alloc_items);
691	}
692
693	return (pa->error == 0);
694}
695
696static int
697run_test_inet6_scan(SYSCTL_HANDLER_ARGS)
698{
699	struct epoch_tracker et;
700
701	int count = 0;
702	int error = sysctl_handle_int(oidp, &count, 0, req);
703	if (error != 0)
704		return (error);
705
706	if (count == 0)
707		return (0);
708
709	struct inet6_array pa = {};
710	uint32_t fibnum = curthread->td_proc->p_fibnum;
711
712	if (!prepare_list6(fibnum, &pa))
713		return (pa.error);
714
715	struct timespec ts_pre, ts_post;
716	int64_t total_diff = 1;
717	uint64_t total_packets = 0;
718	int failure_count = 0;
719
720	NET_EPOCH_ENTER(et);
721	nanouptime(&ts_pre);
722	for (int i = 0; i < pa.num_items; i++) {
723		if (!cmp_dst6(fibnum, &pa.arr[i])) {
724			failure_count++;
725		}
726		total_packets++;
727	}
728	nanouptime(&ts_post);
729	NET_EPOCH_EXIT(et);
730
731	if (pa.arr != NULL)
732		free(pa.arr, M_TEMP);
733
734	/* Signal error to userland */
735	if (failure_count > 0) {
736		printf("[RT ERROR] total failures: %d\n", failure_count);
737		return (EINVAL);
738	}
739
740	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
741	    (ts_post.tv_nsec - ts_pre.tv_nsec);
742	printf("%zu packets in %zu nanoseconds, %zu pps\n",
743	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
744
745	return (0);
746}
747SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet6_scan,
748    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
749    0, 0, run_test_inet6_scan, "I", "Execute fib6_lookup scan tests");
750
/* Mode flags for rnd_lps(), passed as the sysctl's arg2. */
#define	LPS_SEQ		0x1	/* make each lookup depend on earlier results */
#define	LPS_ANN		0x2	/* draw keys from prefixes present in the RIB */
#define	LPS_REP		0x4	/* periodically step back and reuse keys */

/* Cursor handed to reduce_keys() while walking the RIB. */
struct lps_walk_state {
	uint32_t *keys;		/* key array being rewritten */
	int pos;		/* index of the next key to rewrite */
	int lim;		/* number of entries in keys */
};
760
761static int
762reduce_keys(struct rtentry *rt, void *_data)
763{
764        struct lps_walk_state *wa = (struct lps_walk_state *) _data;
765	struct in_addr addr;
766	uint32_t scopeid;
767	int plen;
768
769	rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
770	wa->keys[wa->pos] = ntohl(addr.s_addr) |
771	    (wa->keys[wa->pos] & ~(0xffffffffU << (32 - plen)));
772
773	wa->pos++;
774	return (wa->pos == wa->lim);
775}
776
777static int
778rnd_lps(SYSCTL_HANDLER_ARGS)
779{
780	struct epoch_tracker et;
781	struct in_addr key;
782	struct lps_walk_state wa;
783	struct timespec ts_pre, ts_post;
784	struct nhop_object *nh_fib;
785	uint64_t total_diff, lps;
786	uint32_t *keys, fibnum;
787	uint32_t t, p;
788	uintptr_t acc = 0;
789	int i, pos, count = 0;
790	int seq = 0, rep = 0;
791	int error;
792
793	error = sysctl_handle_int(oidp, &count, 0, req);
794	if (error != 0)
795		return (error);
796	if (count <= 0)
797		return (0);
798	fibnum = curthread->td_proc->p_fibnum;
799
800	keys = malloc(sizeof(*keys) * count, M_TEMP, M_NOWAIT);
801	if (keys == NULL)
802		return (ENOMEM);
803	printf("Preparing %d random keys...\n", count);
804	arc4random_buf(keys, sizeof(*keys) * count);
805	if (arg2 & LPS_ANN) {
806		wa.keys = keys;
807		wa.pos = 0;
808		wa.lim = count;
809		printf("Reducing keys to announced address space...\n");
810		do {
811			rib_walk(fibnum, AF_INET, false, reduce_keys,
812			    &wa);
813		} while (wa.pos < wa.lim);
814		printf("Reshuffling keys...\n");
815		for (int i = 0; i < count; i++) {
816			p = random() % count;
817			t = keys[p];
818			keys[p] = keys[i];
819			keys[i] = t;
820		}
821	}
822
823	if (arg2 & LPS_REP) {
824		rep = 1;
825		printf("REP ");
826	}
827	if (arg2 & LPS_SEQ) {
828		seq = 1;
829		printf("SEQ");
830	} else if (arg2 & LPS_ANN)
831		printf("ANN");
832	else
833		printf("RND");
834	printf(" LPS test starting...\n");
835
836	NET_EPOCH_ENTER(et);
837	nanouptime(&ts_pre);
838	for (i = 0, pos = 0; i < count; i++) {
839		key.s_addr = keys[pos++] ^ ((acc >> 10) & 0xff);
840		nh_fib = fib4_lookup(fibnum, key, 0, NHR_NONE, 0);
841		if (seq) {
842			if (nh_fib != NULL) {
843				acc += (uintptr_t) nh_fib + 123;
844				if (acc & 0x1000)
845					acc += (uintptr_t) nh_fib->nh_ifp;
846				else
847					acc -= (uintptr_t) nh_fib->nh_ifp;
848			} else
849				acc ^= (acc >> 3) + (acc << 2) + i;
850			if (acc & 0x800)
851				pos++;
852			if (pos >= count)
853				pos = 0;
854		}
855		if (rep && ((i & 0xf) == 0xf)) {
856			pos -= 0xf;
857			if (pos < 0)
858				pos += 0xf;
859		}
860	}
861	nanouptime(&ts_post);
862	NET_EPOCH_EXIT(et);
863
864	free(keys, M_TEMP);
865
866	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
867	    (ts_post.tv_nsec - ts_pre.tv_nsec);
868	lps = 1000000000ULL * count / total_diff;
869	printf("%d lookups in %zu.%06zu milliseconds, %lu.%06lu MLPS\n",
870	    count, total_diff / 1000000, total_diff % 1000000,
871	    lps / 1000000, lps % 1000000);
872
873	return (0);
874}
/*
 * One sysctl knob per combination of the LPS_SEQ / LPS_ANN / LPS_REP mode
 * flags.  All of them are served by rnd_lps(); the flag set is passed as
 * the handler's arg2.
 */
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, rnd_lps, "I",
    "Measure lookups per second, uniformly random keys, independent lookups");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_ann,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_ANN, rnd_lps, "I",
    "Measure lookups per second, random keys from announced address space, "
    "independent lookups");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_SEQ, rnd_lps, "I",
    "Measure lookups per second, uniformly random keys, "
    "artificial dependencies between lookups");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_ann,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_SEQ | LPS_ANN, rnd_lps, "I",
    "Measure lookups per second, random keys from announced address space, "
    "artificial dependencies between lookups");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_rep,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_REP, rnd_lps, "I",
    "Measure lookups per second, uniformly random keys, independent lookups, "
    "repeated keys");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_ann_rep,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_ANN | LPS_REP, rnd_lps, "I",
    "Measure lookups per second, random keys from announced address space, "
    "independent lookups, repeated keys");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_rep,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_SEQ | LPS_REP, rnd_lps, "I",
    "Measure lookups per second, uniformly random keys, "
    "artificial dependencies between lookups, repeated keys");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_ann_rep,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_SEQ | LPS_ANN | LPS_REP, rnd_lps, "I",
    "Measure lookups per second, random keys from announced address space, "
    "artificial dependencies between lookups, repeated keys");
914
915static int
916test_fib_lookup_modevent(module_t mod, int type, void *unused)
917{
918	int error = 0;
919
920	switch (type) {
921	case MOD_LOAD:
922		break;
923	case MOD_UNLOAD:
924		if (V_inet_addr_list != NULL)
925			free(V_inet_addr_list, M_TEMP);
926		if (V_inet6_addr_list != NULL)
927			free(V_inet6_addr_list, M_TEMP);
928		break;
929	default:
930		error = EOPNOTSUPP;
931		break;
932	}
933	return (error);
934}
935
/* Module description: name, event handler and (unused) handler argument. */
static moduledata_t testfiblookupmod = {
        "test_fib_lookup",
        test_fib_lookup_modevent,
        0
};

/* Register the module and declare its version. */
DECLARE_MODULE(testfiblookupmod, testfiblookupmod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(testfiblookup, 1);
944