1/*	$OpenBSD: if_wg.c,v 1.38 2024/04/09 12:53:08 claudio Exp $ */
2
3/*
4 * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5 * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20#include "bpfilter.h"
21#include "pf.h"
22
23#include <sys/types.h>
24#include <sys/systm.h>
25#include <sys/param.h>
26#include <sys/pool.h>
27
28#include <sys/socket.h>
29#include <sys/socketvar.h>
30#include <sys/percpu.h>
31#include <sys/ioctl.h>
32#include <sys/mbuf.h>
33
34#include <net/if.h>
35#include <net/if_var.h>
36#include <net/if_types.h>
37#include <net/if_wg.h>
38
39#include <net/wg_noise.h>
40#include <net/wg_cookie.h>
41
42#include <net/pfvar.h>
43#include <net/route.h>
44#include <net/bpf.h>
45#include <net/art.h>
46
47#include <netinet/ip.h>
48#include <netinet/ip6.h>
49#include <netinet/udp.h>
50#include <netinet/in_pcb.h>
51
52#include <crypto/siphash.h>
53
54#define DEFAULT_MTU		1420
55
56#define MAX_STAGED_PKT		128
57#define MAX_QUEUED_PKT		1024
58#define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
59
60#define MAX_QUEUED_HANDSHAKES	4096
61
62#define HASHTABLE_PEER_SIZE	(1 << 11)
63#define HASHTABLE_INDEX_SIZE	(1 << 13)
64#define MAX_PEERS_PER_IFACE	(1 << 20)
65
66#define REKEY_TIMEOUT		5
67#define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
68#define KEEPALIVE_TIMEOUT	10
69#define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
70#define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
71#define UNDERLOAD_TIMEOUT	1
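
/*
 * Illustrative arithmetic for the timers below (not used by the code):
 * a handshake initiation is retried REKEY_TIMEOUT * 1000 +
 * arc4random_uniform(REKEY_TIMEOUT_JITTER) milliseconds after it was sent,
 * i.e. somewhere in [5000, 5333] ms.  After MAX_TIMER_HANDSHAKES (18)
 * unsuccessful retries, roughly 90 seconds, the peer stops retrying until
 * new traffic arrives.  NEW_HANDSHAKE_TIMEOUT means a fresh handshake is
 * attempted 15 seconds after data was sent without any reply.
 */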
72
73#define DPRINTF(sc, str, ...) do { if (ISSET((sc)->sc_if.if_flags, IFF_DEBUG))\
74    printf("%s: " str, (sc)->sc_if.if_xname, ##__VA_ARGS__); } while (0)
75
76#define CONTAINER_OF(ptr, type, member) ({			\
77	const __typeof( ((type *)0)->member ) *__mptr = (ptr);	\
78	(type *)( (char *)__mptr - offsetof(type,member) );})
79
80/* First byte indicating packet type on the wire */
81#define WG_PKT_INITIATION htole32(1)
82#define WG_PKT_RESPONSE htole32(2)
83#define WG_PKT_COOKIE htole32(3)
84#define WG_PKT_DATA htole32(4)
85
86#define WG_PKT_WITH_PADDING(n)	(((n) + (16-1)) & (~(16-1)))
87#define WG_KEY_SIZE		WG_KEY_LEN
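
/*
 * Worked example of the padding rule above (illustration only): plaintext
 * is rounded up to a multiple of 16 bytes, so WG_PKT_WITH_PADDING(0) == 0
 * (a keepalive stays empty), WG_PKT_WITH_PADDING(1) == 16 and
 * WG_PKT_WITH_PADDING(1419) == 1424; wg_encap() additionally clamps the
 * padded length to the interface MTU.  DEFAULT_MTU of 1420 assumes a 1500
 * byte path with an IPv6 outer header: 40 (IPv6) + 8 (UDP) + 16
 * (wg_pkt_data header) + 16 (auth tag) = 80 bytes of overhead.
 */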
88
89struct wg_pkt_initiation {
90	uint32_t		t;
91	uint32_t		s_idx;
92	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
93	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
94	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
95	struct cookie_macs	m;
96};
97
98struct wg_pkt_response {
99	uint32_t		t;
100	uint32_t		s_idx;
101	uint32_t		r_idx;
102	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
103	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
104	struct cookie_macs	m;
105};
106
107struct wg_pkt_cookie {
108	uint32_t		t;
109	uint32_t		r_idx;
110	uint8_t			nonce[COOKIE_NONCE_SIZE];
111	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
112};
113
114struct wg_pkt_data {
115	uint32_t		t;
116	uint32_t		r_idx;
117	uint8_t			nonce[sizeof(uint64_t)];
118	uint8_t			buf[];
119};
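
/*
 * With the usual constant values (32 byte Curve25519 keys, 16 byte
 * poly1305 tags, 12 byte TAI64N timestamps, 24/32 byte cookie nonce and
 * box), the fixed wire sizes work out to: initiation 148 bytes, response
 * 92 bytes, cookie 64 bytes, and data 32 bytes plus the padded plaintext.
 * This is illustrative arithmetic only; the structs above are
 * authoritative.
 */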
120
121struct wg_endpoint {
122	union {
123		struct sockaddr		r_sa;
124		struct sockaddr_in	r_sin;
125#ifdef INET6
126		struct sockaddr_in6	r_sin6;
127#endif
128	} e_remote;
129	union {
130		struct in_addr		l_in;
131#ifdef INET6
132		struct in6_pktinfo	l_pktinfo6;
133#define l_in6 l_pktinfo6.ipi6_addr
134#endif
135	} e_local;
136};
137
138struct wg_tag {
139	struct wg_endpoint	 t_endpoint;
140	struct wg_peer		*t_peer;
141	struct mbuf		*t_mbuf;
142	int			 t_done;
143	int			 t_mtu;
144};
145
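/*
 * Each peer preallocates three wg_index slots (p_index[]) and keeps the
 * spare ones on p_unused_index.  wg_index_set() publishes one in the
 * interface-wide sc_index hash under a random 32-bit key, which is the
 * receiver index the peer echoes back on the wire; wg_index_get() and
 * wg_index_drop() translate and recycle them.
 */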
146struct wg_index {
147	LIST_ENTRY(wg_index)	 i_entry;
148	SLIST_ENTRY(wg_index)	 i_unused_entry;
149	uint32_t		 i_key;
150	struct noise_remote	*i_value;
151};
152
153struct wg_timers {
154	/* t_mtx is for blocking wg_timers_event_* when setting t_disabled. */
155	struct mutex		 t_mtx;
156
157	int			 t_disabled;
158	int			 t_need_another_keepalive;
159	uint16_t		 t_persistent_keepalive_interval;
160	struct timeout		 t_new_handshake;
161	struct timeout		 t_send_keepalive;
162	struct timeout		 t_retry_handshake;
163	struct timeout		 t_zero_key_material;
164	struct timeout		 t_persistent_keepalive;
165
166	struct mutex		 t_handshake_mtx;
167	struct timespec		 t_handshake_last_sent;	/* nanouptime */
168	struct timespec		 t_handshake_complete;	/* nanotime */
169	int			 t_handshake_retries;
170};
171
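/*
 * A wg_aip ("allowed IP") ties one prefix in the per-AF ART tree
 * (sc_aip4/sc_aip6) to the peer that owns it.  wg_aip_lookup() maps an
 * address to a peer, both to route outbound packets and to validate the
 * inner source address of decrypted packets in wg_decap().
 */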
172struct wg_aip {
173	struct art_node		 a_node;
174	LIST_ENTRY(wg_aip)	 a_entry;
175	struct wg_peer		*a_peer;
176	struct wg_aip_io	 a_data;
177};
178
179struct wg_queue {
180	struct mutex		 q_mtx;
181	struct mbuf_list	 q_list;
182};
183
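/*
 * struct wg_ring is a fixed-size FIFO shared by every peer on the
 * interface (sc_encap_ring/sc_decap_ring).  r_head and r_tail are
 * free-running counters protected by r_mtx; since MAX_QUEUED_PKT is a
 * power of two, slots are addressed with (counter & MAX_QUEUED_PKT_MASK)
 * and the occupancy is simply r_tail - r_head.
 */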
184struct wg_ring {
185	struct mutex	 r_mtx;
186	uint32_t	 r_head;
187	uint32_t	 r_tail;
188	struct mbuf	*r_buf[MAX_QUEUED_PKT];
189};
190
191struct wg_peer {
192	LIST_ENTRY(wg_peer)	 p_pubkey_entry;
193	TAILQ_ENTRY(wg_peer)	 p_seq_entry;
194	uint64_t		 p_id;
195	struct wg_softc		*p_sc;
196
197	struct noise_remote	 p_remote;
198	struct cookie_maker	 p_cookie;
199	struct wg_timers	 p_timers;
200
201	struct mutex		 p_counters_mtx;
202	uint64_t		 p_counters_tx;
203	uint64_t		 p_counters_rx;
204
205	struct mutex		 p_endpoint_mtx;
206	struct wg_endpoint	 p_endpoint;
207
208	struct task		 p_send_initiation;
209	struct task		 p_send_keepalive;
210	struct task		 p_clear_secrets;
211	struct task		 p_deliver_out;
212	struct task		 p_deliver_in;
213
214	struct mbuf_queue	 p_stage_queue;
215	struct wg_queue		 p_encap_queue;
216	struct wg_queue		 p_decap_queue;
217
218	SLIST_HEAD(,wg_index)	 p_unused_index;
219	struct wg_index		 p_index[3];
220
221	LIST_HEAD(,wg_aip)	 p_aip;
222
223	SLIST_ENTRY(wg_peer)	 p_start_list;
224	int			 p_start_onlist;
225
226	char			 p_description[IFDESCRSIZE];
227};
228
229struct wg_softc {
230	struct ifnet		 sc_if;
231	SIPHASH_KEY		 sc_secret;
232
233	struct rwlock		 sc_lock;
234	struct noise_local	 sc_local;
235	struct cookie_checker	 sc_cookie;
236	in_port_t		 sc_udp_port;
237	int			 sc_udp_rtable;
238
239	struct rwlock		 sc_so_lock;
240	struct socket		*sc_so4;
241#ifdef INET6
242	struct socket		*sc_so6;
243#endif
244
245	size_t			 sc_aip_num;
246	struct art_root		*sc_aip4;
247#ifdef INET6
248	struct art_root		*sc_aip6;
249#endif
250
251	struct rwlock		 sc_peer_lock;
252	size_t			 sc_peer_num;
253	LIST_HEAD(,wg_peer)	*sc_peer;
254	TAILQ_HEAD(,wg_peer)	 sc_peer_seq;
255	u_long			 sc_peer_mask;
256
257	struct mutex		 sc_index_mtx;
258	LIST_HEAD(,wg_index)	*sc_index;
259	u_long			 sc_index_mask;
260
261	struct task		 sc_handshake;
262	struct mbuf_queue	 sc_handshake_queue;
263
264	struct task		 sc_encap;
265	struct task		 sc_decap;
266	struct wg_ring		 sc_encap_ring;
267	struct wg_ring		 sc_decap_ring;
268};
269
270struct wg_peer *
271	wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);
272struct wg_peer *
273	wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
274void	wg_peer_destroy(struct wg_peer *);
275void	wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
276void	wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
277int	wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
278void	wg_peer_clear_src(struct wg_peer *);
279void	wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
280void	wg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
281
282int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
283struct wg_peer *
284	wg_aip_lookup(struct art_root *, void *);
285int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
286	    struct wg_aip_io *);
287
288int	wg_socket_open(struct socket **, int, in_port_t *, int *, void *);
289void	wg_socket_close(struct socket **);
290int	wg_bind(struct wg_softc *, in_port_t *, int *);
291void	wg_unbind(struct wg_softc *);
292int	wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
293void	wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *,
294	    size_t);
295
296struct wg_tag *
297	wg_tag_get(struct mbuf *);
298
299void	wg_timers_init(struct wg_timers *);
300void	wg_timers_enable(struct wg_timers *);
301void	wg_timers_disable(struct wg_timers *);
302void	wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t);
303int	wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
304void	wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
305int	wg_timers_expired_handshake_last_sent(struct wg_timers *);
306int	wg_timers_check_handshake_last_sent(struct wg_timers *);
307
308void	wg_timers_event_data_sent(struct wg_timers *);
309void	wg_timers_event_data_received(struct wg_timers *);
310void	wg_timers_event_any_authenticated_packet_sent(struct wg_timers *);
311void	wg_timers_event_any_authenticated_packet_received(struct wg_timers *);
312void	wg_timers_event_handshake_initiated(struct wg_timers *);
313void	wg_timers_event_handshake_responded(struct wg_timers *);
314void	wg_timers_event_handshake_complete(struct wg_timers *);
315void	wg_timers_event_session_derived(struct wg_timers *);
316void	wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *);
317void	wg_timers_event_want_initiation(struct wg_timers *);
318void	wg_timers_event_reset_handshake_last_sent(struct wg_timers *);
319
320void	wg_timers_run_send_initiation(void *, int);
321void	wg_timers_run_retry_handshake(void *);
322void	wg_timers_run_send_keepalive(void *);
323void	wg_timers_run_new_handshake(void *);
324void	wg_timers_run_zero_key_material(void *);
325void	wg_timers_run_persistent_keepalive(void *);
326
327void	wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
328void	wg_send_initiation(void *);
329void	wg_send_response(struct wg_peer *);
330void	wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t,
331	    struct wg_endpoint *);
332void	wg_send_keepalive(void *);
333void	wg_peer_clear_secrets(void *);
334void	wg_handshake(struct wg_softc *, struct mbuf *);
335void	wg_handshake_worker(void *);
336
337void	wg_encap(struct wg_softc *, struct mbuf *);
338void	wg_decap(struct wg_softc *, struct mbuf *);
339void	wg_encap_worker(void *);
340void	wg_decap_worker(void *);
341void	wg_deliver_out(void *);
342void	wg_deliver_in(void *);
343
344int	wg_queue_in(struct wg_softc *, struct wg_peer *, struct mbuf *);
345void	wg_queue_out(struct wg_softc *, struct wg_peer *);
346struct mbuf *
347	wg_ring_dequeue(struct wg_ring *);
348struct mbuf *
349	wg_queue_dequeue(struct wg_queue *, struct wg_tag **);
350size_t	wg_queue_len(struct wg_queue *);
351
352struct noise_remote *
353	wg_remote_get(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]);
354uint32_t
355	wg_index_set(void *, struct noise_remote *);
356struct noise_remote *
357	wg_index_get(void *, uint32_t);
358void	wg_index_drop(void *, uint32_t);
359
360struct mbuf *
361	wg_input(void *, struct mbuf *, struct ip *, struct ip6_hdr *, void *,
362	    int);
363int	wg_output(struct ifnet *, struct mbuf *, struct sockaddr *,
364	    struct rtentry *);
365int	wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
366int	wg_ioctl_get(struct wg_softc *, struct wg_data_io *);
367int	wg_ioctl(struct ifnet *, u_long, caddr_t);
368int	wg_up(struct wg_softc *);
369void	wg_down(struct wg_softc *);
370
371int	wg_clone_create(struct if_clone *, int);
372int	wg_clone_destroy(struct ifnet *);
373void	wgattach(int);
374
375uint64_t	peer_counter = 0;
376struct pool	wg_aip_pool;
377struct pool	wg_peer_pool;
378struct pool	wg_ratelimit_pool;
379struct timeval	underload_interval = { UNDERLOAD_TIMEOUT, 0 };
380
381size_t		 wg_counter = 0;
382struct taskq	*wg_handshake_taskq;
383struct taskq	*wg_crypt_taskq;
384
385struct if_clone	wg_cloner =
386    IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);
387
388struct wg_peer *
389wg_peer_create(struct wg_softc *sc, uint8_t public[WG_KEY_SIZE])
390{
391	struct wg_peer	*peer;
392	uint64_t	 idx;
393
394	rw_assert_wrlock(&sc->sc_lock);
395
396	if (sc->sc_peer_num >= MAX_PEERS_PER_IFACE)
397		return NULL;
398
399	if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
400		return NULL;
401
402	peer->p_id = peer_counter++;
403	peer->p_sc = sc;
404
405	noise_remote_init(&peer->p_remote, public, &sc->sc_local);
406	cookie_maker_init(&peer->p_cookie, public);
407	wg_timers_init(&peer->p_timers);
408
409	mtx_init(&peer->p_counters_mtx, IPL_NET);
410	peer->p_counters_tx = 0;
411	peer->p_counters_rx = 0;
412
413	strlcpy(peer->p_description, "", IFDESCRSIZE);
414
415	mtx_init(&peer->p_endpoint_mtx, IPL_NET);
416	bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));
417
418	task_set(&peer->p_send_initiation, wg_send_initiation, peer);
419	task_set(&peer->p_send_keepalive, wg_send_keepalive, peer);
420	task_set(&peer->p_clear_secrets, wg_peer_clear_secrets, peer);
421	task_set(&peer->p_deliver_out, wg_deliver_out, peer);
422	task_set(&peer->p_deliver_in, wg_deliver_in, peer);
423
424	mq_init(&peer->p_stage_queue, MAX_STAGED_PKT, IPL_NET);
425	mtx_init(&peer->p_encap_queue.q_mtx, IPL_NET);
426	ml_init(&peer->p_encap_queue.q_list);
427	mtx_init(&peer->p_decap_queue.q_mtx, IPL_NET);
428	ml_init(&peer->p_decap_queue.q_list);
429
430	SLIST_INIT(&peer->p_unused_index);
431	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0],
432	    i_unused_entry);
433	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1],
434	    i_unused_entry);
435	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2],
436	    i_unused_entry);
437
438	LIST_INIT(&peer->p_aip);
439
440	peer->p_start_onlist = 0;
441
442	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
443	idx &= sc->sc_peer_mask;
444
445	rw_enter_write(&sc->sc_peer_lock);
446	LIST_INSERT_HEAD(&sc->sc_peer[idx], peer, p_pubkey_entry);
447	TAILQ_INSERT_TAIL(&sc->sc_peer_seq, peer, p_seq_entry);
448	sc->sc_peer_num++;
449	rw_exit_write(&sc->sc_peer_lock);
450
451	DPRINTF(sc, "Peer %llu created\n", peer->p_id);
452	return peer;
453}
454
455struct wg_peer *
456wg_peer_lookup(struct wg_softc *sc, const uint8_t public[WG_KEY_SIZE])
457{
458	uint8_t		 peer_key[WG_KEY_SIZE];
459	struct wg_peer	*peer;
460	uint64_t	 idx;
461
462	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
463	idx &= sc->sc_peer_mask;
464
465	rw_enter_read(&sc->sc_peer_lock);
466	LIST_FOREACH(peer, &sc->sc_peer[idx], p_pubkey_entry) {
467		noise_remote_keys(&peer->p_remote, peer_key, NULL);
468		if (timingsafe_bcmp(peer_key, public, WG_KEY_SIZE) == 0)
469			goto done;
470	}
471	peer = NULL;
472done:
473	rw_exit_read(&sc->sc_peer_lock);
474	return peer;
475}
476
477void
478wg_peer_destroy(struct wg_peer *peer)
479{
480	struct wg_softc	*sc = peer->p_sc;
481	struct wg_aip *aip, *taip;
482
483	rw_assert_wrlock(&sc->sc_lock);
484
485	/*
486	 * Remove peer from the pubkey hashtable and disable all timeouts.
 * After this, and after flushing wg_handshake_taskq, no more handshakes
 * can be started.
489	 */
490	rw_enter_write(&sc->sc_peer_lock);
491	LIST_REMOVE(peer, p_pubkey_entry);
492	TAILQ_REMOVE(&sc->sc_peer_seq, peer, p_seq_entry);
493	sc->sc_peer_num--;
494	rw_exit_write(&sc->sc_peer_lock);
495
496	wg_timers_disable(&peer->p_timers);
497
498	taskq_barrier(wg_handshake_taskq);
499
500	/*
501	 * Now we drop all allowed ips, to drop all outgoing packets to the
502	 * peer. Then drop all the indexes to drop all incoming packets to the
503	 * peer. Then we can flush if_snd, wg_crypt_taskq and then nettq to
504	 * ensure no more references to the peer exist.
505	 */
506	LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip)
507		wg_aip_remove(sc, peer, &aip->a_data);
508
509	noise_remote_clear(&peer->p_remote);
510
511	NET_LOCK();
512	while (!ifq_empty(&sc->sc_if.if_snd)) {
513		/*
514		 * XXX: `if_snd' of stopped interface could still
515		 * contain packets
516		 */
517		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
518			ifq_purge(&sc->sc_if.if_snd);
519			continue;
520		}
521		NET_UNLOCK();
522		tsleep_nsec(&nowake, PWAIT, "wg_ifq", 1000);
523		NET_LOCK();
524	}
525	NET_UNLOCK();
526
527	taskq_barrier(wg_crypt_taskq);
528	taskq_barrier(net_tq(sc->sc_if.if_index));
529
530	if (!mq_empty(&peer->p_stage_queue))
531		mq_purge(&peer->p_stage_queue);
532
533	DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
534	explicit_bzero(peer, sizeof(*peer));
535	pool_put(&wg_peer_pool, peer);
536}
537
538void
539wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t)
540{
541	if (memcmp(&t->t_endpoint, &peer->p_endpoint,
542	    sizeof(t->t_endpoint)) == 0)
543		return;
544
545	mtx_enter(&peer->p_endpoint_mtx);
546	peer->p_endpoint = t->t_endpoint;
547	mtx_leave(&peer->p_endpoint_mtx);
548}
549
550void
551wg_peer_set_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
552{
553	mtx_enter(&peer->p_endpoint_mtx);
554	memcpy(&peer->p_endpoint.e_remote, remote,
555	       sizeof(peer->p_endpoint.e_remote));
556	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
557	mtx_leave(&peer->p_endpoint_mtx);
558}
559
560int
561wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
562{
563	int	ret = 0;
564
565	mtx_enter(&peer->p_endpoint_mtx);
566	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC)
567		memcpy(remote, &peer->p_endpoint.e_remote,
568		       sizeof(peer->p_endpoint.e_remote));
569	else
570		ret = ENOENT;
571	mtx_leave(&peer->p_endpoint_mtx);
572	return ret;
573}
574
575void
576wg_peer_clear_src(struct wg_peer *peer)
577{
578	mtx_enter(&peer->p_endpoint_mtx);
579	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
580	mtx_leave(&peer->p_endpoint_mtx);
581}
582
583void
584wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *endpoint)
585{
586	mtx_enter(&peer->p_endpoint_mtx);
587	memcpy(endpoint, &peer->p_endpoint, sizeof(*endpoint));
588	mtx_leave(&peer->p_endpoint_mtx);
589}
590
591void
592wg_peer_counters_add(struct wg_peer *peer, uint64_t tx, uint64_t rx)
593{
594	mtx_enter(&peer->p_counters_mtx);
595	peer->p_counters_tx += tx;
596	peer->p_counters_rx += rx;
597	mtx_leave(&peer->p_counters_mtx);
598}
599
600int
601wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
602{
603	struct art_root	*root;
604	struct art_node	*node;
605	struct wg_aip	*aip;
606	int		 ret = 0;
607
608	switch (d->a_af) {
609	case AF_INET:	root = sc->sc_aip4; break;
610#ifdef INET6
611	case AF_INET6:	root = sc->sc_aip6; break;
612#endif
613	default: return EAFNOSUPPORT;
614	}
615
616	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
617		return ENOBUFS;
618
619	rw_enter_write(&root->ar_lock);
620	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
621
622	if (node == &aip->a_node) {
623		aip->a_peer = peer;
624		aip->a_data = *d;
625		LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
626		sc->sc_aip_num++;
627	} else {
628		pool_put(&wg_aip_pool, aip);
629		aip = (struct wg_aip *) node;
630		if (aip->a_peer != peer) {
631			LIST_REMOVE(aip, a_entry);
632			LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
633			aip->a_peer = peer;
634		}
635	}
636	rw_exit_write(&root->ar_lock);
637	return ret;
638}
639
640struct wg_peer *
641wg_aip_lookup(struct art_root *root, void *addr)
642{
643	struct srp_ref	 sr;
644	struct art_node	*node;
645
646	node = art_match(root, addr, &sr);
647	srp_leave(&sr);
648
649	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
650}
651
652int
653wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
654{
655	struct srp_ref	 sr;
656	struct art_root	*root;
657	struct art_node	*node;
658	struct wg_aip	*aip;
659	int		 ret = 0;
660
661	switch (d->a_af) {
662	case AF_INET:	root = sc->sc_aip4; break;
663#ifdef INET6
664	case AF_INET6:	root = sc->sc_aip6; break;
665#endif
666	default: return EAFNOSUPPORT;
667	}
668
669	rw_enter_write(&root->ar_lock);
670	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
671		ret = ENOENT;
672	} else if (((struct wg_aip *) node)->a_peer != peer) {
673		ret = EXDEV;
674	} else {
675		aip = (struct wg_aip *)node;
676		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
677			panic("art_delete failed to delete node %p", node);
678
679		sc->sc_aip_num--;
680		LIST_REMOVE(aip, a_entry);
681		pool_put(&wg_aip_pool, aip);
682	}
683
684	srp_leave(&sr);
685	rw_exit_write(&root->ar_lock);
686	return ret;
687}
688
689int
690wg_socket_open(struct socket **so, int af, in_port_t *port,
691    int *rtable, void *upcall_arg)
692{
693	struct mbuf		 mhostnam, mrtable;
694#ifdef INET6
695	struct sockaddr_in6	*sin6;
696#endif
697	struct sockaddr_in	*sin;
698	int			 ret;
699
700	m_inithdr(&mhostnam);
701	m_inithdr(&mrtable);
702
703	bzero(mtod(&mrtable, u_int *), sizeof(u_int));
704	*mtod(&mrtable, u_int *) = *rtable;
705	mrtable.m_len = sizeof(u_int);
706
707	if (af == AF_INET) {
708		sin = mtod(&mhostnam, struct sockaddr_in *);
709		bzero(sin, sizeof(*sin));
710		sin->sin_len = sizeof(*sin);
711		sin->sin_family = AF_INET;
712		sin->sin_port = *port;
713		sin->sin_addr.s_addr = INADDR_ANY;
714		mhostnam.m_len = sin->sin_len;
715#ifdef INET6
716	} else if (af == AF_INET6) {
717		sin6 = mtod(&mhostnam, struct sockaddr_in6 *);
718		bzero(sin6, sizeof(*sin6));
719		sin6->sin6_len = sizeof(*sin6);
720		sin6->sin6_family = AF_INET6;
721		sin6->sin6_port = *port;
722		sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } };
723		mhostnam.m_len = sin6->sin6_len;
724#endif
725	} else {
726		return EAFNOSUPPORT;
727	}
728
729	if ((ret = socreate(af, so, SOCK_DGRAM, 0)) != 0)
730		return ret;
731
732	solock(*so);
733	sotoinpcb(*so)->inp_upcall = wg_input;
734	sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
735	sounlock(*so);
736
737	if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
738		solock(*so);
739		if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
740			*port = sotoinpcb(*so)->inp_lport;
741			*rtable = sotoinpcb(*so)->inp_rtableid;
742		}
743		sounlock(*so);
744	}
745
746	if (ret != 0)
747		wg_socket_close(so);
748
749	return ret;
750}
751
752void
753wg_socket_close(struct socket **so)
754{
755	if (*so != NULL && soclose(*so, 0) != 0)
756		panic("Unable to close wg socket");
757	*so = NULL;
758}
759
760int
761wg_bind(struct wg_softc *sc, in_port_t *portp, int *rtablep)
762{
763	int		 ret = 0, rtable = *rtablep;
764	in_port_t	 port = *portp;
765	struct socket	*so4;
766#ifdef INET6
767	struct socket	*so6;
768	int		 retries = 0;
769retry:
770#endif
771	if ((ret = wg_socket_open(&so4, AF_INET, &port, &rtable, sc)) != 0)
772		return ret;
773
774#ifdef INET6
775	if ((ret = wg_socket_open(&so6, AF_INET6, &port, &rtable, sc)) != 0) {
776		if (ret == EADDRINUSE && *portp == 0 && retries++ < 100)
777			goto retry;
778		wg_socket_close(&so4);
779		return ret;
780	}
781#endif
782
783	rw_enter_write(&sc->sc_so_lock);
784	wg_socket_close(&sc->sc_so4);
785	sc->sc_so4 = so4;
786#ifdef INET6
787	wg_socket_close(&sc->sc_so6);
788	sc->sc_so6 = so6;
789#endif
790	rw_exit_write(&sc->sc_so_lock);
791
792	*portp = port;
793	*rtablep = rtable;
794	return 0;
795}
796
797void
798wg_unbind(struct wg_softc *sc)
799{
800	rw_enter_write(&sc->sc_so_lock);
801	wg_socket_close(&sc->sc_so4);
802#ifdef INET6
803	wg_socket_close(&sc->sc_so6);
804#endif
805	rw_exit_write(&sc->sc_so_lock);
806}
807
808int
809wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
810{
811	struct mbuf	 peernam, *control = NULL;
812	int		 ret;
813
814	/* Get local control address before locking */
815	if (e->e_remote.r_sa.sa_family == AF_INET) {
816		if (e->e_local.l_in.s_addr != INADDR_ANY)
817			control = sbcreatecontrol(&e->e_local.l_in,
818			    sizeof(struct in_addr), IP_SENDSRCADDR,
819			    IPPROTO_IP);
820#ifdef INET6
821	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
822		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
823			control = sbcreatecontrol(&e->e_local.l_pktinfo6,
824			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
825			    IPPROTO_IPV6);
826#endif
827	} else {
828		m_freem(m);
829		return EAFNOSUPPORT;
830	}
831
832	/* Get remote address */
833	peernam.m_type = MT_SONAME;
834	peernam.m_next = NULL;
835	peernam.m_nextpkt = NULL;
836	peernam.m_data = (void *)&e->e_remote.r_sa;
837	peernam.m_len = e->e_remote.r_sa.sa_len;
838	peernam.m_flags = 0;
839
840	rw_enter_read(&sc->sc_so_lock);
841	if (e->e_remote.r_sa.sa_family == AF_INET && sc->sc_so4 != NULL)
842		ret = sosend(sc->sc_so4, &peernam, NULL, m, control, 0);
843#ifdef INET6
844	else if (e->e_remote.r_sa.sa_family == AF_INET6 && sc->sc_so6 != NULL)
845		ret = sosend(sc->sc_so6, &peernam, NULL, m, control, 0);
846#endif
847	else {
848		ret = ENOTCONN;
849		m_freem(control);
850		m_freem(m);
851	}
852	rw_exit_read(&sc->sc_so_lock);
853
854	return ret;
855}
856
857void
858wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf,
859    size_t len)
860{
861	struct mbuf	*m;
862	int		 ret = 0;
863
864retry:
865	m = m_gethdr(M_WAIT, MT_DATA);
866	m->m_len = 0;
867	m_copyback(m, 0, len, buf, M_WAIT);
868
869	/* As we're sending a handshake packet here, we want high priority */
870	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
871
	ret = wg_send(sc, e, m);
	if (ret == EADDRNOTAVAIL) {
		/* Retry if we couldn't bind to e->e_local */
		bzero(&e->e_local, sizeof(e->e_local));
		goto retry;
	}
	if (ret != 0)
		DPRINTF(sc, "Unable to send packet\n");
884}
885
886struct wg_tag *
887wg_tag_get(struct mbuf *m)
888{
889	struct m_tag	*mtag;
890
891	if ((mtag = m_tag_find(m, PACKET_TAG_WIREGUARD, NULL)) == NULL) {
892		mtag = m_tag_get(PACKET_TAG_WIREGUARD, sizeof(struct wg_tag),
893		    M_NOWAIT);
894		if (mtag == NULL)
895			return (NULL);
896		bzero(mtag + 1, sizeof(struct wg_tag));
897		m_tag_prepend(m, mtag);
898	}
899	return ((struct wg_tag *)(mtag + 1));
900}
901
902/*
903 * The following section handles the timeout callbacks for a WireGuard session.
904 * These functions provide an "event based" model for controlling wg(8) session
905 * timers. All function calls occur after the specified event below.
906 *
907 * wg_timers_event_data_sent:
908 *	tx: data
909 * wg_timers_event_data_received:
910 *	rx: data
911 * wg_timers_event_any_authenticated_packet_sent:
912 *	tx: keepalive, data, handshake
913 * wg_timers_event_any_authenticated_packet_received:
914 *	rx: keepalive, data, handshake
915 * wg_timers_event_any_authenticated_packet_traversal:
916 *	tx, rx: keepalive, data, handshake
917 * wg_timers_event_handshake_initiated:
918 *	tx: initiation
919 * wg_timers_event_handshake_responded:
920 *	tx: response
921 * wg_timers_event_handshake_complete:
922 *	rx: response, confirmation data
923 * wg_timers_event_session_derived:
924 *	tx: response, rx: response
925 * wg_timers_event_want_initiation:
926 *	tx: data failed, old keys expiring
927 * wg_timers_event_reset_handshake_last_sent:
928 * 	anytime we may immediately want a new handshake
929 */
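/*
 * For example (illustrative only): when wg_deliver_out() transmits a data
 * packet it fires, in order, ..._any_authenticated_packet_traversal() to
 * reschedule the persistent keepalive, ..._any_authenticated_packet_sent()
 * to cancel a now-redundant keepalive, and ..._data_sent() to arm the
 * new-handshake timer in case the peer never answers.
 */
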
930void
931wg_timers_init(struct wg_timers *t)
932{
933	bzero(t, sizeof(*t));
934	mtx_init_flags(&t->t_mtx, IPL_NET, "wg_timers", 0);
935	mtx_init(&t->t_handshake_mtx, IPL_NET);
936
937	timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
938	timeout_set(&t->t_send_keepalive, wg_timers_run_send_keepalive, t);
939	timeout_set(&t->t_retry_handshake, wg_timers_run_retry_handshake, t);
940	timeout_set(&t->t_persistent_keepalive,
941	    wg_timers_run_persistent_keepalive, t);
942	timeout_set(&t->t_zero_key_material,
943	    wg_timers_run_zero_key_material, t);
944}
945
946void
947wg_timers_enable(struct wg_timers *t)
948{
949	mtx_enter(&t->t_mtx);
950	t->t_disabled = 0;
951	mtx_leave(&t->t_mtx);
952	wg_timers_run_persistent_keepalive(t);
953}
954
955void
956wg_timers_disable(struct wg_timers *t)
957{
958	mtx_enter(&t->t_mtx);
959	t->t_disabled = 1;
960	t->t_need_another_keepalive = 0;
961	mtx_leave(&t->t_mtx);
962
963	timeout_del_barrier(&t->t_new_handshake);
964	timeout_del_barrier(&t->t_send_keepalive);
965	timeout_del_barrier(&t->t_retry_handshake);
966	timeout_del_barrier(&t->t_persistent_keepalive);
967	timeout_del_barrier(&t->t_zero_key_material);
968}
969
970void
971wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
972{
973	mtx_enter(&t->t_mtx);
974	if (!t->t_disabled) {
975		t->t_persistent_keepalive_interval = interval;
976		wg_timers_run_persistent_keepalive(t);
977	}
978	mtx_leave(&t->t_mtx);
979}
980
981int
982wg_timers_get_persistent_keepalive(struct wg_timers *t, uint16_t *interval)
983{
984	*interval = t->t_persistent_keepalive_interval;
985	return *interval > 0 ? 0 : ENOENT;
986}
987
988void
989wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time)
990{
991	mtx_enter(&t->t_handshake_mtx);
992	*time = t->t_handshake_complete;
993	mtx_leave(&t->t_handshake_mtx);
994}
995
996int
997wg_timers_expired_handshake_last_sent(struct wg_timers *t)
998{
999	struct timespec uptime;
1000	struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 };
1001
1002	getnanouptime(&uptime);
1003	timespecadd(&t->t_handshake_last_sent, &expire, &expire);
1004	return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
1005}
1006
1007int
1008wg_timers_check_handshake_last_sent(struct wg_timers *t)
1009{
1010	int ret;
1011	mtx_enter(&t->t_handshake_mtx);
1012	if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT)
1013		getnanouptime(&t->t_handshake_last_sent);
1014	mtx_leave(&t->t_handshake_mtx);
1015	return ret;
1016}
1017
1018void
1019wg_timers_event_data_sent(struct wg_timers *t)
1020{
1021	int	msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
1022	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1023
1024	mtx_enter(&t->t_mtx);
1025	if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
1026		timeout_add_msec(&t->t_new_handshake, msecs);
1027	mtx_leave(&t->t_mtx);
1028}
1029
1030void
1031wg_timers_event_data_received(struct wg_timers *t)
1032{
1033	mtx_enter(&t->t_mtx);
1034	if (!t->t_disabled) {
1035		if (!timeout_pending(&t->t_send_keepalive))
1036			timeout_add_sec(&t->t_send_keepalive,
1037			    KEEPALIVE_TIMEOUT);
1038		else
1039			t->t_need_another_keepalive = 1;
1040	}
1041	mtx_leave(&t->t_mtx);
1042}
1043
1044void
1045wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t)
1046{
1047	timeout_del(&t->t_send_keepalive);
1048}
1049
1050void
1051wg_timers_event_any_authenticated_packet_received(struct wg_timers *t)
1052{
1053	timeout_del(&t->t_new_handshake);
1054}
1055
1056void
1057wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
1058{
1059	mtx_enter(&t->t_mtx);
1060	if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
1061		timeout_add_sec(&t->t_persistent_keepalive,
1062		    t->t_persistent_keepalive_interval);
1063	mtx_leave(&t->t_mtx);
1064}
1065
1066void
1067wg_timers_event_handshake_initiated(struct wg_timers *t)
1068{
1069	int	msecs = REKEY_TIMEOUT * 1000;
1070	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1071
1072	mtx_enter(&t->t_mtx);
1073	if (!t->t_disabled)
1074		timeout_add_msec(&t->t_retry_handshake, msecs);
1075	mtx_leave(&t->t_mtx);
1076}
1077
1078void
1079wg_timers_event_handshake_responded(struct wg_timers *t)
1080{
1081	mtx_enter(&t->t_handshake_mtx);
1082	getnanouptime(&t->t_handshake_last_sent);
1083	mtx_leave(&t->t_handshake_mtx);
1084}
1085
1086void
1087wg_timers_event_handshake_complete(struct wg_timers *t)
1088{
1089	mtx_enter(&t->t_mtx);
1090	if (!t->t_disabled) {
1091		mtx_enter(&t->t_handshake_mtx);
1092		timeout_del(&t->t_retry_handshake);
1093		t->t_handshake_retries = 0;
1094		getnanotime(&t->t_handshake_complete);
1095		mtx_leave(&t->t_handshake_mtx);
1096		wg_timers_run_send_keepalive(t);
1097	}
1098	mtx_leave(&t->t_mtx);
1099}
1100
1101void
1102wg_timers_event_session_derived(struct wg_timers *t)
1103{
1104	mtx_enter(&t->t_mtx);
1105	if (!t->t_disabled)
1106		timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
1107	mtx_leave(&t->t_mtx);
1108}
1109
1110void
1111wg_timers_event_want_initiation(struct wg_timers *t)
1112{
1113	mtx_enter(&t->t_mtx);
1114	if (!t->t_disabled)
1115		wg_timers_run_send_initiation(t, 0);
1116	mtx_leave(&t->t_mtx);
1117}
1118
1119void
1120wg_timers_event_reset_handshake_last_sent(struct wg_timers *t)
1121{
1122	mtx_enter(&t->t_handshake_mtx);
1123	t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1);
1124	mtx_leave(&t->t_handshake_mtx);
1125}
1126
1127void
1128wg_timers_run_send_initiation(void *_t, int is_retry)
1129{
1130	struct wg_timers *t = _t;
1131	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1132	if (!is_retry)
1133		t->t_handshake_retries = 0;
1134	if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT)
1135		task_add(wg_handshake_taskq, &peer->p_send_initiation);
1136}
1137
1138void
1139wg_timers_run_retry_handshake(void *_t)
1140{
1141	struct wg_timers *t = _t;
1142	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1143
1144	mtx_enter(&t->t_handshake_mtx);
1145	if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1146		t->t_handshake_retries++;
1147		mtx_leave(&t->t_handshake_mtx);
1148
1149		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1150		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1151		    REKEY_TIMEOUT, t->t_handshake_retries + 1);
1152		wg_peer_clear_src(peer);
1153		wg_timers_run_send_initiation(t, 1);
1154	} else {
1155		mtx_leave(&t->t_handshake_mtx);
1156
1157		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1158		    "after %d retries, giving up\n", peer->p_id,
1159		    MAX_TIMER_HANDSHAKES + 2);
1160
1161		timeout_del(&t->t_send_keepalive);
1162		mq_purge(&peer->p_stage_queue);
1163		if (!timeout_pending(&t->t_zero_key_material))
1164			timeout_add_sec(&t->t_zero_key_material,
1165			    REJECT_AFTER_TIME * 3);
1166	}
1167}
1168
1169void
1170wg_timers_run_send_keepalive(void *_t)
1171{
1172	struct wg_timers *t = _t;
1173	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1174
1175	task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1176	if (t->t_need_another_keepalive) {
1177		t->t_need_another_keepalive = 0;
1178		timeout_add_sec(&t->t_send_keepalive, KEEPALIVE_TIMEOUT);
1179	}
1180}
1181
1182void
1183wg_timers_run_new_handshake(void *_t)
1184{
1185	struct wg_timers *t = _t;
1186	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1187
1188	DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we "
1189	    "stopped hearing back after %d seconds\n",
1190	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1191	wg_peer_clear_src(peer);
1192
1193	wg_timers_run_send_initiation(t, 0);
1194}
1195
1196void
1197wg_timers_run_zero_key_material(void *_t)
1198{
1199	struct wg_timers *t = _t;
1200	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1201
1202	DPRINTF(peer->p_sc, "Zeroing out keys for peer %llu\n", peer->p_id);
1203	task_add(wg_handshake_taskq, &peer->p_clear_secrets);
1204}
1205
1206void
1207wg_timers_run_persistent_keepalive(void *_t)
1208{
1209	struct wg_timers *t = _t;
1210	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1211	if (t->t_persistent_keepalive_interval != 0)
1212		task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1213}
1214
1215/* The following functions handle handshakes */
1216void
1217wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1218{
1219	struct wg_endpoint	 endpoint;
1220
1221	wg_peer_counters_add(peer, len, 0);
1222	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1223	wg_timers_event_any_authenticated_packet_sent(&peer->p_timers);
1224	wg_peer_get_endpoint(peer, &endpoint);
1225	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1226}
1227
1228void
1229wg_send_initiation(void *_peer)
1230{
1231	struct wg_peer			*peer = _peer;
1232	struct wg_pkt_initiation	 pkt;
1233
1234	if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT)
1235		return;
1236
1237	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n",
1238	    peer->p_id);
1239
1240	if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es,
1241				    pkt.ets) != 0)
1242		return;
1243	pkt.t = WG_PKT_INITIATION;
1244	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1245	    sizeof(pkt)-sizeof(pkt.m));
1246	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1247	wg_timers_event_handshake_initiated(&peer->p_timers);
1248}
1249
1250void
1251wg_send_response(struct wg_peer *peer)
1252{
1253	struct wg_pkt_response	 pkt;
1254
1255	DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n",
1256	    peer->p_id);
1257
1258	if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1259				  pkt.ue, pkt.en) != 0)
1260		return;
1261	if (noise_remote_begin_session(&peer->p_remote) != 0)
1262		return;
1263	wg_timers_event_session_derived(&peer->p_timers);
1264	pkt.t = WG_PKT_RESPONSE;
1265	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1266	    sizeof(pkt)-sizeof(pkt.m));
1267	wg_timers_event_handshake_responded(&peer->p_timers);
1268	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1269}
1270
1271void
1272wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1273    struct wg_endpoint *e)
1274{
1275	struct wg_pkt_cookie	pkt;
1276
1277	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1278
1279	pkt.t = WG_PKT_COOKIE;
1280	pkt.r_idx = idx;
1281
1282	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1283	    pkt.ec, &e->e_remote.r_sa);
1284
1285	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1286}
1287
1288void
1289wg_send_keepalive(void *_peer)
1290{
1291	struct wg_peer	*peer = _peer;
1292	struct wg_softc	*sc = peer->p_sc;
1293	struct wg_tag	*t;
1294	struct mbuf	*m;
1295
1296	if (!mq_empty(&peer->p_stage_queue))
1297		goto send;
1298
1299	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1300		return;
1301
1302	if ((t = wg_tag_get(m)) == NULL) {
1303		m_freem(m);
1304		return;
1305	}
1306
1307	m->m_len = 0;
1308	m_calchdrlen(m);
1309
1310	t->t_peer = peer;
1311	t->t_mbuf = NULL;
1312	t->t_done = 0;
1313	t->t_mtu = 0; /* MTU == 0 OK for keepalive */
1314
1315	mq_push(&peer->p_stage_queue, m);
1316send:
1317	if (noise_remote_ready(&peer->p_remote) == 0) {
1318		wg_queue_out(sc, peer);
1319		task_add(wg_crypt_taskq, &sc->sc_encap);
1320	} else {
1321		wg_timers_event_want_initiation(&peer->p_timers);
1322	}
1323}
1324
1325void
1326wg_peer_clear_secrets(void *_peer)
1327{
1328	struct wg_peer *peer = _peer;
1329	noise_remote_clear(&peer->p_remote);
1330}
1331
1332void
1333wg_handshake(struct wg_softc *sc, struct mbuf *m)
1334{
1335	struct wg_tag			*t;
1336	struct wg_pkt_initiation	*init;
1337	struct wg_pkt_response		*resp;
1338	struct wg_pkt_cookie		*cook;
1339	struct wg_peer			*peer;
1340	struct noise_remote		*remote;
1341	int				 res, underload = 0;
1342	static struct timeval		 wg_last_underload; /* microuptime */
1343
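	/*
	 * "underload" means the interface is under load: once the handshake
	 * queue is more than 1/8 full, initiators must echo a valid cookie
	 * (mac2) and are otherwise rate limited (the EAGAIN/ECONNREFUSED
	 * cases below), and the state persists for UNDERLOAD_TIMEOUT seconds
	 * after the queue last exceeded the threshold.
	 */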
1344	if (mq_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES/8) {
1345		getmicrouptime(&wg_last_underload);
1346		underload = 1;
1347	} else if (wg_last_underload.tv_sec != 0) {
1348		if (!ratecheck(&wg_last_underload, &underload_interval))
1349			underload = 1;
1350		else
1351			bzero(&wg_last_underload, sizeof(wg_last_underload));
1352	}
1353
1354	t = wg_tag_get(m);
1355
1356	switch (*mtod(m, uint32_t *)) {
1357	case WG_PKT_INITIATION:
1358		init = mtod(m, struct wg_pkt_initiation *);
1359
1360		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1361				init, sizeof(*init) - sizeof(init->m),
1362				underload, &t->t_endpoint.e_remote.r_sa);
1363
1364		if (res == EINVAL) {
1365			DPRINTF(sc, "Invalid initiation MAC\n");
1366			goto error;
1367		} else if (res == ECONNREFUSED) {
1368			DPRINTF(sc, "Handshake ratelimited\n");
1369			goto error;
1370		} else if (res == EAGAIN) {
1371			wg_send_cookie(sc, &init->m, init->s_idx,
1372			    &t->t_endpoint);
1373			goto error;
1374		} else if (res != 0) {
1375			panic("unexpected response: %d", res);
1376		}
1377
1378		if (noise_consume_initiation(&sc->sc_local, &remote,
1379		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1380			DPRINTF(sc, "Invalid handshake initiation\n");
1381			goto error;
1382		}
1383
1384		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1385
1386		DPRINTF(sc, "Receiving handshake initiation from peer %llu\n",
1387		    peer->p_id);
1388
1389		wg_peer_counters_add(peer, 0, sizeof(*init));
1390		wg_peer_set_endpoint_from_tag(peer, t);
1391		wg_send_response(peer);
1392		break;
1393	case WG_PKT_RESPONSE:
1394		resp = mtod(m, struct wg_pkt_response *);
1395
1396		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1397				resp, sizeof(*resp) - sizeof(resp->m),
1398				underload, &t->t_endpoint.e_remote.r_sa);
1399
1400		if (res == EINVAL) {
1401			DPRINTF(sc, "Invalid response MAC\n");
1402			goto error;
1403		} else if (res == ECONNREFUSED) {
1404			DPRINTF(sc, "Handshake ratelimited\n");
1405			goto error;
1406		} else if (res == EAGAIN) {
1407			wg_send_cookie(sc, &resp->m, resp->s_idx,
1408			    &t->t_endpoint);
1409			goto error;
1410		} else if (res != 0) {
1411			panic("unexpected response: %d", res);
1412		}
1413
1414		if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) {
1415			DPRINTF(sc, "Unknown handshake response\n");
1416			goto error;
1417		}
1418
1419		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1420
1421		if (noise_consume_response(remote, resp->s_idx, resp->r_idx,
1422					   resp->ue, resp->en) != 0) {
1423			DPRINTF(sc, "Invalid handshake response\n");
1424			goto error;
1425		}
1426
1427		DPRINTF(sc, "Receiving handshake response from peer %llu\n",
1428				peer->p_id);
1429
1430		wg_peer_counters_add(peer, 0, sizeof(*resp));
1431		wg_peer_set_endpoint_from_tag(peer, t);
1432		if (noise_remote_begin_session(&peer->p_remote) == 0) {
1433			wg_timers_event_session_derived(&peer->p_timers);
1434			wg_timers_event_handshake_complete(&peer->p_timers);
1435		}
1436		break;
1437	case WG_PKT_COOKIE:
1438		cook = mtod(m, struct wg_pkt_cookie *);
1439
1440		if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) {
1441			DPRINTF(sc, "Unknown cookie index\n");
1442			goto error;
1443		}
1444
1445		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1446
1447		if (cookie_maker_consume_payload(&peer->p_cookie,
1448		    cook->nonce, cook->ec) != 0) {
1449			DPRINTF(sc, "Could not decrypt cookie response\n");
1450			goto error;
1451		}
1452
1453		DPRINTF(sc, "Receiving cookie response\n");
1454		goto error;
1455	default:
1456		panic("invalid packet in handshake queue");
1457	}
1458
1459	wg_timers_event_any_authenticated_packet_received(&peer->p_timers);
1460	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1461error:
1462	m_freem(m);
1463}
1464
1465void
1466wg_handshake_worker(void *_sc)
1467{
1468	struct mbuf *m;
1469	struct wg_softc *sc = _sc;
1470	while ((m = mq_dequeue(&sc->sc_handshake_queue)) != NULL)
1471		wg_handshake(sc, m);
1472}
1473
1474/*
1475 * The following functions handle encapsulation (encryption) and
1476 * decapsulation (decryption). The wg_{en,de}cap functions will run in the
 * wg_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1478 * in nettq.
1479 *
1480 * The packets are tracked in two queues, a serial queue and a parallel queue.
1481 *  - The parallel queue is used to distribute the encryption across multiple
1482 *    threads.
1483 *  - The serial queue ensures that packets are not reordered and are
1484 *    delivered in sequence.
1485 * The wg_tag attached to the packet contains two flags to help the two queues
1486 * interact.
1487 *  - t_done: The parallel queue has finished with the packet, now the serial
 *            queue can do its work.
 *  - t_mbuf: Used to store the *crypted packet. In the case of encryption,
 *            this is a newly allocated packet, and in the case of decryption,
 *            it is a pointer to the same packet, which has been decrypted and
1492 *            truncated. If t_mbuf is NULL, then *cryption failed and this
1493 *            packet should not be passed.
1494 * wg_{en,de}cap work on the parallel queue, while wg_deliver_{in,out} work
1495 * on the serial queue.
1496 */
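/*
 * A rough sketch of the encrypt path described above (illustration only):
 *
 *	wg_queue_out() places each staged mbuf on both the peer's serial
 *	    p_encap_queue and the shared parallel sc_encap_ring, t_done = 0;
 *	wg_encap() (wg_crypt_taskq, any CPU) encrypts, points t_mbuf at the
 *	    ciphertext mbuf, sets t_done = 1 and schedules p_deliver_out;
 *	wg_deliver_out() (net tq, serialised) pops the serial queue only
 *	    while the head's t_done is set, so packets go out in order.
 * The decrypt path mirrors this with p_decap_queue, sc_decap_ring,
 * wg_decap() and wg_deliver_in().
 */
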
1497void
1498wg_encap(struct wg_softc *sc, struct mbuf *m)
1499{
1500	int res = 0;
1501	struct wg_pkt_data	*data;
1502	struct wg_peer		*peer;
1503	struct wg_tag		*t;
1504	struct mbuf		*mc;
1505	size_t			 padding_len, plaintext_len, out_len;
1506	uint64_t		 nonce;
1507
1508	t = wg_tag_get(m);
1509	peer = t->t_peer;
1510
1511	plaintext_len = min(WG_PKT_WITH_PADDING(m->m_pkthdr.len), t->t_mtu);
1512	padding_len = plaintext_len - m->m_pkthdr.len;
1513	out_len = sizeof(struct wg_pkt_data) + plaintext_len + NOISE_AUTHTAG_LEN;
1514
1515	/*
1516	 * For the time being we allocate a new packet with sufficient size to
1517	 * hold the encrypted data and headers. It would be difficult to
1518	 * overcome as p_encap_queue (mbuf_list) holds a reference to the mbuf.
1519	 * If we m_makespace or similar, we risk corrupting that list.
1520	 * Additionally, we only pass a buf and buf length to
1521	 * noise_remote_encrypt. Technically it would be possible to teach
1522	 * noise_remote_encrypt about mbufs, but we would need to sort out the
1523	 * p_encap_queue situation first.
1524	 */
1525	if ((mc = m_clget(NULL, M_NOWAIT, out_len)) == NULL)
1526		goto error;
1527
1528	data = mtod(mc, struct wg_pkt_data *);
1529	m_copydata(m, 0, m->m_pkthdr.len, data->buf);
1530	bzero(data->buf + m->m_pkthdr.len, padding_len);
1531	data->t = WG_PKT_DATA;
1532
1533	/*
1534	 * Copy the flow hash from the inner packet to the outer packet, so
	 * that fq_codel can properly separate streams, rather than falling
1536	 * back to random buckets.
1537	 */
1538	mc->m_pkthdr.ph_flowid = m->m_pkthdr.ph_flowid;
1539
1540	mc->m_pkthdr.pf.prio = m->m_pkthdr.pf.prio;
1541
1542	res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce,
1543				   data->buf, plaintext_len);
1544	nonce = htole64(nonce); /* Wire format is little endian. */
1545	memcpy(data->nonce, &nonce, sizeof(data->nonce));
1546
1547	if (__predict_false(res == EINVAL)) {
1548		m_freem(mc);
1549		goto error;
1550	} else if (__predict_false(res == ESTALE)) {
1551		wg_timers_event_want_initiation(&peer->p_timers);
1552	} else if (__predict_false(res != 0)) {
1553		panic("unexpected result: %d", res);
1554	}
1555
1556	/* A packet with length 0 is a keepalive packet */
1557	if (__predict_false(m->m_pkthdr.len == 0))
1558		DPRINTF(sc, "Sending keepalive packet to peer %llu\n",
1559		    peer->p_id);
1560
1561	mc->m_pkthdr.ph_loopcnt = m->m_pkthdr.ph_loopcnt;
1562	mc->m_flags &= ~(M_MCAST | M_BCAST);
1563	mc->m_len = out_len;
1564	m_calchdrlen(mc);
1565
1566	/*
1567	 * We would count ifc_opackets, ifc_obytes of m here, except if_snd
1568	 * already does that for us, so no need to worry about it.
1569	counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
1570	    m->m_pkthdr.len);
1571	 */
1572	wg_peer_counters_add(peer, mc->m_pkthdr.len, 0);
1573
1574	t->t_mbuf = mc;
1575error:
1576	t->t_done = 1;
1577	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_out);
1578}
1579
1580void
1581wg_decap(struct wg_softc *sc, struct mbuf *m)
1582{
1583	int			 res, len;
1584	struct ip		*ip;
1585	struct ip6_hdr		*ip6;
1586	struct wg_pkt_data	*data;
1587	struct wg_peer		*peer, *allowed_peer;
1588	struct wg_tag		*t;
1589	size_t			 payload_len;
1590	uint64_t		 nonce;
1591
1592	t = wg_tag_get(m);
1593	peer = t->t_peer;
1594
1595	/*
1596	 * Likewise to wg_encap, we pass a buf and buf length to
1597	 * noise_remote_decrypt. Again, possible to teach it about mbufs
1598	 * but need to get over the p_decap_queue situation first. However,
1599	 * we do not need to allocate a new mbuf as the decrypted packet is
1600	 * strictly smaller than encrypted. We just set t_mbuf to m and
1601	 * wg_deliver_in knows how to deal with that.
1602	 */
1603	data = mtod(m, struct wg_pkt_data *);
1604	payload_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data);
1605	memcpy(&nonce, data->nonce, sizeof(nonce));
1606	nonce = le64toh(nonce); /* Wire format is little endian. */
1607	res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce,
1608				   data->buf, payload_len);
1609
1610	if (__predict_false(res == EINVAL)) {
1611		goto error;
1612	} else if (__predict_false(res == ECONNRESET)) {
1613		wg_timers_event_handshake_complete(&peer->p_timers);
1614	} else if (__predict_false(res == ESTALE)) {
1615		wg_timers_event_want_initiation(&peer->p_timers);
1616	} else if (__predict_false(res != 0)) {
1617		panic("unexpected response: %d", res);
1618	}
1619
1620	wg_peer_set_endpoint_from_tag(peer, t);
1621
1622	wg_peer_counters_add(peer, 0, m->m_pkthdr.len);
1623
1624	m_adj(m, sizeof(struct wg_pkt_data));
1625	m_adj(m, -NOISE_AUTHTAG_LEN);
1626
1627	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
1628	    m->m_pkthdr.len);
1629
1630	/* A packet with length 0 is a keepalive packet */
1631	if (__predict_false(m->m_pkthdr.len == 0)) {
1632		DPRINTF(sc, "Receiving keepalive packet from peer "
1633		    "%llu\n", peer->p_id);
1634		goto done;
1635	}
1636
1637	/*
1638	 * We can let the network stack handle the intricate validation of the
1639	 * IP header, we just worry about the sizeof and the version, so we can
1640	 * read the source address in wg_aip_lookup.
1641	 *
1642	 * We also need to trim the packet, as it was likely padded before
1643	 * encryption. While we could drop it here, it will be more helpful to
1644	 * pass it to bpf_mtap and use the counters that people are expecting
1645	 * in ipv4_input and ipv6_input. We can rely on ipv4_input and
1646	 * ipv6_input to properly validate the headers.
1647	 */
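	/*
	 * For example (illustration only): a 100 byte IPv4 packet is padded
	 * to 112 bytes before encryption, so at this point m->m_pkthdr.len
	 * is 112 while ip->ip_len says 100, and m_adj(m, 100 - 112) trims
	 * the 12 padding bytes off the tail.
	 */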
1648	ip = mtod(m, struct ip *);
1649	ip6 = mtod(m, struct ip6_hdr *);
1650
1651	if (m->m_pkthdr.len >= sizeof(struct ip) && ip->ip_v == IPVERSION) {
1652		m->m_pkthdr.ph_family = AF_INET;
1653
1654		len = ntohs(ip->ip_len);
1655		if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1656			m_adj(m, len - m->m_pkthdr.len);
1657
1658		allowed_peer = wg_aip_lookup(sc->sc_aip4, &ip->ip_src);
1659#ifdef INET6
1660	} else if (m->m_pkthdr.len >= sizeof(struct ip6_hdr) &&
1661	    (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
1662		m->m_pkthdr.ph_family = AF_INET6;
1663
1664		len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1665		if (len < m->m_pkthdr.len)
1666			m_adj(m, len - m->m_pkthdr.len);
1667
1668		allowed_peer = wg_aip_lookup(sc->sc_aip6, &ip6->ip6_src);
1669#endif
1670	} else {
1671		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from "
1672		    "peer %llu\n", peer->p_id);
1673		goto error;
1674	}
1675
1676	if (__predict_false(peer != allowed_peer)) {
1677		DPRINTF(sc, "Packet has unallowed src IP from peer "
1678		    "%llu\n", peer->p_id);
1679		goto error;
1680	}
1681
1682	/* tunneled packet was not offloaded */
1683	m->m_pkthdr.csum_flags = 0;
1684
1685	m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
1686	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1687	m->m_flags &= ~(M_MCAST | M_BCAST);
1688#if NPF > 0
1689	pf_pkt_addr_changed(m);
1690#endif /* NPF > 0 */
1691
1692done:
1693	t->t_mbuf = m;
1694error:
1695	t->t_done = 1;
1696	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_in);
1697}
1698
1699void
1700wg_encap_worker(void *_sc)
1701{
1702	struct mbuf *m;
1703	struct wg_softc *sc = _sc;
1704	while ((m = wg_ring_dequeue(&sc->sc_encap_ring)) != NULL)
1705		wg_encap(sc, m);
1706}
1707
1708void
1709wg_decap_worker(void *_sc)
1710{
1711	struct mbuf *m;
1712	struct wg_softc *sc = _sc;
1713	while ((m = wg_ring_dequeue(&sc->sc_decap_ring)) != NULL)
1714		wg_decap(sc, m);
1715}
1716
1717void
1718wg_deliver_out(void *_peer)
1719{
1720	struct wg_peer		*peer = _peer;
1721	struct wg_softc		*sc = peer->p_sc;
1722	struct wg_endpoint	 endpoint;
1723	struct wg_tag		*t;
1724	struct mbuf		*m;
1725	int			 ret;
1726
1727	wg_peer_get_endpoint(peer, &endpoint);
1728
1729	while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) {
1730		/* t_mbuf will contain the encrypted packet */
		if (t->t_mbuf == NULL) {
1732			counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1733			m_freem(m);
1734			continue;
1735		}
1736
1737		ret = wg_send(sc, &endpoint, t->t_mbuf);
1738
1739		if (ret == 0) {
1740			wg_timers_event_any_authenticated_packet_traversal(
1741			    &peer->p_timers);
1742			wg_timers_event_any_authenticated_packet_sent(
1743			    &peer->p_timers);
1744
1745			if (m->m_pkthdr.len != 0)
1746				wg_timers_event_data_sent(&peer->p_timers);
1747		} else if (ret == EADDRNOTAVAIL) {
1748			wg_peer_clear_src(peer);
1749			wg_peer_get_endpoint(peer, &endpoint);
1750		}
1751
1752		m_freem(m);
1753	}
1754}
1755
1756void
1757wg_deliver_in(void *_peer)
1758{
1759	struct wg_peer	*peer = _peer;
1760	struct wg_softc	*sc = peer->p_sc;
1761	struct wg_tag	*t;
1762	struct mbuf	*m;
1763
1764	while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) {
1765		/* t_mbuf will contain the decrypted packet */
1766		if (t->t_mbuf == NULL) {
1767			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
1768			m_freem(m);
1769			continue;
1770		}
1771
1772		/* From here on m == t->t_mbuf */
1773		KASSERT(m == t->t_mbuf);
1774
1775		wg_timers_event_any_authenticated_packet_received(
1776		    &peer->p_timers);
1777		wg_timers_event_any_authenticated_packet_traversal(
1778		    &peer->p_timers);
1779
1780		if (m->m_pkthdr.len == 0) {
1781			m_freem(m);
1782			continue;
1783		}
1784
1785#if NBPFILTER > 0
1786		if (sc->sc_if.if_bpf != NULL)
1787			bpf_mtap_af(sc->sc_if.if_bpf,
1788			    m->m_pkthdr.ph_family, m, BPF_DIRECTION_IN);
1789#endif
1790
1791		NET_LOCK();
1792		if (m->m_pkthdr.ph_family == AF_INET)
1793			ipv4_input(&sc->sc_if, m);
1794#ifdef INET6
1795		else if (m->m_pkthdr.ph_family == AF_INET6)
1796			ipv6_input(&sc->sc_if, m);
1797#endif
1798		else
1799			panic("invalid ph_family");
1800		NET_UNLOCK();
1801
1802		wg_timers_event_data_received(&peer->p_timers);
1803	}
1804}
1805
1806int
1807wg_queue_in(struct wg_softc *sc, struct wg_peer *peer, struct mbuf *m)
1808{
1809	struct wg_ring		*parallel = &sc->sc_decap_ring;
1810	struct wg_queue		*serial = &peer->p_decap_queue;
1811	struct wg_tag		*t;
1812
1813	mtx_enter(&serial->q_mtx);
1814	if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1815		ml_enqueue(&serial->q_list, m);
1816		mtx_leave(&serial->q_mtx);
1817	} else {
1818		mtx_leave(&serial->q_mtx);
1819		m_freem(m);
1820		return ENOBUFS;
1821	}
1822
1823	mtx_enter(&parallel->r_mtx);
1824	if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1825		parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1826		parallel->r_tail++;
1827		mtx_leave(&parallel->r_mtx);
1828	} else {
1829		mtx_leave(&parallel->r_mtx);
1830		t = wg_tag_get(m);
1831		t->t_done = 1;
1832		return ENOBUFS;
1833	}
1834
1835	return 0;
1836}
1837
1838void
1839wg_queue_out(struct wg_softc *sc, struct wg_peer *peer)
1840{
1841	struct wg_ring		*parallel = &sc->sc_encap_ring;
1842	struct wg_queue		*serial = &peer->p_encap_queue;
1843	struct mbuf_list 	 ml, ml_free;
1844	struct mbuf		*m;
1845	struct wg_tag		*t;
1846	int			 dropped;
1847
1848	/*
1849	 * We delist all staged packets and then add them to the queues. This
1850	 * can race with wg_qstart when called from wg_send_keepalive, however
1851	 * wg_qstart will not race as it is serialised.
1852	 */
1853	mq_delist(&peer->p_stage_queue, &ml);
1854	ml_init(&ml_free);
1855
1856	while ((m = ml_dequeue(&ml)) != NULL) {
1857		mtx_enter(&serial->q_mtx);
1858		if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1859			ml_enqueue(&serial->q_list, m);
1860			mtx_leave(&serial->q_mtx);
1861		} else {
1862			mtx_leave(&serial->q_mtx);
1863			ml_enqueue(&ml_free, m);
1864			continue;
1865		}
1866
1867		mtx_enter(&parallel->r_mtx);
1868		if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1869			parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1870			parallel->r_tail++;
1871			mtx_leave(&parallel->r_mtx);
1872		} else {
1873			mtx_leave(&parallel->r_mtx);
1874			t = wg_tag_get(m);
1875			t->t_done = 1;
1876		}
1877	}
1878
1879	if ((dropped = ml_purge(&ml_free)) > 0)
1880		counters_add(sc->sc_if.if_counters, ifc_oqdrops, dropped);
1881}
1882
1883struct mbuf *
1884wg_ring_dequeue(struct wg_ring *r)
1885{
1886	struct mbuf *m = NULL;
1887	mtx_enter(&r->r_mtx);
1888	if (r->r_head != r->r_tail) {
1889		m = r->r_buf[r->r_head & MAX_QUEUED_PKT_MASK];
1890		r->r_head++;
1891	}
1892	mtx_leave(&r->r_mtx);
1893	return m;
1894}
1895
1896struct mbuf *
1897wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t)
1898{
1899	struct mbuf *m;
1900	mtx_enter(&q->q_mtx);
1901	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
1902		ml_dequeue(&q->q_list);
1903	else
1904		m = NULL;
1905	mtx_leave(&q->q_mtx);
1906	return m;
1907}
1908
1909size_t
1910wg_queue_len(struct wg_queue *q)
1911{
1912	size_t len;
1913	mtx_enter(&q->q_mtx);
1914	len = q->q_list.ml_len;
1915	mtx_leave(&q->q_mtx);
1916	return len;
1917}
1918
1919struct noise_remote *
1920wg_remote_get(void *_sc, uint8_t public[NOISE_PUBLIC_KEY_LEN])
1921{
1922	struct wg_peer	*peer;
1923	struct wg_softc	*sc = _sc;
1924	if ((peer = wg_peer_lookup(sc, public)) == NULL)
1925		return NULL;
1926	return &peer->p_remote;
1927}
1928
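/*
 * wg_index_set picks a random 32-bit index for a handshake and maps it to
 * the remote in sc_index, retrying until the index is unique. Only one
 * bucket needs to be searched, as the bucket is derived from the low bits
 * of the index itself.
 */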
1929uint32_t
1930wg_index_set(void *_sc, struct noise_remote *remote)
1931{
1932	struct wg_peer	*peer;
1933	struct wg_softc	*sc = _sc;
1934	struct wg_index *index, *iter;
1935	uint32_t	 key;
1936
1937	/*
1938	 * We can modify this without a lock, as wg_index_set and wg_index_drop
1939	 * are guaranteed to be serialised (per remote).
1940	 */
1941	peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1942	index = SLIST_FIRST(&peer->p_unused_index);
1943	KASSERT(index != NULL);
1944	SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry);
1945
1946	index->i_value = remote;
1947
1948	mtx_enter(&sc->sc_index_mtx);
1949assign_id:
1950	key = index->i_key = arc4random();
1951	key &= sc->sc_index_mask;
1952	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1953		if (iter->i_key == index->i_key)
1954			goto assign_id;
1955
1956	LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry);
1957
1958	mtx_leave(&sc->sc_index_mtx);
1959
1960	/* Likewise, no lock is needed to read index->i_key here. */
1961	return index->i_key;
1962}
1963
1964struct noise_remote *
1965wg_index_get(void *_sc, uint32_t key0)
1966{
1967	struct wg_softc		*sc = _sc;
1968	struct wg_index		*iter;
1969	struct noise_remote	*remote = NULL;
1970	uint32_t		 key = key0 & sc->sc_index_mask;
1971
1972	mtx_enter(&sc->sc_index_mtx);
1973	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1974		if (iter->i_key == key0) {
1975			remote = iter->i_value;
1976			break;
1977		}
1978	mtx_leave(&sc->sc_index_mtx);
1979	return remote;
1980}
1981
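/*
 * wg_index_drop unhooks an index from the lookup table and returns the
 * wg_index record to the owning peer's unused list, ready to be reused by
 * a later handshake.
 */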
1982void
1983wg_index_drop(void *_sc, uint32_t key0)
1984{
1985	struct wg_softc	*sc = _sc;
1986	struct wg_index	*iter;
1987	struct wg_peer	*peer = NULL;
1988	uint32_t	 key = key0 & sc->sc_index_mask;
1989
1990	mtx_enter(&sc->sc_index_mtx);
1991	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1992		if (iter->i_key == key0) {
1993			LIST_REMOVE(iter, i_entry);
1994			break;
1995		}
1996	mtx_leave(&sc->sc_index_mtx);
1997
1998	/* We expect the index to exist, and therefore a peer to be attached. */
1999	peer = CONTAINER_OF(iter->i_value, struct wg_peer, p_remote);
2000	KASSERT(peer != NULL);
2001	SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry);
2002}
2003
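/*
 * wg_input handles a UDP datagram received on one of the wg(4) sockets.
 * The sender's address and our local address are recorded in the mbuf tag,
 * then the payload is classified by its length and first 32-bit word:
 * handshake messages are queued for wg_handshake_worker, and data packets
 * are matched to a peer through their receiver index and queued for
 * decryption.
 */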
2004struct mbuf *
2005wg_input(void *_sc, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
2006    void *_uh, int hlen)
2007{
2008	struct wg_pkt_data	*data;
2009	struct noise_remote	*remote;
2010	struct wg_tag		*t;
2011	struct wg_softc		*sc = _sc;
2012	struct udphdr		*uh = _uh;
2013
2014	NET_ASSERT_LOCKED();
2015
2016	if ((t = wg_tag_get(m)) == NULL) {
2017		m_freem(m);
2018		return NULL;
2019	}
2020
2021	if (ip != NULL) {
2022		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in);
2023		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET;
2024		t->t_endpoint.e_remote.r_sin.sin_port = uh->uh_sport;
2025		t->t_endpoint.e_remote.r_sin.sin_addr = ip->ip_src;
2026		t->t_endpoint.e_local.l_in = ip->ip_dst;
2027#ifdef INET6
2028	} else if (ip6 != NULL) {
2029		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in6);
2030		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET6;
2031		t->t_endpoint.e_remote.r_sin6.sin6_port = uh->uh_sport;
2032		t->t_endpoint.e_remote.r_sin6.sin6_addr = ip6->ip6_src;
2033		t->t_endpoint.e_local.l_in6 = ip6->ip6_dst;
2034#endif
2035	} else {
2036		m_freem(m);
2037		return NULL;
2038	}
2039
2040	/* m has an IP/IPv6 header of hlen length; we don't need it anymore. */
2041	m_adj(m, hlen);
2042
2043	/*
2044	 * Ensure mbuf is contiguous over full length of packet. This is done
2045	 * so we can directly read the handshake values in wg_handshake, and so
2046	 * we can decrypt a transport packet by passing a single buffer to
2047	 * noise_remote_decrypt in wg_decap.
2048	 */
2049	if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL)
2050		return NULL;
2051
2052	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
2053		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
2054	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
2055		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
2056	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
2057		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
2058
2059		if (mq_enqueue(&sc->sc_handshake_queue, m) != 0)
2060			DPRINTF(sc, "Dropping handshake packet\n");
2061		task_add(wg_handshake_taskq, &sc->sc_handshake);
2062
2063	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
2064	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
2065
2066		data = mtod(m, struct wg_pkt_data *);
2067
2068		if ((remote = wg_index_get(sc, data->r_idx)) != NULL) {
2069			t->t_peer = CONTAINER_OF(remote, struct wg_peer,
2070			    p_remote);
2071			t->t_mbuf = NULL;
2072			t->t_done = 0;
2073
2074			if (wg_queue_in(sc, t->t_peer, m) != 0)
2075				counters_inc(sc->sc_if.if_counters,
2076				    ifc_iqdrops);
2077			task_add(wg_crypt_taskq, &sc->sc_decap);
2078		} else {
2079			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2080			m_freem(m);
2081		}
2082	} else {
2083		counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2084		m_freem(m);
2085	}
2086
2087	return NULL;
2088}
2089
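/*
 * wg_qstart drains the interface send queue: each mbuf is pushed onto its
 * peer's stage queue, and every peer seen in this pass is then either
 * flushed to the encryption queues (if a session is ready) or asked to
 * initiate a handshake.
 */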
2090void
2091wg_qstart(struct ifqueue *ifq)
2092{
2093	struct ifnet		*ifp = ifq->ifq_if;
2094	struct wg_softc		*sc = ifp->if_softc;
2095	struct wg_peer		*peer;
2096	struct wg_tag		*t;
2097	struct mbuf		*m;
2098	SLIST_HEAD(,wg_peer)	 start_list;
2099
2100	SLIST_INIT(&start_list);
2101
2102	/*
2103	 * We should be OK to modify p_start_list and p_start_onlist in this
2104	 * function, as there should only be one ifp->if_qstart invocation at a
2105	 * time.
2106	 */
2107	while ((m = ifq_dequeue(ifq)) != NULL) {
2108		t = wg_tag_get(m);
2109		peer = t->t_peer;
2110		if (mq_push(&peer->p_stage_queue, m) != 0)
2111			counters_inc(ifp->if_counters, ifc_oqdrops);
2112		if (!peer->p_start_onlist) {
2113			SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
2114			peer->p_start_onlist = 1;
2115		}
2116	}
2117	SLIST_FOREACH(peer, &start_list, p_start_list) {
2118		if (noise_remote_ready(&peer->p_remote) == 0)
2119			wg_queue_out(sc, peer);
2120		else
2121			wg_timers_event_want_initiation(&peer->p_timers);
2122		peer->p_start_onlist = 0;
2123	}
2124	task_add(wg_crypt_taskq, &sc->sc_encap);
2125}
2126
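/*
 * wg_output maps an outbound IP/IPv6 packet to a peer by looking up its
 * destination in the allowed-IPs tables, records the peer in the packet's
 * wg_tag, and hands the packet to the interface queue for wg_qstart to
 * stage and encrypt.
 */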
2127int
2128wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2129    struct rtentry *rt)
2130{
2131	struct wg_softc	*sc = ifp->if_softc;
2132	struct wg_peer	*peer;
2133	struct wg_tag	*t;
2134	int		 af, ret = EINVAL;
2135
2136	NET_ASSERT_LOCKED();
2137
2138	if ((t = wg_tag_get(m)) == NULL) {
2139		ret = ENOBUFS;
2140		goto error;
2141	}
2142
2143	m->m_pkthdr.ph_family = sa->sa_family;
2144	if (sa->sa_family == AF_INET) {
2145		peer = wg_aip_lookup(sc->sc_aip4,
2146		    &mtod(m, struct ip *)->ip_dst);
2147#ifdef INET6
2148	} else if (sa->sa_family == AF_INET6) {
2149		peer = wg_aip_lookup(sc->sc_aip6,
2150		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2151#endif
2152	} else {
2153		ret = EAFNOSUPPORT;
2154		goto error;
2155	}
2156
2157#if NBPFILTER > 0
2158	if (sc->sc_if.if_bpf)
2159		bpf_mtap_af(sc->sc_if.if_bpf, sa->sa_family, m,
2160		    BPF_DIRECTION_OUT);
2161#endif
2162
2163	if (peer == NULL) {
2164		ret = ENETUNREACH;
2165		goto error;
2166	}
2167
2168	af = peer->p_endpoint.e_remote.r_sa.sa_family;
2169	if (af != AF_INET && af != AF_INET6) {
2170		DPRINTF(sc, "No valid endpoint has been configured or "
2171				"discovered for peer %llu\n", peer->p_id);
2172		ret = EDESTADDRREQ;
2173		goto error;
2174	}
2175
2176	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
2177		DPRINTF(sc, "Packet looped\n");
2178		ret = ELOOP;
2179		goto error;
2180	}
2181
2182	/*
2183	 * As we hold a reference to peer in the mbuf, we can't handle a
2184	 * delayed packet without doing some refcnting. If a peer is removed
2185	 * while a delayed packet holds a reference, bad things will happen.
2186	 * For the time being, delayed packets are unsupported. This may be
2187	 * fixed with another aip_lookup in wg_qstart, or refcnting as noted.
2188	 */
2189	if (m->m_pkthdr.pf.delay > 0) {
2190		DPRINTF(sc, "PF delay unsupported\n");
2191		ret = EOPNOTSUPP;
2192		goto error;
2193	}
2194
2195	t->t_peer = peer;
2196	t->t_mbuf = NULL;
2197	t->t_done = 0;
2198	t->t_mtu = ifp->if_mtu;
2199
2200	/*
2201	 * We still have an issue with ifq accounting: a packet that gets
2202	 * dropped in wg_qstart, or never encrypted, is also counted as an
2203	 * ofail or oqdrop, so such a packet ends up being counted twice.
2204	 */
2205	return if_enqueue(ifp, m);
2206error:
2207	counters_inc(ifp->if_counters, ifc_oerrors);
2208	m_freem(m);
2209	return ret;
2210}
2211
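/*
 * SIOCSWG: copy in a wg_data_io descriptor and apply it. The interface
 * record may replace the private key, UDP port and rtable; it is followed
 * in userland memory by a variable-length list of peer records, each of
 * which is in turn followed by its allowed-IP entries.
 */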
2212int
2213wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2214{
2215	struct wg_interface_io	*iface_p, iface_o;
2216	struct wg_peer_io	*peer_p, peer_o;
2217	struct wg_aip_io	*aip_p, aip_o;
2218
2219	struct wg_peer		*peer, *tpeer;
2220	struct wg_aip		*aip, *taip;
2221
2222	in_port_t		 port;
2223	int			 rtable;
2224
2225	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2226	size_t			 i, j;
2227	int			 ret, has_identity;
2228
2229	if ((ret = suser(curproc)) != 0)
2230		return ret;
2231
2232	rw_enter_write(&sc->sc_lock);
2233
2234	iface_p = data->wgd_interface;
2235	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2236		goto error;
2237
2238	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2239		TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2240			wg_peer_destroy(peer);
2241
2242	if (iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE &&
2243	    (noise_local_keys(&sc->sc_local, NULL, private) ||
2244	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE))) {
2245		if (curve25519_generate_public(public, iface_o.i_private)) {
2246			if ((peer = wg_peer_lookup(sc, public)) != NULL)
2247				wg_peer_destroy(peer);
2248		}
2249		noise_local_lock_identity(&sc->sc_local);
2250		has_identity = noise_local_set_private(&sc->sc_local,
2251						       iface_o.i_private);
2252		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2253			noise_remote_precompute(&peer->p_remote);
2254			wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2255			noise_remote_expire_current(&peer->p_remote);
2256		}
2257		cookie_checker_update(&sc->sc_cookie,
2258				      has_identity == 0 ? public : NULL);
2259		noise_local_unlock_identity(&sc->sc_local);
2260	}
2261
2262	if (iface_o.i_flags & WG_INTERFACE_HAS_PORT)
2263		port = htons(iface_o.i_port);
2264	else
2265		port = sc->sc_udp_port;
2266
2267	if (iface_o.i_flags & WG_INTERFACE_HAS_RTABLE)
2268		rtable = iface_o.i_rtable;
2269	else
2270		rtable = sc->sc_udp_rtable;
2271
2272	if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
2273		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry)
2274			wg_peer_clear_src(peer);
2275
2276		if (sc->sc_if.if_flags & IFF_RUNNING)
2277			if ((ret = wg_bind(sc, &port, &rtable)) != 0)
2278				goto error;
2279
2280		sc->sc_udp_port = port;
2281		sc->sc_udp_rtable = rtable;
2282	}
2283
2284	peer_p = &iface_p->i_peers[0];
2285	for (i = 0; i < iface_o.i_peers_count; i++) {
2286		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2287			goto error;
2288
2289		/* Peer must have public key */
2290		if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
2291			goto next_peer;
2292
2293		/* 0 = latest protocol, 1 = this protocol */
2294		if (peer_o.p_protocol_version != 0) {
2295			if (peer_o.p_protocol_version > 1) {
2296				ret = EPFNOSUPPORT;
2297				goto error;
2298			}
2299		}
2300
2301		/* Get local public and check that peer key doesn't match */
2302		if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
2303		    bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2304			goto next_peer;
2305
2306		/* Lookup peer, or create if it doesn't exist */
2307		if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
2308			/* If we want to delete, there is no need to create a
2309			 * new one. Also, don't create a new one if we only
2310			 * want to update. */
2311			if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
2312				goto next_peer;
2313
2314			if ((peer = wg_peer_create(sc,
2315			    peer_o.p_public)) == NULL) {
2316				ret = ENOMEM;
2317				goto error;
2318			}
2319		}
2320
2321		/* Remove peer and continue if specified */
2322		if (peer_o.p_flags & WG_PEER_REMOVE) {
2323			wg_peer_destroy(peer);
2324			goto next_peer;
2325		}
2326
2327		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
2328			wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2329
2330		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2331			noise_remote_set_psk(&peer->p_remote, peer_o.p_psk);
2332
2333		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2334			wg_timers_set_persistent_keepalive(&peer->p_timers,
2335			    peer_o.p_pka);
2336
2337		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS) {
2338			LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip) {
2339				wg_aip_remove(sc, peer, &aip->a_data);
2340			}
2341		}
2342
2343		if (peer_o.p_flags & WG_PEER_SET_DESCRIPTION)
2344			strlcpy(peer->p_description, peer_o.p_description,
2345			    IFDESCRSIZE);
2346
2347		aip_p = &peer_p->p_aips[0];
2348		for (j = 0; j < peer_o.p_aips_count; j++) {
2349			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2350				goto error;
2351			ret = wg_aip_add(sc, peer, &aip_o);
2352			if (ret != 0)
2353				goto error;
2354			aip_p++;
2355		}
2356
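		/*
		 * Records are packed back to back in the ioctl buffer: each
		 * wg_peer_io is immediately followed by its wg_aip_io array,
		 * so the next peer record starts right after the allowed IPs
		 * we just walked.
		 */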
2357		peer_p = (struct wg_peer_io *)aip_p;
2358		continue;
2359next_peer:
2360		aip_p = &peer_p->p_aips[0];
2361		aip_p += peer_o.p_aips_count;
2362		peer_p = (struct wg_peer_io *)aip_p;
2363	}
2364
2365error:
2366	rw_exit_write(&sc->sc_lock);
2367	explicit_bzero(&iface_o, sizeof(iface_o));
2368	explicit_bzero(&peer_o, sizeof(peer_o));
2369	explicit_bzero(&aip_o, sizeof(aip_o));
2370	explicit_bzero(public, sizeof(public));
2371	explicit_bzero(private, sizeof(private));
2372	return ret;
2373}
2374
2375int
2376wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data)
2377{
2378	struct wg_interface_io	*iface_p, iface_o;
2379	struct wg_peer_io	*peer_p, peer_o;
2380	struct wg_aip_io	*aip_p;
2381
2382	struct wg_peer		*peer;
2383	struct wg_aip		*aip;
2384
2385	size_t			 size, peer_count, aip_count;
2386	int			 ret = 0, is_suser = suser(curproc) == 0;
2387
2388	size = sizeof(struct wg_interface_io);
2389	if (data->wgd_size < size && !is_suser)
2390		goto ret_size;
2391
2392	iface_p = data->wgd_interface;
2393	bzero(&iface_o, sizeof(iface_o));
2394
2395	rw_enter_read(&sc->sc_lock);
2396
2397	if (sc->sc_udp_port != 0) {
2398		iface_o.i_port = ntohs(sc->sc_udp_port);
2399		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2400	}
2401
2402	if (sc->sc_udp_rtable != 0) {
2403		iface_o.i_rtable = sc->sc_udp_rtable;
2404		iface_o.i_flags |= WG_INTERFACE_HAS_RTABLE;
2405	}
2406
2407	if (!is_suser)
2408		goto copy_out_iface;
2409
2410	if (noise_local_keys(&sc->sc_local, iface_o.i_public,
2411	    iface_o.i_private) == 0) {
2412		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2413		iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2414	}
2415
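	/*
	 * The caller must supply room for one wg_peer_io per peer and one
	 * wg_aip_io per allowed IP beyond the interface record; otherwise we
	 * only report back the size that would have been required.
	 */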
2416	size += sizeof(struct wg_peer_io) * sc->sc_peer_num;
2417	size += sizeof(struct wg_aip_io) * sc->sc_aip_num;
2418	if (data->wgd_size < size)
2419		goto unlock_and_ret_size;
2420
2421	peer_count = 0;
2422	peer_p = &iface_p->i_peers[0];
2423	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2424		bzero(&peer_o, sizeof(peer_o));
2425		peer_o.p_flags = WG_PEER_HAS_PUBLIC;
2426		peer_o.p_protocol_version = 1;
2427
2428		if (noise_remote_keys(&peer->p_remote, peer_o.p_public,
2429		    peer_o.p_psk) == 0)
2430			peer_o.p_flags |= WG_PEER_HAS_PSK;
2431
2432		if (wg_timers_get_persistent_keepalive(&peer->p_timers,
2433		    &peer_o.p_pka) == 0)
2434			peer_o.p_flags |= WG_PEER_HAS_PKA;
2435
2436		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2437			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2438
2439		mtx_enter(&peer->p_counters_mtx);
2440		peer_o.p_txbytes = peer->p_counters_tx;
2441		peer_o.p_rxbytes = peer->p_counters_rx;
2442		mtx_leave(&peer->p_counters_mtx);
2443
2444		wg_timers_get_last_handshake(&peer->p_timers,
2445		    &peer_o.p_last_handshake);
2446
2447		aip_count = 0;
2448		aip_p = &peer_p->p_aips[0];
2449		LIST_FOREACH(aip, &peer->p_aip, a_entry) {
2450			if ((ret = copyout(&aip->a_data, aip_p, sizeof(*aip_p))) != 0)
2451				goto unlock_and_ret_size;
2452			aip_p++;
2453			aip_count++;
2454		}
2455		peer_o.p_aips_count = aip_count;
2456
2457		strlcpy(peer_o.p_description, peer->p_description, IFDESCRSIZE);
2458
2459		if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
2460			goto unlock_and_ret_size;
2461
2462		peer_p = (struct wg_peer_io *)aip_p;
2463		peer_count++;
2464	}
2465	iface_o.i_peers_count = peer_count;
2466
2467copy_out_iface:
2468	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2469unlock_and_ret_size:
2470	rw_exit_read(&sc->sc_lock);
2471	explicit_bzero(&iface_o, sizeof(iface_o));
2472	explicit_bzero(&peer_o, sizeof(peer_o));
2473ret_size:
2474	data->wgd_size = size;
2475	return ret;
2476}
2477
2478int
2479wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2480{
2481	struct ifreq	*ifr = (struct ifreq *) data;
2482	struct wg_softc	*sc = ifp->if_softc;
2483	int		 ret = 0;
2484
2485	switch (cmd) {
2486	case SIOCSWG:
2487		NET_UNLOCK();
2488		ret = wg_ioctl_set(sc, (struct wg_data_io *) data);
2489		NET_LOCK();
2490		break;
2491	case SIOCGWG:
2492		NET_UNLOCK();
2493		ret = wg_ioctl_get(sc, (struct wg_data_io *) data);
2494		NET_LOCK();
2495		break;
2496	/* Interface IOCTLs */
2497	case SIOCSIFADDR:
2498		SET(ifp->if_flags, IFF_UP);
2499		/* FALLTHROUGH */
2500	case SIOCSIFFLAGS:
2501		if (ISSET(ifp->if_flags, IFF_UP))
2502			ret = wg_up(sc);
2503		else
2504			wg_down(sc);
2505		break;
2506	case SIOCSIFMTU:
2507		/* Arbitrary limits */
2508		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > 9000)
2509			ret = EINVAL;
2510		else
2511			ifp->if_mtu = ifr->ifr_mtu;
2512		break;
2513	case SIOCADDMULTI:
2514	case SIOCDELMULTI:
2515		break;
2516	default:
2517		ret = ENOTTY;
2518	}
2519
2520	return ret;
2521}
2522
2523int
2524wg_up(struct wg_softc *sc)
2525{
2526	struct wg_peer	*peer;
2527	int		 ret = 0;
2528
2529	NET_ASSERT_LOCKED();
2530	/*
2531	 * We use IFF_RUNNING for exclusive access here. We may also want an
2532	 * exclusive sc_lock, as wg_bind may write to sc_udp_port. We also want
2533	 * to drop NET_LOCK, as we want to call socreate, sobind, etc. Once
2534	 * solock is no longer the same lock as NET_LOCK, we may avoid this.
2535	 */
2536	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
2537		SET(sc->sc_if.if_flags, IFF_RUNNING);
2538		NET_UNLOCK();
2539
2540		rw_enter_write(&sc->sc_lock);
2541		/*
2542		 * If we successfully bind the socket, then enable the timers
2543		 * for the peer. This will send all staged packets and a
2544		 * keepalive if necessary.
2545		 */
2546		ret = wg_bind(sc, &sc->sc_udp_port, &sc->sc_udp_rtable);
2547		if (ret == 0) {
2548			TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2549				wg_timers_enable(&peer->p_timers);
2550				wg_queue_out(sc, peer);
2551			}
2552		}
2553		rw_exit_write(&sc->sc_lock);
2554
2555		NET_LOCK();
2556		if (ret != 0)
2557			CLR(sc->sc_if.if_flags, IFF_RUNNING);
2558	}
2559	return ret;
2560}
2561
2562void
2563wg_down(struct wg_softc *sc)
2564{
2565	struct wg_peer	*peer;
2566
2567	NET_ASSERT_LOCKED();
2568	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2569		return;
2570	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2571	NET_UNLOCK();
2572
2573	/*
2574	 * We only need a read lock here, as we aren't writing to anything
2575	 * that isn't granularly locked.
2576	 */
2577	rw_enter_read(&sc->sc_lock);
2578	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2579		mq_purge(&peer->p_stage_queue);
2580		wg_timers_disable(&peer->p_timers);
2581	}
2582
2583	taskq_barrier(wg_handshake_taskq);
2584	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2585		noise_remote_clear(&peer->p_remote);
2586		wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2587	}
2588
2589	wg_unbind(sc);
2590	rw_exit_read(&sc->sc_lock);
2591	NET_LOCK();
2592}
2593
2594int
2595wg_clone_create(struct if_clone *ifc, int unit)
2596{
2597	struct ifnet		*ifp;
2598	struct wg_softc		*sc;
2599	struct noise_upcall	 local_upcall;
2600
2601	KERNEL_ASSERT_LOCKED();
2602
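	/*
	 * The handshake and crypto taskqs are shared by all wg(4) interfaces:
	 * they are created when the first interface is cloned and destroyed
	 * in wg_clone_destroy once the last interface is gone.
	 */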
2603	if (wg_counter == 0) {
2604		wg_handshake_taskq = taskq_create("wg_handshake",
2605		    2, IPL_NET, TASKQ_MPSAFE);
2606		wg_crypt_taskq = taskq_create("wg_crypt",
2607		    ncpus, IPL_NET, TASKQ_MPSAFE);
2608
2609		if (wg_handshake_taskq == NULL || wg_crypt_taskq == NULL) {
2610			if (wg_handshake_taskq != NULL)
2611				taskq_destroy(wg_handshake_taskq);
2612			if (wg_crypt_taskq != NULL)
2613				taskq_destroy(wg_crypt_taskq);
2614			wg_handshake_taskq = NULL;
2615			wg_crypt_taskq = NULL;
2616			return ENOTRECOVERABLE;
2617		}
2618	}
2619	wg_counter++;
2620
2621	if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
2622		goto ret_00;
2623
2624	local_upcall.u_arg = sc;
2625	local_upcall.u_remote_get = wg_remote_get;
2626	local_upcall.u_index_set = wg_index_set;
2627	local_upcall.u_index_drop = wg_index_drop;
2628
2629	TAILQ_INIT(&sc->sc_peer_seq);
2630
2631	/* sc_if is initialised after everything else */
2632	arc4random_buf(&sc->sc_secret, sizeof(sc->sc_secret));
2633
2634	rw_init(&sc->sc_lock, "wg");
2635	noise_local_init(&sc->sc_local, &local_upcall);
2636	if (cookie_checker_init(&sc->sc_cookie, &wg_ratelimit_pool) != 0)
2637		goto ret_01;
2638	sc->sc_udp_port = 0;
2639	sc->sc_udp_rtable = 0;
2640
2641	rw_init(&sc->sc_so_lock, "wg_so");
2642	sc->sc_so4 = NULL;
2643#ifdef INET6
2644	sc->sc_so6 = NULL;
2645#endif
2646
2647	sc->sc_aip_num = 0;
2648	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
2649		goto ret_02;
2650#ifdef INET6
2651	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
2652		goto ret_03;
2653#endif
2654
2655	rw_init(&sc->sc_peer_lock, "wg_peer");
2656	sc->sc_peer_num = 0;
2657	if ((sc->sc_peer = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF,
2658	    M_NOWAIT, &sc->sc_peer_mask)) == NULL)
2659		goto ret_04;
2660
2661	mtx_init(&sc->sc_index_mtx, IPL_NET);
2662	if ((sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF,
2663	    M_NOWAIT, &sc->sc_index_mask)) == NULL)
2664		goto ret_05;
2665
2666	task_set(&sc->sc_handshake, wg_handshake_worker, sc);
2667	mq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES, IPL_NET);
2668
2669	task_set(&sc->sc_encap, wg_encap_worker, sc);
2670	task_set(&sc->sc_decap, wg_decap_worker, sc);
2671
2672	bzero(&sc->sc_encap_ring, sizeof(sc->sc_encap_ring));
2673	mtx_init(&sc->sc_encap_ring.r_mtx, IPL_NET);
2674	bzero(&sc->sc_decap_ring, sizeof(sc->sc_decap_ring));
2675	mtx_init(&sc->sc_decap_ring.r_mtx, IPL_NET);
2676
2677	/* We've set up the softc; now we can set up the ifnet. */
2678	ifp = &sc->sc_if;
2679	ifp->if_softc = sc;
2680
2681	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "wg%d", unit);
2682
2683	ifp->if_mtu = DEFAULT_MTU;
2684	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP;
2685	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
2686	ifp->if_txmit = 64; /* Keep our workers active for longer. */
2687
2688	ifp->if_ioctl = wg_ioctl;
2689	ifp->if_qstart = wg_qstart;
2690	ifp->if_output = wg_output;
2691
2692	ifp->if_type = IFT_WIREGUARD;
2693	ifp->if_rtrequest = p2p_rtrequest;
2694
2695	if_counters_alloc(ifp);
2696	if_attach(ifp);
2697	if_alloc_sadl(ifp);
2698
2699#if NBPFILTER > 0
2700	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
2701#endif
2702
2703	DPRINTF(sc, "Interface created\n");
2704
2705	return 0;
2706ret_05:
2707	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2708ret_04:
2709#ifdef INET6
2710	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2711ret_03:
2712#endif
2713	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2714ret_02:
2715	cookie_checker_deinit(&sc->sc_cookie);
2716ret_01:
2717	free(sc, M_DEVBUF, sizeof(*sc));
2718ret_00:
2719	return ENOBUFS;
2720}

2721int
2722wg_clone_destroy(struct ifnet *ifp)
2723{
2724	struct wg_softc	*sc = ifp->if_softc;
2725	struct wg_peer	*peer, *tpeer;
2726
2727	KERNEL_ASSERT_LOCKED();
2728
2729	rw_enter_write(&sc->sc_lock);
2730	TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2731		wg_peer_destroy(peer);
2732	rw_exit_write(&sc->sc_lock);
2733
2734	wg_unbind(sc);
2735	if_detach(ifp);
2736
2737	wg_counter--;
2738	if (wg_counter == 0) {
2739		KASSERT(wg_handshake_taskq != NULL && wg_crypt_taskq != NULL);
2740		taskq_destroy(wg_handshake_taskq);
2741		taskq_destroy(wg_crypt_taskq);
2742		wg_handshake_taskq = NULL;
2743		wg_crypt_taskq = NULL;
2744	}
2745
2746	DPRINTF(sc, "Destroyed interface\n");
2747
2748	hashfree(sc->sc_index, HASHTABLE_INDEX_SIZE, M_DEVBUF);
2749	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2750#ifdef INET6
2751	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2752#endif
2753	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2754	cookie_checker_deinit(&sc->sc_cookie);
2755	free(sc, M_DEVBUF, sizeof(*sc));
2756	return 0;
2757}
2758
2759void
2760wgattach(int nwg)
2761{
2762#ifdef WGTEST
2763	cookie_test();
2764	noise_test();
2765#endif
2766	if_clone_attach(&wg_cloner);
2767
2768	pool_init(&wg_aip_pool, sizeof(struct wg_aip), 0,
2769			IPL_NET, 0, "wgaip", NULL);
2770	pool_init(&wg_peer_pool, sizeof(struct wg_peer), 0,
2771			IPL_NET, 0, "wgpeer", NULL);
2772	pool_init(&wg_ratelimit_pool, sizeof(struct ratelimit_entry), 0,
2773			IPL_NET, 0, "wgratelimit", NULL);
2774}
2775