1/*	$NetBSD: if_wg.c,v 1.78 2024/03/10 04:21:47 riastradh Exp $	*/
2
3/*
4 * Copyright (C) Ryota Ozaki <ozaki.ryota@gmail.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*
33 * This network interface aims to implement the WireGuard protocol.
 * The implementation is based on the WireGuard paper as of
 * 2018-06-30 [1].  The paper is referred to in the source code with the
 * label [W].  The specification of the Noise protocol framework as of
 * 2018-07-11 [2] is likewise referred to with the label [N].
38 *
39 * [1] https://www.wireguard.com/papers/wireguard.pdf
40 * [2] http://noiseprotocol.org/noise.pdf
41 */
42
43#include <sys/cdefs.h>
44__KERNEL_RCSID(0, "$NetBSD: if_wg.c,v 1.78 2024/03/10 04:21:47 riastradh Exp $");
45
46#ifdef _KERNEL_OPT
47#include "opt_altq_enabled.h"
48#include "opt_inet.h"
49#endif
50
51#include <sys/param.h>
52#include <sys/types.h>
53
54#include <sys/atomic.h>
55#include <sys/callout.h>
56#include <sys/cprng.h>
57#include <sys/cpu.h>
58#include <sys/device.h>
59#include <sys/domain.h>
60#include <sys/errno.h>
61#include <sys/intr.h>
62#include <sys/ioctl.h>
63#include <sys/kernel.h>
64#include <sys/kmem.h>
65#include <sys/mbuf.h>
66#include <sys/module.h>
67#include <sys/mutex.h>
68#include <sys/once.h>
69#include <sys/percpu.h>
70#include <sys/pserialize.h>
71#include <sys/psref.h>
72#include <sys/queue.h>
73#include <sys/rwlock.h>
74#include <sys/socket.h>
75#include <sys/socketvar.h>
76#include <sys/sockio.h>
77#include <sys/sysctl.h>
78#include <sys/syslog.h>
79#include <sys/systm.h>
80#include <sys/thmap.h>
81#include <sys/threadpool.h>
82#include <sys/time.h>
83#include <sys/timespec.h>
84#include <sys/workqueue.h>
85
86#include <net/bpf.h>
87#include <net/if.h>
88#include <net/if_types.h>
89#include <net/if_wg.h>
90#include <net/pktqueue.h>
91#include <net/route.h>
92
93#include <netinet/in.h>
94#include <netinet/in_pcb.h>
95#include <netinet/in_var.h>
96#include <netinet/ip.h>
97#include <netinet/ip_var.h>
98#include <netinet/udp.h>
99#include <netinet/udp_var.h>
100
101#ifdef INET6
102#include <netinet/ip6.h>
103#include <netinet6/in6_pcb.h>
104#include <netinet6/in6_var.h>
105#include <netinet6/ip6_var.h>
106#include <netinet6/udp6_var.h>
107#endif /* INET6 */
108
109#include <prop/proplib.h>
110
111#include <crypto/blake2/blake2s.h>
112#include <crypto/sodium/crypto_aead_chacha20poly1305.h>
113#include <crypto/sodium/crypto_aead_xchacha20poly1305.h>
114#include <crypto/sodium/crypto_scalarmult.h>
115
116#include "ioconf.h"
117
118#ifdef WG_RUMPKERNEL
119#include "wg_user.h"
120#endif
121
122/*
123 * Data structures
124 * - struct wg_softc is an instance of wg interfaces
125 *   - It has a list of peers (struct wg_peer)
126 *   - It has a threadpool job that sends/receives handshake messages and
127 *     runs event handlers
128 *   - It has its own two routing tables: one is for IPv4 and the other IPv6
129 * - struct wg_peer is a representative of a peer
130 *   - It has a struct work to handle handshakes and timer tasks
131 *   - It has a pair of session instances (struct wg_session)
132 *   - It has a pair of endpoint instances (struct wg_sockaddr)
133 *     - Normally one endpoint is used and the second one is used only on
134 *       a peer migration (a change of peer's IP address)
135 *   - It has a list of IP addresses and sub networks called allowedips
136 *     (struct wg_allowedip)
 *     - A packet sent over a session is allowed if its destination matches
 *       any IP address or subnetwork in the list
139 * - struct wg_session represents a session of a secure tunnel with a peer
 *   - Two instances of sessions belong to a peer; a stable session and an
 *     unstable session
 *   - A handshake process of a session always starts with an unstable
 *     instance
 *   - Once a session is established, its instance becomes stable and the
 *     other becomes unstable instead
145 *   - Data messages are always sent via a stable session
146 *
147 * Locking notes:
148 * - Each wg has a mutex(9) wg_lock, and a rwlock(9) wg_rwlock
149 *   - Changes to the peer list are serialized by wg_lock
150 *   - The peer list may be read with pserialize(9) and psref(9)
151 *   - The rwlock (wg_rwlock) protects the routing tables (wg_rtable_ipv[46])
152 *     => XXX replace by pserialize when routing table is psz-safe
153 * - Each peer (struct wg_peer, wgp) has a mutex wgp_lock, which can be taken
154 *   only in thread context and serializes:
155 *   - the stable and unstable session pointers
156 *   - all unstable session state
157 * - Packet processing may be done in softint context:
158 *   - The stable session can be read under pserialize(9) or psref(9)
159 *     - The stable session is always ESTABLISHED
160 *     - On a session swap, we must wait for all readers to release a
161 *       reference to a stable session before changing wgs_state and
162 *       session states
163 * - Lock order: wg_lock -> wgp_lock
164 */
165
166
/*
 * WGLOG(level, fmt, ...): log(9)-style message at `level', prefixed with
 * the name of the calling function.
 */
#define WGLOG(level, fmt, args...)					      \
	log(level, "%s: " fmt, __func__, ##args)

/* Debug options */
/* Defining WG_DEBUG turns on every individual WG_DEBUG_* option below. */
#ifdef WG_DEBUG
/* Output debug logs */
#ifndef WG_DEBUG_LOG
#define WG_DEBUG_LOG
#endif
/* Output trace logs */
#ifndef WG_DEBUG_TRACE
#define WG_DEBUG_TRACE
#endif
/* Output hash values, etc. */
#ifndef WG_DEBUG_DUMP
#define WG_DEBUG_DUMP
#endif
/* Make some internal parameters configurable for testing and debugging */
#ifndef WG_DEBUG_PARAMS
#define WG_DEBUG_PARAMS
#endif
#endif

/*
 * WG_TRACE(msg): log function name, line number and msg at LOG_DEBUG;
 * expands to nothing unless WG_DEBUG_TRACE is defined.
 */
#ifdef WG_DEBUG_TRACE
#define WG_TRACE(msg)							      \
	log(LOG_DEBUG, "%s:%d: %s\n", __func__, __LINE__, (msg))
#else
#define WG_TRACE(msg)	__nothing
#endif

/*
 * WG_DLOG(fmt, ...): like WGLOG but always at LOG_DEBUG; expands to
 * nothing unless WG_DEBUG_LOG is defined.
 */
#ifdef WG_DEBUG_LOG
#define WG_DLOG(fmt, args...)	log(LOG_DEBUG, "%s: " fmt, __func__, ##args)
#else
#define WG_DLOG(fmt, args...)	__nothing
#endif

/*
 * WG_LOG_RATECHECK(wgprc, level, fmt, ...): log only when
 * ppsratecheck() on the given struct wg_ppsratecheck returns true;
 * the limit passed to ppsratecheck() is 1.  Unlike the macros above,
 * no function-name prefix is added.
 */
#define WG_LOG_RATECHECK(wgprc, level, fmt, args...)	do {		\
	if (ppsratecheck(&(wgprc)->wgprc_lasttime,			\
	    &(wgprc)->wgprc_curpps, 1)) {				\
		log(level, fmt, ##args);				\
	}								\
} while (0)

#ifdef WG_DEBUG_PARAMS
/* Debug knob; its consumer is outside this part of the file. */
static bool wg_force_underload = false;
#endif
213
214#ifdef WG_DEBUG_DUMP
215
216static char *
217gethexdump(const char *p, size_t n)
218{
219	char *buf;
220	size_t i;
221
222	if (n > SIZE_MAX/3 - 1)
223		return NULL;
224	buf = kmem_alloc(3*n + 1, KM_NOSLEEP);
225	if (buf == NULL)
226		return NULL;
227	for (i = 0; i < n; i++)
228		snprintf(buf + 3*i, 3 + 1, " %02hhx", p[i]);
229	return buf;
230}
231
/*
 * puthexdump(buf, p, n)
 *
 *	Release a string obtained from gethexdump for an n-byte input.
 *	A NULL buf (failed gethexdump) is ignored.  p is unused.
 */
static void
puthexdump(char *buf, const void *p, size_t n)
{

	if (buf != NULL) {
		/* Size must match the 3*n + 1 allocated by gethexdump.  */
		kmem_free(buf, 3*n + 1);
	}
}
240
#ifdef WG_RUMPKERNEL
/*
 * wg_dump_buf(func, buf, size)
 *
 *	Log a hex dump of size bytes at buf at LOG_DEBUG, prefixed by
 *	func.  Logs "(enomem)" in place of the dump if the hex string
 *	could not be allocated.
 */
static void
wg_dump_buf(const char *func, const char *buf, const size_t size)
{
	char *const hexstr = gethexdump(buf, size);
	const char *text;

	if (hexstr != NULL)
		text = hexstr;
	else
		text = "(enomem)";

	log(LOG_DEBUG, "%s: %s\n", func, text);
	puthexdump(hexstr, buf, size);
}
#endif
251
252static void
253wg_dump_hash(const uint8_t *func, const uint8_t *name, const uint8_t *hash,
254    const size_t size)
255{
256	char *hex = gethexdump(hash, size);
257
258	log(LOG_DEBUG, "%s: %s: %s\n", func, name, hex ? hex : "(enomem)");
259	puthexdump(hex, hash, size);
260}
261
/*
 * Dump helpers.  With WG_DEBUG_DUMP they log a hex dump tagged with the
 * calling function's name; without it they expand to nothing.
 *
 * NOTE(review): WG_DUMP_BUF expands to wg_dump_buf, which is defined
 * only under WG_RUMPKERNEL above.
 */
/* Dump WG_HASH_LEN bytes of hash.  */
#define WG_DUMP_HASH(name, hash) \
	wg_dump_hash(__func__, name, hash, WG_HASH_LEN)
/* Dump 48 bytes of hash.  */
#define WG_DUMP_HASH48(name, hash) \
	wg_dump_hash(__func__, name, hash, 48)
/* Dump size bytes of buf.  */
#define WG_DUMP_BUF(buf, size) \
	wg_dump_buf(__func__, buf, size)
#else
#define WG_DUMP_HASH(name, hash)	__nothing
#define WG_DUMP_HASH48(name, hash)	__nothing
#define WG_DUMP_BUF(buf, size)	__nothing
#endif /* WG_DEBUG_DUMP */
273
/* chosen somewhat arbitrarily -- fits in signed 16 bits NUL-terminated */
#define	WG_MAX_PROPLEN		32766

#define WG_MTU			1420
/* Capacity of the per-peer allowedips array (wgp_allowedips) */
#define WG_ALLOWEDIPS		16

/* Sizes in bytes of the underlying primitives */
#define CURVE25519_KEY_LEN	32
/*
 * Three 32-bit words.  Parenthesized so that the macro behaves as a
 * single operand in any expression (e.g. x / TAI64N_LEN).
 */
#define TAI64N_LEN		(sizeof(uint32_t) * 3)
#define POLY1305_AUTHTAG_LEN	16
#define HMAC_BLOCK_LEN		64

/* [N] 4.1: "DHLEN must be 32 or greater."  WireGuard chooses 32. */
/* [N] 4.3: Hash functions */
#define NOISE_DHLEN		32
/* [N] 4.3: "Must be 32 or 64."  WireGuard chooses 32. */
#define NOISE_HASHLEN		32
#define NOISE_BLOCKLEN		64
#define NOISE_HKDF_OUTPUT_LEN	NOISE_HASHLEN
/* [N] 5.1: "k" */
#define NOISE_CIPHER_KEY_LEN	32
/*
 * [N] 9.2: "psk"
 *          "... psk is a 32-byte secret value provided by the application."
 */
#define NOISE_PRESHARED_KEY_LEN	32

/* WireGuard-level names for the sizes above */
#define WG_STATIC_KEY_LEN	CURVE25519_KEY_LEN
#define WG_TIMESTAMP_LEN	TAI64N_LEN

#define WG_PRESHARED_KEY_LEN	NOISE_PRESHARED_KEY_LEN

#define WG_COOKIE_LEN		16
#define WG_MAC_LEN		16
#define WG_RANDVAL_LEN		24

#define WG_EPHEMERAL_KEY_LEN	CURVE25519_KEY_LEN
/* [N] 5.2: "ck: A chaining key of HASHLEN bytes" */
#define WG_CHAINING_KEY_LEN	NOISE_HASHLEN
/* [N] 5.2: "h: A hash output of HASHLEN bytes" */
#define WG_HASH_LEN		NOISE_HASHLEN
#define WG_CIPHER_KEY_LEN	NOISE_CIPHER_KEY_LEN
#define WG_DH_OUTPUT_LEN	NOISE_DHLEN
#define WG_KDF_OUTPUT_LEN	NOISE_HKDF_OUTPUT_LEN
#define WG_AUTHTAG_LEN		POLY1305_AUTHTAG_LEN
#define WG_DATA_KEY_LEN		32
#define WG_SALT_LEN		24
320
321/*
322 * The protocol messages
323 */
/*
 * Common prefix of every protocol message: each message struct below
 * begins with a 32-bit type field holding one of the WG_MSG_TYPE_*
 * values.
 */
struct wg_msg {
	uint32_t	wgm_type;
} __packed;
327
/* [W] 5.4.2 First Message: Initiator to Responder */
/*
 * Packed wire layout.  The static and timestamp fields each reserve
 * WG_AUTHTAG_LEN extra bytes after the payload for an authentication
 * tag.
 */
struct wg_msg_init {
	uint32_t	wgmi_type;
	uint32_t	wgmi_sender;
	uint8_t		wgmi_ephemeral[WG_EPHEMERAL_KEY_LEN];
	uint8_t		wgmi_static[WG_STATIC_KEY_LEN + WG_AUTHTAG_LEN];
	uint8_t		wgmi_timestamp[WG_TIMESTAMP_LEN + WG_AUTHTAG_LEN];
	uint8_t		wgmi_mac1[WG_MAC_LEN];
	uint8_t		wgmi_mac2[WG_MAC_LEN];
} __packed;
338
/* [W] 5.4.3 Second Message: Responder to Initiator */
/*
 * Packed wire layout.  wgmr_empty carries a zero-length payload, so it
 * consists of the WG_AUTHTAG_LEN-byte authentication tag only.
 */
struct wg_msg_resp {
	uint32_t	wgmr_type;
	uint32_t	wgmr_sender;
	uint32_t	wgmr_receiver;
	uint8_t		wgmr_ephemeral[WG_EPHEMERAL_KEY_LEN];
	uint8_t		wgmr_empty[0 + WG_AUTHTAG_LEN];
	uint8_t		wgmr_mac1[WG_MAC_LEN];
	uint8_t		wgmr_mac2[WG_MAC_LEN];
} __packed;
349
/* [W] 5.4.6 Subsequent Messages: Transport Data Messages */
/*
 * Packed fixed-size header; wgmd_packet is a zero-length array that
 * marks where the variable-length payload starts right after the
 * header.
 */
struct wg_msg_data {
	uint32_t	wgmd_type;
	uint32_t	wgmd_receiver;
	uint64_t	wgmd_counter;
	uint32_t	wgmd_packet[0];
} __packed;
357
/* [W] 5.4.7 Under Load: Cookie Reply Message */
/*
 * Packed wire layout.  wgmc_cookie reserves WG_AUTHTAG_LEN bytes after
 * the WG_COOKIE_LEN-byte cookie for an authentication tag.
 */
struct wg_msg_cookie {
	uint32_t	wgmc_type;
	uint32_t	wgmc_receiver;
	uint8_t		wgmc_salt[WG_SALT_LEN];
	uint8_t		wgmc_cookie[WG_COOKIE_LEN + WG_AUTHTAG_LEN];
} __packed;
365
/*
 * Values of the leading type field of the message structs above.
 * WG_MSG_TYPE_MAX is the largest valid type; it also sizes the
 * per-peer wgp_last_msg_received_time array.
 */
#define WG_MSG_TYPE_INIT		1
#define WG_MSG_TYPE_RESP		2
#define WG_MSG_TYPE_COOKIE		3
#define WG_MSG_TYPE_DATA		4
#define WG_MSG_TYPE_MAX			WG_MSG_TYPE_DATA
371
372/* Sliding windows */
373
/*
 * Geometry of the replay-detection bitmap:
 *
 *	SLIWIN_BITS	total number of bits in the bitmap
 *	SLIWIN_TYPE	type of one bitmap word
 *	SLIWIN_BPW	bits per bitmap word
 *	SLIWIN_WORDS	number of words in the bitmap
 *	SLIWIN_NPKT	how far behind the newest sequence number a
 *			packet may be and still be considered (one word
 *			less than the whole bitmap)
 *
 * Every expansion is fully parenthesized: SLIWIN_BPW is used as the
 * right-hand operand of / and %, where an unparenthesized
 * NBBY*sizeof(...) would be split by operator precedence
 * (S / NBBY * sizeof(...)) and map sequence numbers to the wrong bits.
 */
#define	SLIWIN_BITS	2048u
#define	SLIWIN_TYPE	uint32_t
#define	SLIWIN_BPW	(NBBY*sizeof(SLIWIN_TYPE))
#define	SLIWIN_WORDS	(howmany(SLIWIN_BITS, SLIWIN_BPW))
#define	SLIWIN_NPKT	(SLIWIN_BITS - SLIWIN_BPW)
379
/*
 * Replay-detection sliding window.
 *
 *	B	bitmap of sequence numbers already accepted, indexed
 *		modulo the bitmap size
 *	T	highest sequence number accepted so far
 */
struct sliwin {
	SLIWIN_TYPE	B[SLIWIN_WORDS];
	uint64_t	T;
};
384
385static void
386sliwin_reset(struct sliwin *W)
387{
388
389	memset(W, 0, sizeof(*W));
390}
391
/*
 * sliwin_check_fast(W, S)
 *
 *	Lock-free pre-check of sequence number S against window W.
 *	Returns EAUTH if S is certainly too old, or 0 if the decision
 *	requires a serialized call to sliwin_update.  A return of 0 does
 *	not mean S is acceptable.
 */
static int
sliwin_check_fast(const volatile struct sliwin *W, uint64_t S)
{

#ifdef __HAVE_ATOMIC64_LOADSTORE
	/*
	 * Only possible where 64-bit loads are atomic.  Anything more
	 * than one window behind the newest sequence number seen is
	 * rejected outright.
	 */
	const uint64_t newest = atomic_load_relaxed(&W->T);

	if (newest > S + SLIWIN_NPKT)
		return EAUTH;
#endif

	/*
	 * Undecided: the caller must take the lock and let
	 * sliwin_update make the final call.
	 */
	return 0;
}
412
413static int
414sliwin_update(struct sliwin *W, uint64_t S)
415{
416	unsigned word, bit;
417
418	/*
419	 * If it's more than one window older than the highest sequence
420	 * number we've seen, reject.
421	 */
422	if (S + SLIWIN_NPKT < W->T)
423		return EAUTH;
424
425	/*
426	 * If it's higher than the highest sequence number we've seen,
427	 * advance the window.
428	 */
429	if (S > W->T) {
430		uint64_t i = W->T / SLIWIN_BPW;
431		uint64_t j = S / SLIWIN_BPW;
432		unsigned k;
433
434		for (k = 0; k < MIN(j - i, SLIWIN_WORDS); k++)
435			W->B[(i + k + 1) % SLIWIN_WORDS] = 0;
436#ifdef __HAVE_ATOMIC64_LOADSTORE
437		atomic_store_relaxed(&W->T, S);
438#else
439		W->T = S;
440#endif
441	}
442
443	/* Test and set the bit -- if already set, reject.  */
444	word = (S / SLIWIN_BPW) % SLIWIN_WORDS;
445	bit = S % SLIWIN_BPW;
446	if (W->B[word] & (1UL << bit))
447		return EAUTH;
448	W->B[word] |= 1U << bit;
449
450	/* Accept!  */
451	return 0;
452}
453
/*
 * One session of a secure tunnel with a peer.  Each peer owns two of
 * these, a stable and an unstable one (see the overview comment at the
 * top of the file).
 */
struct wg_session {
	struct wg_peer	*wgs_peer;	/* owning peer */
	struct psref_target
			wgs_psref;

	int		wgs_state;	/* one of WGS_STATE_* */
#define WGS_STATE_UNKNOWN	0
#define WGS_STATE_INIT_ACTIVE	1
#define WGS_STATE_INIT_PASSIVE	2
#define WGS_STATE_ESTABLISHED	3
#define WGS_STATE_DESTROYING	4

	time_t		wgs_time_established;
	time_t		wgs_time_last_data_sent;
	bool		wgs_is_initiator;

	uint32_t	wgs_local_index;
	uint32_t	wgs_remote_index;
	/*
	 * Send counter: a volatile 64-bit word where 64-bit atomic
	 * load/store is available, otherwise a plain word accompanied
	 * by its own mutex.
	 */
#ifdef __HAVE_ATOMIC64_LOADSTORE
	volatile uint64_t
			wgs_send_counter;
#else
	kmutex_t	wgs_send_counter_lock;
	uint64_t	wgs_send_counter;
#endif

	/* Receive-side replay window together with its lock */
	struct {
		kmutex_t	lock;
		struct sliwin	window;
	}		*wgs_recvwin;

	/* Handshake state and derived transport keys */
	uint8_t		wgs_handshake_hash[WG_HASH_LEN];
	uint8_t		wgs_chaining_key[WG_CHAINING_KEY_LEN];
	uint8_t		wgs_ephemeral_key_pub[WG_EPHEMERAL_KEY_LEN];
	uint8_t		wgs_ephemeral_key_priv[WG_EPHEMERAL_KEY_LEN];
	uint8_t		wgs_ephemeral_key_peer[WG_EPHEMERAL_KEY_LEN];
	uint8_t		wgs_tkey_send[WG_DATA_KEY_LEN];
	uint8_t		wgs_tkey_recv[WG_DATA_KEY_LEN];
};
493
/*
 * A socket address viewable as any of the sockaddr flavours through an
 * anonymous union, plus a psref target for passive references to it.
 */
struct wg_sockaddr {
	union {
		struct sockaddr_storage _ss;
		struct sockaddr _sa;
		struct sockaddr_in _sin;
		struct sockaddr_in6 _sin6;
	};
	struct psref_target	wgsa_psref;
};

/* Accessors yielding a pointer to the requested view of the address */
#define wgsatoss(wgsa)		(&(wgsa)->_ss)
#define wgsatosa(wgsa)		(&(wgsa)->_sa)
#define wgsatosin(wgsa)		(&(wgsa)->_sin)
#define wgsatosin6(wgsa)	(&(wgsa)->_sin6)

/* Address family of the stored address */
#define	wgsa_family(wgsa)	(wgsatosa(wgsa)->sa_family)
510
struct wg_peer;
/*
 * One allowed IP address or subnetwork of a peer.  It embeds the radix
 * nodes and the address/mask sockaddrs, and points back to the owning
 * peer.
 */
struct wg_allowedip {
	struct radix_node	wga_nodes[2];
	struct wg_sockaddr	_wga_sa_addr;
	struct wg_sockaddr	_wga_sa_mask;
	/* Generic sockaddr views of the address and mask */
#define wga_sa_addr		_wga_sa_addr._sa
#define wga_sa_mask		_wga_sa_mask._sa

	int			wga_family;
	uint8_t			wga_cidr;
	union {
		struct in_addr _ip4;
		struct in6_addr _ip6;
	} wga_addr;
#define wga_addr4	wga_addr._ip4
#define wga_addr6	wga_addr._ip6

	struct wg_peer		*wga_peer;	/* owning peer */
};
530
/* Raw handshake timestamp of WG_TIMESTAMP_LEN bytes */
typedef uint8_t wg_timestamp_t[WG_TIMESTAMP_LEN];

/* State handed to ppsratecheck() by WG_LOG_RATECHECK */
struct wg_ppsratecheck {
	struct timeval		wgprc_lasttime;
	int			wgprc_curpps;
};
537
struct wg_softc;
/*
 * A peer of a wg interface.  See the overview and locking notes at the
 * top of the file for how the sessions, endpoints and allowedips relate
 * and which lock covers what.
 */
struct wg_peer {
	struct wg_softc		*wgp_sc;	/* owning interface */
	char			wgp_name[WG_PEER_NAME_MAXLEN + 1];
	struct pslist_entry	wgp_peerlist_entry;	/* wg_peers linkage */
	pserialize_t		wgp_psz;
	struct psref_target	wgp_psref;
	kmutex_t		*wgp_lock;
	kmutex_t		*wgp_intr_lock;

	uint8_t	wgp_pubkey[WG_STATIC_KEY_LEN];
	/* Pair of endpoint instances */
	struct wg_sockaddr	*wgp_endpoint;
	struct wg_sockaddr	*wgp_endpoint0;
	volatile unsigned	wgp_endpoint_changing;
	bool			wgp_endpoint_available;

			/* The preshared key (optional) */
	uint8_t		wgp_psk[WG_PRESHARED_KEY_LEN];

	/* Pair of session instances */
	struct wg_session	*wgp_session_stable;
	struct wg_session	*wgp_session_unstable;

	/* first outgoing packet awaiting session initiation */
	struct mbuf		*wgp_pending;

	/* timestamp in big-endian */
	wg_timestamp_t	wgp_timestamp_latest_init;

	struct timespec		wgp_last_handshake_time;

	callout_t		wgp_rekey_timer;
	callout_t		wgp_handshake_timeout_timer;
	callout_t		wgp_session_dtor_timer;

	time_t			wgp_handshake_start_time;

	/* Number of entries in use in wgp_allowedips -- TODO confirm */
	int			wgp_n_allowedips;
	struct wg_allowedip	wgp_allowedips[WG_ALLOWEDIPS];

	/* Cookie bookkeeping */
	time_t			wgp_latest_cookie_time;
	uint8_t			wgp_latest_cookie[WG_COOKIE_LEN];
	uint8_t			wgp_last_sent_mac1[WG_MAC_LEN];
	bool			wgp_last_sent_mac1_valid;
	uint8_t			wgp_last_sent_cookie[WG_COOKIE_LEN];
	bool			wgp_last_sent_cookie_valid;

	time_t			wgp_last_msg_received_time[WG_MSG_TYPE_MAX];

	time_t			wgp_last_genrandval_time;
	uint32_t		wgp_randval;

	struct wg_ppsratecheck	wgp_ppsratecheck;

	/* Deferred work item and the bitmask of pending WGP_TASK_* bits */
	struct work		wgp_work;
	unsigned int		wgp_tasks;
#define WGP_TASK_SEND_INIT_MESSAGE		__BIT(0)
#define WGP_TASK_RETRY_HANDSHAKE		__BIT(1)
#define WGP_TASK_ESTABLISH_SESSION		__BIT(2)
#define WGP_TASK_ENDPOINT_CHANGED		__BIT(3)
#define WGP_TASK_SEND_KEEPALIVE_MESSAGE		__BIT(4)
#define WGP_TASK_DESTROY_PREV_SESSION		__BIT(5)
};
600
struct wg_ops;

/*
 * Per-interface state of a wg(4) instance.  See the locking notes at
 * the top of the file for what wg_lock and wg_rwlock protect.
 */
struct wg_softc {
	struct ifnet	wg_if;
	LIST_ENTRY(wg_softc) wg_list;
	kmutex_t	*wg_lock;
	kmutex_t	*wg_intr_lock;
	krwlock_t	*wg_rwlock;

	/* Static key pair of this interface */
	uint8_t		wg_privkey[WG_STATIC_KEY_LEN];
	uint8_t		wg_pubkey[WG_STATIC_KEY_LEN];

	/* Peer list and lookup maps */
	int		wg_npeers;
	struct pslist_head	wg_peers;
	struct thmap	*wg_peers_bypubkey;
	struct thmap	*wg_peers_byname;
	struct thmap	*wg_sessions_byindex;
	uint16_t	wg_listen_port;

	struct threadpool	*wg_threadpool;

	/* Threadpool job and the bitmask of pending WG_UPCALL_* bits */
	struct threadpool_job	wg_job;
	int			wg_upcalls;
#define	WG_UPCALL_INET	__BIT(0)
#define	WG_UPCALL_INET6	__BIT(1)

	/* Per-address-family socket and routing table */
#ifdef INET
	struct socket		*wg_so4;
	struct radix_node_head	*wg_rtable_ipv4;
#endif
#ifdef INET6
	struct socket		*wg_so6;
	struct radix_node_head	*wg_rtable_ipv6;
#endif

	struct wg_ppsratecheck	wg_ppsratecheck;

	/* Operations vector, one of the struct wg_ops instances */
	struct wg_ops		*wg_ops;

#ifdef WG_RUMPKERNEL
	struct wg_user		*wg_user;
#endif
};
644
/* [W] 6.1 Preliminaries */
/*
 * Protocol limits.  The *_MESSAGES values are message counts; the
 * *_TIME and *_TIMEOUT values are presumably in seconds as in [W] 6.1
 * -- confirm against their users.
 */
#define WG_REKEY_AFTER_MESSAGES		(1ULL << 60)
#define WG_REJECT_AFTER_MESSAGES	(UINT64_MAX - (1 << 13))
#define WG_REKEY_AFTER_TIME		120
#define WG_REJECT_AFTER_TIME		180
#define WG_REKEY_ATTEMPT_TIME		 90
#define WG_REKEY_TIMEOUT		  5
#define WG_KEEPALIVE_TIMEOUT		 10

#define WG_COOKIE_TIME			120
#define WG_RANDVAL_TIME			(2 * 60)

/*
 * Run-time copies of the limits above, initialized to the protocol
 * defaults.
 */
static uint64_t wg_rekey_after_messages = WG_REKEY_AFTER_MESSAGES;
static uint64_t wg_reject_after_messages = WG_REJECT_AFTER_MESSAGES;
static unsigned wg_rekey_after_time = WG_REKEY_AFTER_TIME;
static unsigned wg_reject_after_time = WG_REJECT_AFTER_TIME;
static unsigned wg_rekey_attempt_time = WG_REKEY_ATTEMPT_TIME;
static unsigned wg_rekey_timeout = WG_REKEY_TIMEOUT;
static unsigned wg_keepalive_timeout = WG_KEEPALIVE_TIMEOUT;
664
665static struct mbuf *
666		wg_get_mbuf(size_t, size_t);
667
668static int	wg_send_data_msg(struct wg_peer *, struct wg_session *,
669		    struct mbuf *);
670static int	wg_send_cookie_msg(struct wg_softc *, struct wg_peer *,
671		    const uint32_t, const uint8_t [WG_MAC_LEN],
672		    const struct sockaddr *);
673static int	wg_send_handshake_msg_resp(struct wg_softc *, struct wg_peer *,
674		    struct wg_session *, const struct wg_msg_init *);
675static void	wg_send_keepalive_msg(struct wg_peer *, struct wg_session *);
676
677static struct wg_peer *
678		wg_pick_peer_by_sa(struct wg_softc *, const struct sockaddr *,
679		    struct psref *);
680static struct wg_peer *
681		wg_lookup_peer_by_pubkey(struct wg_softc *,
682		    const uint8_t [WG_STATIC_KEY_LEN], struct psref *);
683
684static struct wg_session *
685		wg_lookup_session_by_index(struct wg_softc *,
686		    const uint32_t, struct psref *);
687
688static void	wg_update_endpoint_if_necessary(struct wg_peer *,
689		    const struct sockaddr *);
690
691static void	wg_schedule_rekey_timer(struct wg_peer *);
692static void	wg_schedule_session_dtor_timer(struct wg_peer *);
693
694static bool	wg_is_underload(struct wg_softc *, struct wg_peer *, int);
695static void	wg_calculate_keys(struct wg_session *, const bool);
696
697static void	wg_clear_states(struct wg_session *);
698
699static void	wg_get_peer(struct wg_peer *, struct psref *);
700static void	wg_put_peer(struct wg_peer *, struct psref *);
701
702static int	wg_send_so(struct wg_peer *, struct mbuf *);
703static int	wg_send_udp(struct wg_peer *, struct mbuf *);
704static int	wg_output(struct ifnet *, struct mbuf *,
705			   const struct sockaddr *, const struct rtentry *);
706static void	wg_input(struct ifnet *, struct mbuf *, const int);
707static int	wg_ioctl(struct ifnet *, u_long, void *);
708static int	wg_bind_port(struct wg_softc *, const uint16_t);
709static int	wg_init(struct ifnet *);
710#ifdef ALTQ
711static void	wg_start(struct ifnet *);
712#endif
713static void	wg_stop(struct ifnet *, int);
714
715static void	wg_peer_work(struct work *, void *);
716static void	wg_job(struct threadpool_job *);
717static void	wgintr(void *);
718static void	wg_purge_pending_packets(struct wg_peer *);
719
720static int	wg_clone_create(struct if_clone *, int);
721static int	wg_clone_destroy(struct ifnet *);
722
/*
 * Operations vector selecting how an interface sends handshake and
 * data messages, delivers input, and binds its listen port.
 */
struct wg_ops {
	int (*send_hs_msg)(struct wg_peer *, struct mbuf *);
	int (*send_data_msg)(struct wg_peer *, struct mbuf *);
	void (*input)(struct ifnet *, struct mbuf *, const int);
	int (*bind_port)(struct wg_softc *, const uint16_t);
};
729
/*
 * Operations backed by this file's own functions: handshake messages
 * go through wg_send_so, data messages through wg_send_udp.
 */
struct wg_ops wg_ops_rumpkernel = {
	.send_hs_msg	= wg_send_so,
	.send_data_msg	= wg_send_udp,
	.input		= wg_input,
	.bind_port	= wg_bind_port,
};
736
#ifdef WG_RUMPKERNEL
static bool	wg_user_mode(struct wg_softc *);
static int	wg_ioctl_linkstr(struct wg_softc *, struct ifdrv *);

static int	wg_send_user(struct wg_peer *, struct mbuf *);
static void	wg_input_user(struct ifnet *, struct mbuf *, const int);
static int	wg_bind_port_user(struct wg_softc *, const uint16_t);

/*
 * Operations backed by the *_user functions, available only with
 * WG_RUMPKERNEL.  Handshake and data messages share wg_send_user.
 */
struct wg_ops wg_ops_rumpuser = {
	.send_hs_msg	= wg_send_user,
	.send_data_msg	= wg_send_user,
	.input		= wg_input_user,
	.bind_port	= wg_bind_port_user,
};
#endif
752
/*
 * Thin wrappers around the PSLIST macros for an interface's peer list
 * (wg_peers, linked through wgp_peerlist_entry).
 */
/* Iterate over the peers of wg with the PSLIST reader macro.  */
#define WG_PEER_READER_FOREACH(wgp, wg)					\
	PSLIST_READER_FOREACH((wgp), &(wg)->wg_peers, struct wg_peer,	\
	    wgp_peerlist_entry)
/* Iterate over the peers of wg with the PSLIST writer macro.  */
#define WG_PEER_WRITER_FOREACH(wgp, wg)					\
	PSLIST_WRITER_FOREACH((wgp), &(wg)->wg_peers, struct wg_peer,	\
	    wgp_peerlist_entry)
/* Insert wgp at the head of wg's peer list.  */
#define WG_PEER_WRITER_INSERT_HEAD(wgp, wg)				\
	PSLIST_WRITER_INSERT_HEAD(&(wg)->wg_peers, (wgp), wgp_peerlist_entry)
/* Remove wgp from the peer list it is on.  */
#define WG_PEER_WRITER_REMOVE(wgp)					\
	PSLIST_WRITER_REMOVE((wgp), wgp_peerlist_entry)
763
/*
 * Radix-tree entry pairing the tree's node storage with a peer.
 * NOTE(review): no user of this struct is visible in this part of the
 * file.
 */
struct wg_route {
	struct radix_node	wgr_nodes[2];
	struct wg_peer		*wgr_peer;
};
768
/*
 * wg_rnh(wg, family)
 *
 *	Return the routing table of wg for the given address family, or
 *	NULL if the family is not supported by this kernel.
 *
 *	Each case is compiled only when the corresponding table exists
 *	in struct wg_softc: wg_rtable_ipv4 is declared under INET and
 *	wg_rtable_ipv6 under INET6.
 */
static struct radix_node_head *
wg_rnh(struct wg_softc *wg, const int family)
{

	switch (family) {
#ifdef INET
	case AF_INET:
		return wg->wg_rtable_ipv4;
#endif
#ifdef INET6
	case AF_INET6:
		return wg->wg_rtable_ipv6;
#endif
	default:
		return NULL;
	}
}
784
785
/*
 * Global variables
 */
/* Count maintained by wg_count_inc/wg_count_dec; wgdetach fails while nonzero */
static volatile unsigned wg_count __cacheline_aligned;

/* psref class created in wginit */
struct psref_class *wg_psref_class __read_mostly;

/* Cloner for "wg" interfaces */
static struct if_clone wg_cloner =
    IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);

/* Packet queue and peer workqueue, both created in wginitqueues */
static struct pktqueue *wg_pktq __read_mostly;
static struct workqueue *wg_wq __read_mostly;
798
void wgattach(int);

/*
 * wgattach(count)
 *
 *	Pseudo-device attach hook.  Intentionally empty: all
 *	initialization is done by the module initialization code in
 *	wginit() below.
 */
/* ARGSUSED */
void
wgattach(int count)
{

}
809
810static void
811wginit(void)
812{
813
814	wg_psref_class = psref_class_create("wg", IPL_SOFTNET);
815
816	if_clone_attach(&wg_cloner);
817}
818
819/*
820 * XXX Kludge: This should just happen in wginit, but workqueue_create
821 * cannot be run until after CPUs have been detected, and wginit runs
822 * before configure.
823 */
824static int
825wginitqueues(void)
826{
827	int error __diagused;
828
829	wg_pktq = pktq_create(IFQ_MAXLEN, wgintr, NULL);
830	KASSERT(wg_pktq != NULL);
831
832	error = workqueue_create(&wg_wq, "wgpeer", wg_peer_work, NULL,
833	    PRI_NONE, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
834	KASSERT(error == 0);
835
836	return 0;
837}
838
839static void
840wg_guarantee_initialized(void)
841{
842	static ONCE_DECL(init);
843	int error __diagused;
844
845	error = RUN_ONCE(&init, wginitqueues);
846	KASSERT(error == 0);
847}
848
849static int
850wg_count_inc(void)
851{
852	unsigned o, n;
853
854	do {
855		o = atomic_load_relaxed(&wg_count);
856		if (o == UINT_MAX)
857			return ENFILE;
858		n = o + 1;
859	} while (atomic_cas_uint(&wg_count, o, n) != o);
860
861	return 0;
862}
863
864static void
865wg_count_dec(void)
866{
867	unsigned c __diagused;
868
869	c = atomic_dec_uint_nv(&wg_count);
870	KASSERT(c != UINT_MAX);
871}
872
873static int
874wgdetach(void)
875{
876
877	/* Prevent new interface creation.  */
878	if_clone_detach(&wg_cloner);
879
880	/* Check whether there are any existing interfaces.  */
881	if (atomic_load_relaxed(&wg_count)) {
882		/* Back out -- reattach the cloner.  */
883		if_clone_attach(&wg_cloner);
884		return EBUSY;
885	}
886
887	/* No interfaces left.  Nuke it.  */
888	workqueue_destroy(wg_wq);
889	pktq_destroy(wg_pktq);
890	psref_class_destroy(wg_psref_class);
891
892	return 0;
893}
894
895static void
896wg_init_key_and_hash(uint8_t ckey[WG_CHAINING_KEY_LEN],
897    uint8_t hash[WG_HASH_LEN])
898{
899	/* [W] 5.4: CONSTRUCTION */
900	const char *signature = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
901	/* [W] 5.4: IDENTIFIER */
902	const char *id = "WireGuard v1 zx2c4 Jason@zx2c4.com";
903	struct blake2s state;
904
905	blake2s(ckey, WG_CHAINING_KEY_LEN, NULL, 0,
906	    signature, strlen(signature));
907
908	CTASSERT(WG_HASH_LEN == WG_CHAINING_KEY_LEN);
909	memcpy(hash, ckey, WG_CHAINING_KEY_LEN);
910
911	blake2s_init(&state, WG_HASH_LEN, NULL, 0);
912	blake2s_update(&state, ckey, WG_CHAINING_KEY_LEN);
913	blake2s_update(&state, id, strlen(id));
914	blake2s_final(&state, hash);
915
916	WG_DUMP_HASH("ckey", ckey);
917	WG_DUMP_HASH("hash", hash);
918}
919
920static void
921wg_algo_hash(uint8_t hash[WG_HASH_LEN], const uint8_t input[],
922    const size_t inputsize)
923{
924	struct blake2s state;
925
926	blake2s_init(&state, WG_HASH_LEN, NULL, 0);
927	blake2s_update(&state, hash, WG_HASH_LEN);
928	blake2s_update(&state, input, inputsize);
929	blake2s_final(&state, hash);
930}
931
932static void
933wg_algo_mac(uint8_t out[], const size_t outsize,
934    const uint8_t key[], const size_t keylen,
935    const uint8_t input1[], const size_t input1len,
936    const uint8_t input2[], const size_t input2len)
937{
938	struct blake2s state;
939
940	blake2s_init(&state, outsize, key, keylen);
941
942	blake2s_update(&state, input1, input1len);
943	if (input2 != NULL)
944		blake2s_update(&state, input2, input2len);
945	blake2s_final(&state, out);
946}
947
948static void
949wg_algo_mac_mac1(uint8_t out[], const size_t outsize,
950    const uint8_t input1[], const size_t input1len,
951    const uint8_t input2[], const size_t input2len)
952{
953	struct blake2s state;
954	/* [W] 5.4: LABEL-MAC1 */
955	const char *label = "mac1----";
956	uint8_t key[WG_HASH_LEN];
957
958	blake2s_init(&state, sizeof(key), NULL, 0);
959	blake2s_update(&state, label, strlen(label));
960	blake2s_update(&state, input1, input1len);
961	blake2s_final(&state, key);
962
963	blake2s_init(&state, outsize, key, sizeof(key));
964	if (input2 != NULL)
965		blake2s_update(&state, input2, input2len);
966	blake2s_final(&state, out);
967}
968
969static void
970wg_algo_mac_cookie(uint8_t out[], const size_t outsize,
971    const uint8_t input1[], const size_t input1len)
972{
973	struct blake2s state;
974	/* [W] 5.4: LABEL-COOKIE */
975	const char *label = "cookie--";
976
977	blake2s_init(&state, outsize, NULL, 0);
978	blake2s_update(&state, label, strlen(label));
979	blake2s_update(&state, input1, input1len);
980	blake2s_final(&state, out);
981}
982
983static void
984wg_algo_generate_keypair(uint8_t pubkey[WG_EPHEMERAL_KEY_LEN],
985    uint8_t privkey[WG_EPHEMERAL_KEY_LEN])
986{
987
988	CTASSERT(WG_EPHEMERAL_KEY_LEN == crypto_scalarmult_curve25519_BYTES);
989
990	cprng_strong(kern_cprng, privkey, WG_EPHEMERAL_KEY_LEN, 0);
991	crypto_scalarmult_base(pubkey, privkey);
992}
993
994static void
995wg_algo_dh(uint8_t out[WG_DH_OUTPUT_LEN],
996    const uint8_t privkey[WG_STATIC_KEY_LEN],
997    const uint8_t pubkey[WG_STATIC_KEY_LEN])
998{
999
1000	CTASSERT(WG_STATIC_KEY_LEN == crypto_scalarmult_curve25519_BYTES);
1001
1002	int ret __diagused = crypto_scalarmult(out, privkey, pubkey);
1003	KASSERT(ret == 0);
1004}
1005
1006static void
1007wg_algo_hmac(uint8_t out[], const size_t outlen,
1008    const uint8_t key[], const size_t keylen,
1009    const uint8_t in[], const size_t inlen)
1010{
1011#define IPAD	0x36
1012#define OPAD	0x5c
1013	uint8_t hmackey[HMAC_BLOCK_LEN] = {0};
1014	uint8_t ipad[HMAC_BLOCK_LEN];
1015	uint8_t opad[HMAC_BLOCK_LEN];
1016	size_t i;
1017	struct blake2s state;
1018
1019	KASSERT(outlen == WG_HASH_LEN);
1020	KASSERT(keylen <= HMAC_BLOCK_LEN);
1021
1022	memcpy(hmackey, key, keylen);
1023
1024	for (i = 0; i < sizeof(hmackey); i++) {
1025		ipad[i] = hmackey[i] ^ IPAD;
1026		opad[i] = hmackey[i] ^ OPAD;
1027	}
1028
1029	blake2s_init(&state, WG_HASH_LEN, NULL, 0);
1030	blake2s_update(&state, ipad, sizeof(ipad));
1031	blake2s_update(&state, in, inlen);
1032	blake2s_final(&state, out);
1033
1034	blake2s_init(&state, WG_HASH_LEN, NULL, 0);
1035	blake2s_update(&state, opad, sizeof(opad));
1036	blake2s_update(&state, out, WG_HASH_LEN);
1037	blake2s_final(&state, out);
1038#undef IPAD
1039#undef OPAD
1040}
1041
/*
 * wg_algo_kdf(out1, out2, out3, ckey, input, inputlen)
 *
 *	Derive up to three WG_KDF_OUTPUT_LEN-byte outputs from the
 *	chaining key ckey and the input key material:
 *
 *		tmp1 := HMAC(ckey, input)
 *		out1 := HMAC(tmp1, 0x01)
 *		out2 := HMAC(tmp1, out1 || 0x02)
 *		out3 := HMAC(tmp1, out2 || 0x03)
 *
 *	out2 and out3 are optional: the function stops at the first of
 *	them that is NULL.  inputlen must be 0, 32 or NOISE_DHLEN
 *	(asserted).
 *
 *	out1 may be the same buffer as ckey (wg_algo_dh_kdf calls it
 *	that way), so ckey must be fully consumed into tmp1 before out1
 *	is written; do not reorder the steps.
 */
static void
wg_algo_kdf(uint8_t out1[WG_KDF_OUTPUT_LEN], uint8_t out2[WG_KDF_OUTPUT_LEN],
    uint8_t out3[WG_KDF_OUTPUT_LEN], const uint8_t ckey[WG_CHAINING_KEY_LEN],
    const uint8_t input[], const size_t inputlen)
{
	uint8_t tmp1[WG_KDF_OUTPUT_LEN], tmp2[WG_KDF_OUTPUT_LEN + 1];
	uint8_t one[1];

	/*
	 * [N] 4.3: "an input_key_material byte sequence with length
	 * either zero bytes, 32 bytes, or DHLEN bytes."
	 */
	KASSERT(inputlen == 0 || inputlen == 32 || inputlen == NOISE_DHLEN);

	WG_DUMP_HASH("ckey", ckey);
	/*
	 * NOTE(review): WG_DUMP_HASH always dumps WG_HASH_LEN bytes, even
	 * if input is non-NULL with inputlen == 0 (debug builds only).
	 */
	if (input != NULL)
		WG_DUMP_HASH("input", input);
	/* tmp1 is the key for every subsequent HMAC.  */
	wg_algo_hmac(tmp1, sizeof(tmp1), ckey, WG_CHAINING_KEY_LEN,
	    input, inputlen);
	WG_DUMP_HASH("tmp1", tmp1);
	one[0] = 1;
	wg_algo_hmac(out1, WG_KDF_OUTPUT_LEN, tmp1, sizeof(tmp1),
	    one, sizeof(one));
	WG_DUMP_HASH("out1", out1);
	if (out2 == NULL)
		return;
	/* tmp2 := previous output followed by the one-byte counter.  */
	memcpy(tmp2, out1, WG_KDF_OUTPUT_LEN);
	tmp2[WG_KDF_OUTPUT_LEN] = 2;
	wg_algo_hmac(out2, WG_KDF_OUTPUT_LEN, tmp1, sizeof(tmp1),
	    tmp2, sizeof(tmp2));
	WG_DUMP_HASH("out2", out2);
	if (out3 == NULL)
		return;
	memcpy(tmp2, out2, WG_KDF_OUTPUT_LEN);
	tmp2[WG_KDF_OUTPUT_LEN] = 3;
	wg_algo_hmac(out3, WG_KDF_OUTPUT_LEN, tmp1, sizeof(tmp1),
	    tmp2, sizeof(tmp2));
	WG_DUMP_HASH("out3", out3);
}
1081
1082static void __noinline
1083wg_algo_dh_kdf(uint8_t ckey[WG_CHAINING_KEY_LEN],
1084    uint8_t cipher_key[WG_CIPHER_KEY_LEN],
1085    const uint8_t local_key[WG_STATIC_KEY_LEN],
1086    const uint8_t remote_key[WG_STATIC_KEY_LEN])
1087{
1088	uint8_t dhout[WG_DH_OUTPUT_LEN];
1089
1090	wg_algo_dh(dhout, local_key, remote_key);
1091	wg_algo_kdf(ckey, cipher_key, NULL, ckey, dhout, sizeof(dhout));
1092
1093	WG_DUMP_HASH("dhout", dhout);
1094	WG_DUMP_HASH("ckey", ckey);
1095	if (cipher_key != NULL)
1096		WG_DUMP_HASH("cipher_key", cipher_key);
1097}
1098
1099static void
1100wg_algo_aead_enc(uint8_t out[], size_t expected_outsize, const uint8_t key[],
1101    const uint64_t counter, const uint8_t plain[], const size_t plainsize,
1102    const uint8_t auth[], size_t authlen)
1103{
1104	uint8_t nonce[(32 + 64) / 8] = {0};
1105	long long unsigned int outsize;
1106	int error __diagused;
1107
1108	le64enc(&nonce[4], counter);
1109
1110	error = crypto_aead_chacha20poly1305_ietf_encrypt(out, &outsize, plain,
1111	    plainsize, auth, authlen, NULL, nonce, key);
1112	KASSERT(error == 0);
1113	KASSERT(outsize == expected_outsize);
1114}
1115
/*
 * wg_algo_aead_dec(out, expected_outsize, key, counter, encrypted,
 *     encryptedsize, auth, authlen)
 *
 *	Decrypt and verify encrypted[0..encryptedsize) with
 *	ChaCha20-Poly1305 (IETF variant) under key, with
 *	auth[0..authlen) as additional data, writing the plaintext to
 *	out.  The nonce is four zero bytes followed by counter in
 *	little-endian order.
 *
 *	Return 0 on success, or the nonzero error reported by the
 *	decryption routine.  On success the plaintext length is
 *	asserted to equal expected_outsize.
 */
static int
wg_algo_aead_dec(uint8_t out[], size_t expected_outsize, const uint8_t key[],
    const uint64_t counter, const uint8_t encrypted[],
    const size_t encryptedsize, const uint8_t auth[], size_t authlen)
{
	long long unsigned int outsize;
	int error;
	uint8_t nonce[(32 + 64) / 8] = {0};

	/* Leading 32 bits stay zero; the counter fills the rest. */
	le64enc(nonce + 4, counter);

	error = crypto_aead_chacha20poly1305_ietf_decrypt(out, &outsize,
	    NULL,
	    encrypted, encryptedsize,
	    auth, authlen,
	    nonce, key);
	if (error != 0)
		return error;

	KASSERT(outsize == expected_outsize);
	return 0;
}
1133
1134static void
1135wg_algo_xaead_enc(uint8_t out[], const size_t expected_outsize,
1136    const uint8_t key[], const uint8_t plain[], const size_t plainsize,
1137    const uint8_t auth[], size_t authlen,
1138    const uint8_t nonce[WG_SALT_LEN])
1139{
1140	long long unsigned int outsize;
1141	int error __diagused;
1142
1143	CTASSERT(WG_SALT_LEN == crypto_aead_xchacha20poly1305_ietf_NPUBBYTES);
1144	error = crypto_aead_xchacha20poly1305_ietf_encrypt(out, &outsize,
1145	    plain, plainsize, auth, authlen, NULL, nonce, key);
1146	KASSERT(error == 0);
1147	KASSERT(outsize == expected_outsize);
1148}
1149
1150static int
1151wg_algo_xaead_dec(uint8_t out[], const size_t expected_outsize,
1152    const uint8_t key[], const uint8_t encrypted[], const size_t encryptedsize,
1153    const uint8_t auth[], size_t authlen,
1154    const uint8_t nonce[WG_SALT_LEN])
1155{
1156	long long unsigned int outsize;
1157	int error;
1158
1159	error = crypto_aead_xchacha20poly1305_ietf_decrypt(out, &outsize, NULL,
1160	    encrypted, encryptedsize, auth, authlen, nonce, key);
1161	if (error == 0)
1162		KASSERT(outsize == expected_outsize);
1163	return error;
1164}
1165
1166static void
1167wg_algo_tai64n(wg_timestamp_t timestamp)
1168{
1169	struct timespec ts;
1170
1171	/* FIXME strict TAI64N (https://cr.yp.to/libtai/tai64.html) */
1172	getnanotime(&ts);
1173	/* TAI64 label in external TAI64 format */
1174	be32enc(timestamp, 0x40000000U + (uint32_t)(ts.tv_sec >> 32));
1175	/* second beginning from 1970 TAI */
1176	be32enc(timestamp + 4, (uint32_t)(ts.tv_sec & 0xffffffffU));
1177	/* nanosecond in big-endian format */
1178	be32enc(timestamp + 8, (uint32_t)ts.tv_nsec);
1179}
1180
1181/*
1182 * wg_get_stable_session(wgp, psref)
1183 *
1184 *	Get a passive reference to the current stable session, or
1185 *	return NULL if there is no current stable session.
1186 *
1187 *	The pointer is always there but the session is not necessarily
1188 *	ESTABLISHED; if it is not ESTABLISHED, return NULL.  However,
1189 *	the session may transition from ESTABLISHED to DESTROYING while
1190 *	holding the passive reference.
1191 */
1192static struct wg_session *
1193wg_get_stable_session(struct wg_peer *wgp, struct psref *psref)
1194{
1195	int s;
1196	struct wg_session *wgs;
1197
1198	s = pserialize_read_enter();
1199	wgs = atomic_load_consume(&wgp->wgp_session_stable);
1200	if (__predict_false(wgs->wgs_state != WGS_STATE_ESTABLISHED))
1201		wgs = NULL;
1202	else
1203		psref_acquire(psref, &wgs->wgs_psref, wg_psref_class);
1204	pserialize_read_exit(s);
1205
1206	return wgs;
1207}
1208
/*
 * wg_put_session(wgs, psref)
 *
 *	Release the passive reference psref previously acquired on
 *	session wgs.
 */
static void
wg_put_session(struct wg_session *wgs, struct psref *psref)
{

	psref_release(psref, &wgs->wgs_psref, wg_psref_class);
}
1215
/*
 * wg_destroy_session(wg, wgs)
 *
 *	Remove wgs from wg's table of sessions by index, wait for
 *	passive references to it to drain, clear its state, and mark it
 *	UNKNOWN.
 *
 *	Caller must hold the peer's lock, and wgs must not already be
 *	UNKNOWN.  The session's psref target is destroyed here and is
 *	not reinitialized by this function.
 */
static void
wg_destroy_session(struct wg_softc *wg, struct wg_session *wgs)
{
	struct wg_peer *wgp = wgs->wgs_peer;
	struct wg_session *wgs0 __diagused;
	void *garbage;

	KASSERT(mutex_owned(wgp->wgp_lock));
	KASSERT(wgs->wgs_state != WGS_STATE_UNKNOWN);

	/* Remove the session from the table.  */
	wgs0 = thmap_del(wg->wg_sessions_byindex,
	    &wgs->wgs_local_index, sizeof(wgs->wgs_local_index));
	KASSERT(wgs0 == wgs);
	/* Stage the table's garbage now; it is only freed after the drain. */
	garbage = thmap_stage_gc(wg->wg_sessions_byindex);

	/* Wait for passive references to drain.  */
	pserialize_perform(wgp->wgp_psz);
	psref_target_destroy(&wgs->wgs_psref, wg_psref_class);

	/* Free memory, zero state, and transition to UNKNOWN.  */
	thmap_gc(wg->wg_sessions_byindex, garbage);
	wg_clear_states(wgs);
	wgs->wgs_state = WGS_STATE_UNKNOWN;
}
1241
1242/*
1243 * wg_get_session_index(wg, wgs)
1244 *
1245 *	Choose a session index for wgs->wgs_local_index, and store it
1246 *	in wg's table of sessions by index.
1247 *
1248 *	wgs must be the unstable session of its peer, and must be
1249 *	transitioning out of the UNKNOWN state.
1250 */
1251static void
1252wg_get_session_index(struct wg_softc *wg, struct wg_session *wgs)
1253{
1254	struct wg_peer *wgp __diagused = wgs->wgs_peer;
1255	struct wg_session *wgs0;
1256	uint32_t index;
1257
1258	KASSERT(mutex_owned(wgp->wgp_lock));
1259	KASSERT(wgs == wgp->wgp_session_unstable);
1260	KASSERT(wgs->wgs_state == WGS_STATE_UNKNOWN);
1261
1262	do {
1263		/* Pick a uniform random index.  */
1264		index = cprng_strong32();
1265
1266		/* Try to take it.  */
1267		wgs->wgs_local_index = index;
1268		wgs0 = thmap_put(wg->wg_sessions_byindex,
1269		    &wgs->wgs_local_index, sizeof wgs->wgs_local_index, wgs);
1270
1271		/* If someone else beat us, start over.  */
1272	} while (__predict_false(wgs0 != wgs));
1273}
1274
/*
 * wg_put_session_index(wg, wgs)
 *
 *	Remove wgs from the table of sessions by index, wait for any
 *	passive references to drain, and transition the session to the
 *	UNKNOWN state.
 *
 *	wgs must be the unstable session of its peer, and must not be
 *	UNKNOWN or ESTABLISHED.  Caller must hold the peer's lock.
 */
static void
wg_put_session_index(struct wg_softc *wg, struct wg_session *wgs)
{
	struct wg_peer *wgp __diagused = wgs->wgs_peer;

	KASSERT(mutex_owned(wgp->wgp_lock));
	KASSERT(wgs == wgp->wgp_session_unstable);
	KASSERT(wgs->wgs_state != WGS_STATE_UNKNOWN);
	KASSERT(wgs->wgs_state != WGS_STATE_ESTABLISHED);

	wg_destroy_session(wg, wgs);
	/*
	 * wg_destroy_session destroyed the psref target; set it up
	 * again so that the session object can be used again.
	 */
	psref_target_init(&wgs->wgs_psref, wg_psref_class);
}
1298
1299/*
1300 * Handshake patterns
1301 *
1302 * [W] 5: "These messages use the "IK" pattern from Noise"
1303 * [N] 7.5. Interactive handshake patterns (fundamental)
 *     "The first character refers to the initiator's static key:"
1305 *     "I = Static key for initiator Immediately transmitted to responder,
1306 *          despite reduced or absent identity hiding"
 *     "The second character refers to the responder's static key:"
1308 *     "K = Static key for responder Known to initiator"
1309 *     "IK:
1310 *        <- s
1311 *        ...
1312 *        -> e, es, s, ss
1313 *        <- e, ee, se"
1314 * [N] 9.4. Pattern modifiers
1315 *     "IKpsk2:
1316 *        <- s
1317 *        ...
1318 *        -> e, es, s, ss
1319 *        <- e, ee, se, psk"
1320 */
1321static void
1322wg_fill_msg_init(struct wg_softc *wg, struct wg_peer *wgp,
1323    struct wg_session *wgs, struct wg_msg_init *wgmi)
1324{
1325	uint8_t ckey[WG_CHAINING_KEY_LEN]; /* [W] 5.4.2: Ci */
1326	uint8_t hash[WG_HASH_LEN]; /* [W] 5.4.2: Hi */
1327	uint8_t cipher_key[WG_CIPHER_KEY_LEN];
1328	uint8_t pubkey[WG_EPHEMERAL_KEY_LEN];
1329	uint8_t privkey[WG_EPHEMERAL_KEY_LEN];
1330
1331	KASSERT(mutex_owned(wgp->wgp_lock));
1332	KASSERT(wgs == wgp->wgp_session_unstable);
1333	KASSERT(wgs->wgs_state == WGS_STATE_INIT_ACTIVE);
1334
1335	wgmi->wgmi_type = htole32(WG_MSG_TYPE_INIT);
1336	wgmi->wgmi_sender = wgs->wgs_local_index;
1337
1338	/* [W] 5.4.2: First Message: Initiator to Responder */
1339
1340	/* Ci := HASH(CONSTRUCTION) */
1341	/* Hi := HASH(Ci || IDENTIFIER) */
1342	wg_init_key_and_hash(ckey, hash);
1343	/* Hi := HASH(Hi || Sr^pub) */
1344	wg_algo_hash(hash, wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey));
1345
1346	WG_DUMP_HASH("hash", hash);
1347
1348	/* [N] 2.2: "e" */
1349	/* Ei^priv, Ei^pub := DH-GENERATE() */
1350	wg_algo_generate_keypair(pubkey, privkey);
1351	/* Ci := KDF1(Ci, Ei^pub) */
1352	wg_algo_kdf(ckey, NULL, NULL, ckey, pubkey, sizeof(pubkey));
1353	/* msg.ephemeral := Ei^pub */
1354	memcpy(wgmi->wgmi_ephemeral, pubkey, sizeof(wgmi->wgmi_ephemeral));
1355	/* Hi := HASH(Hi || msg.ephemeral) */
1356	wg_algo_hash(hash, pubkey, sizeof(pubkey));
1357
1358	WG_DUMP_HASH("ckey", ckey);
1359	WG_DUMP_HASH("hash", hash);
1360
1361	/* [N] 2.2: "es" */
1362	/* Ci, k := KDF2(Ci, DH(Ei^priv, Sr^pub)) */
1363	wg_algo_dh_kdf(ckey, cipher_key, privkey, wgp->wgp_pubkey);
1364
1365	/* [N] 2.2: "s" */
1366	/* msg.static := AEAD(k, 0, Si^pub, Hi) */
1367	wg_algo_aead_enc(wgmi->wgmi_static, sizeof(wgmi->wgmi_static),
1368	    cipher_key, 0, wg->wg_pubkey, sizeof(wg->wg_pubkey),
1369	    hash, sizeof(hash));
1370	/* Hi := HASH(Hi || msg.static) */
1371	wg_algo_hash(hash, wgmi->wgmi_static, sizeof(wgmi->wgmi_static));
1372
1373	WG_DUMP_HASH48("wgmi_static", wgmi->wgmi_static);
1374
1375	/* [N] 2.2: "ss" */
1376	/* Ci, k := KDF2(Ci, DH(Si^priv, Sr^pub)) */
1377	wg_algo_dh_kdf(ckey, cipher_key, wg->wg_privkey, wgp->wgp_pubkey);
1378
1379	/* msg.timestamp := AEAD(k, TIMESTAMP(), Hi) */
1380	wg_timestamp_t timestamp;
1381	wg_algo_tai64n(timestamp);
1382	wg_algo_aead_enc(wgmi->wgmi_timestamp, sizeof(wgmi->wgmi_timestamp),
1383	    cipher_key, 0, timestamp, sizeof(timestamp), hash, sizeof(hash));
1384	/* Hi := HASH(Hi || msg.timestamp) */
1385	wg_algo_hash(hash, wgmi->wgmi_timestamp, sizeof(wgmi->wgmi_timestamp));
1386
1387	/* [W] 5.4.4 Cookie MACs */
1388	wg_algo_mac_mac1(wgmi->wgmi_mac1, sizeof(wgmi->wgmi_mac1),
1389	    wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey),
1390	    (const uint8_t *)wgmi, offsetof(struct wg_msg_init, wgmi_mac1));
1391	/* Need mac1 to decrypt a cookie from a cookie message */
1392	memcpy(wgp->wgp_last_sent_mac1, wgmi->wgmi_mac1,
1393	    sizeof(wgp->wgp_last_sent_mac1));
1394	wgp->wgp_last_sent_mac1_valid = true;
1395
1396	if (wgp->wgp_latest_cookie_time == 0 ||
1397	    (time_uptime - wgp->wgp_latest_cookie_time) >= WG_COOKIE_TIME)
1398		memset(wgmi->wgmi_mac2, 0, sizeof(wgmi->wgmi_mac2));
1399	else {
1400		wg_algo_mac(wgmi->wgmi_mac2, sizeof(wgmi->wgmi_mac2),
1401		    wgp->wgp_latest_cookie, WG_COOKIE_LEN,
1402		    (const uint8_t *)wgmi,
1403		    offsetof(struct wg_msg_init, wgmi_mac2),
1404		    NULL, 0);
1405	}
1406
1407	memcpy(wgs->wgs_ephemeral_key_pub, pubkey, sizeof(pubkey));
1408	memcpy(wgs->wgs_ephemeral_key_priv, privkey, sizeof(privkey));
1409	memcpy(wgs->wgs_handshake_hash, hash, sizeof(hash));
1410	memcpy(wgs->wgs_chaining_key, ckey, sizeof(ckey));
1411	WG_DLOG("%s: sender=%x\n", __func__, wgs->wgs_local_index);
1412}
1413
/*
 * wg_handle_msg_init(wg, wgmi, src)
 *
 *	Process a handshake initiation message wgmi received from src,
 *	acting as the responder ([W] 5.4.2).  In order: verify mac1,
 *	replay the initiator's key/hash derivation to decrypt the
 *	sender's static public key, look up the matching peer, when
 *	under load enforce mac2 (possibly replying with a cookie
 *	message), decrypt the timestamp and require it to be newer than
 *	the latest one recorded for the peer, and finally move the
 *	peer's unstable session to INIT_PASSIVE and send a handshake
 *	response.
 *
 *	A message that fails any check is dropped; the only replies
 *	sent from here are a cookie message or the handshake response.
 */
static void __noinline
wg_handle_msg_init(struct wg_softc *wg, const struct wg_msg_init *wgmi,
    const struct sockaddr *src)
{
	uint8_t ckey[WG_CHAINING_KEY_LEN]; /* [W] 5.4.2: Ci */
	uint8_t hash[WG_HASH_LEN]; /* [W] 5.4.2: Hi */
	uint8_t cipher_key[WG_CIPHER_KEY_LEN];
	uint8_t peer_pubkey[WG_STATIC_KEY_LEN];
	struct wg_peer *wgp;
	struct wg_session *wgs;
	int error, ret;
	struct psref psref_peer;
	uint8_t mac1[WG_MAC_LEN];

	WG_TRACE("init msg received");

	/* Recompute mac1 over the message, keyed from our own public key. */
	wg_algo_mac_mac1(mac1, sizeof(mac1),
	    wg->wg_pubkey, sizeof(wg->wg_pubkey),
	    (const uint8_t *)wgmi, offsetof(struct wg_msg_init, wgmi_mac1));

	/*
	 * [W] 5.3: Denial of Service Mitigation & Cookies
	 * "the responder, ..., must always reject messages with an invalid
	 *  msg.mac1"
	 */
	if (!consttime_memequal(mac1, wgmi->wgmi_mac1, sizeof(mac1))) {
		WG_DLOG("mac1 is invalid\n");
		return;
	}

	/*
	 * [W] 5.4.2: First Message: Initiator to Responder
	 * "When the responder receives this message, it does the same
	 *  operations so that its final state variables are identical,
	 *  replacing the operands of the DH function to produce equivalent
	 *  values."
	 *  Note that the following comments of operations are just copies of
	 *  the initiator's ones.
	 */

	/* Ci := HASH(CONSTRUCTION) */
	/* Hi := HASH(Ci || IDENTIFIER) */
	wg_init_key_and_hash(ckey, hash);
	/* Hi := HASH(Hi || Sr^pub) */
	wg_algo_hash(hash, wg->wg_pubkey, sizeof(wg->wg_pubkey));

	/* [N] 2.2: "e" */
	/* Ci := KDF1(Ci, Ei^pub) */
	wg_algo_kdf(ckey, NULL, NULL, ckey, wgmi->wgmi_ephemeral,
	    sizeof(wgmi->wgmi_ephemeral));
	/* Hi := HASH(Hi || msg.ephemeral) */
	wg_algo_hash(hash, wgmi->wgmi_ephemeral, sizeof(wgmi->wgmi_ephemeral));

	WG_DUMP_HASH("ckey", ckey);

	/* [N] 2.2: "es" */
	/* Ci, k := KDF2(Ci, DH(Ei^priv, Sr^pub)) */
	wg_algo_dh_kdf(ckey, cipher_key, wg->wg_privkey, wgmi->wgmi_ephemeral);

	WG_DUMP_HASH48("wgmi_static", wgmi->wgmi_static);

	/* [N] 2.2: "s" */
	/* msg.static := AEAD(k, 0, Si^pub, Hi) */
	error = wg_algo_aead_dec(peer_pubkey, WG_STATIC_KEY_LEN, cipher_key, 0,
	    wgmi->wgmi_static, sizeof(wgmi->wgmi_static), hash, sizeof(hash));
	if (error != 0) {
		/* No peer is known yet, so use the interface-wide rate limit. */
		WG_LOG_RATECHECK(&wg->wg_ppsratecheck, LOG_DEBUG,
		    "%s: wg_algo_aead_dec for secret key failed\n",
		    if_name(&wg->wg_if));
		return;
	}
	/* Hi := HASH(Hi || msg.static) */
	wg_algo_hash(hash, wgmi->wgmi_static, sizeof(wgmi->wgmi_static));

	/* The decrypted static public key identifies the sending peer. */
	wgp = wg_lookup_peer_by_pubkey(wg, peer_pubkey, &psref_peer);
	if (wgp == NULL) {
		WG_DLOG("peer not found\n");
		return;
	}

	/*
	 * Lock the peer to serialize access to cookie state.
	 *
	 * XXX Can we safely avoid holding the lock across DH?  Take it
	 * just to verify mac2 and then unlock/DH/lock?
	 */
	mutex_enter(wgp->wgp_lock);

	/* From here on, every exit goes through "out" to unlock and unref. */
	if (__predict_false(wg_is_underload(wg, wgp, WG_MSG_TYPE_INIT))) {
		WG_TRACE("under load");
		/*
		 * [W] 5.3: Denial of Service Mitigation & Cookies
		 * "the responder, ..., and when under load may reject messages
		 *  with an invalid msg.mac2.  If the responder receives a
		 *  message with a valid msg.mac1 yet with an invalid msg.mac2,
		 *  and is under load, it may respond with a cookie reply
		 *  message"
		 */
		uint8_t zero[WG_MAC_LEN] = {0};
		/* An all-zero mac2 means the sender included no cookie MAC. */
		if (consttime_memequal(wgmi->wgmi_mac2, zero, sizeof(zero))) {
			WG_TRACE("sending a cookie message: no cookie included");
			(void)wg_send_cookie_msg(wg, wgp, wgmi->wgmi_sender,
			    wgmi->wgmi_mac1, src);
			goto out;
		}
		/* Without a cookie on record there is nothing to check mac2 against. */
		if (!wgp->wgp_last_sent_cookie_valid) {
			WG_TRACE("sending a cookie message: no cookie sent ever");
			(void)wg_send_cookie_msg(wg, wgp, wgmi->wgmi_sender,
			    wgmi->wgmi_mac1, src);
			goto out;
		}
		/* Recompute mac2 with the last cookie we sent and compare. */
		uint8_t mac2[WG_MAC_LEN];
		wg_algo_mac(mac2, sizeof(mac2), wgp->wgp_last_sent_cookie,
		    WG_COOKIE_LEN, (const uint8_t *)wgmi,
		    offsetof(struct wg_msg_init, wgmi_mac2), NULL, 0);
		if (!consttime_memequal(mac2, wgmi->wgmi_mac2, sizeof(mac2))) {
			WG_DLOG("mac2 is invalid\n");
			goto out;
		}
		WG_TRACE("under load, but continue to sending");
	}

	/* [N] 2.2: "ss" */
	/* Ci, k := KDF2(Ci, DH(Si^priv, Sr^pub)) */
	wg_algo_dh_kdf(ckey, cipher_key, wg->wg_privkey, wgp->wgp_pubkey);

	/* msg.timestamp := AEAD(k, TIMESTAMP(), Hi) */
	wg_timestamp_t timestamp;
	error = wg_algo_aead_dec(timestamp, sizeof(timestamp), cipher_key, 0,
	    wgmi->wgmi_timestamp, sizeof(wgmi->wgmi_timestamp),
	    hash, sizeof(hash));
	if (error != 0) {
		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
		    "%s: peer %s: wg_algo_aead_dec for timestamp failed\n",
		    if_name(&wg->wg_if), wgp->wgp_name);
		goto out;
	}
	/* Hi := HASH(Hi || msg.timestamp) */
	wg_algo_hash(hash, wgmi->wgmi_timestamp, sizeof(wgmi->wgmi_timestamp));

	/*
	 * [W] 5.1 "The responder keeps track of the greatest timestamp
	 *      received per peer and discards packets containing
	 *      timestamps less than or equal to it."
	 */
	/* Bytewise comparison of the two encoded timestamps. */
	ret = memcmp(timestamp, wgp->wgp_timestamp_latest_init,
	    sizeof(timestamp));
	if (ret <= 0) {
		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
		    "%s: peer %s: invalid init msg: timestamp is old\n",
		    if_name(&wg->wg_if), wgp->wgp_name);
		goto out;
	}
	/* Note: recorded before the session-state check below. */
	memcpy(wgp->wgp_timestamp_latest_init, timestamp, sizeof(timestamp));

	/*
	 * Message is good -- we're committing to handle it now, unless
	 * we were already initiating a session.
	 */
	wgs = wgp->wgp_session_unstable;
	switch (wgs->wgs_state) {
	case WGS_STATE_UNKNOWN:		/* new session initiated by peer */
		wg_get_session_index(wg, wgs);
		break;
	case WGS_STATE_INIT_ACTIVE:	/* we're already initiating, drop */
		WG_TRACE("Session already initializing, ignoring the message");
		goto out;
	case WGS_STATE_INIT_PASSIVE:	/* peer is retrying, start over */
		WG_TRACE("Session already initializing, destroying old states");
		wg_clear_states(wgs);
		/* keep session index */
		break;
	case WGS_STATE_ESTABLISHED:	/* can't happen */
		panic("unstable session can't be established");
		break;
	case WGS_STATE_DESTROYING:	/* rekey initiated by peer */
		WG_TRACE("Session destroying, but force to clear");
		callout_stop(&wgp->wgp_session_dtor_timer);
		wg_clear_states(wgs);
		/* keep session index */
		break;
	default:
		panic("invalid session state: %d", wgs->wgs_state);
	}
	wgs->wgs_state = WGS_STATE_INIT_PASSIVE;

	/* Save the handshake state that building the response starts from. */
	memcpy(wgs->wgs_handshake_hash, hash, sizeof(hash));
	memcpy(wgs->wgs_chaining_key, ckey, sizeof(ckey));
	memcpy(wgs->wgs_ephemeral_key_peer, wgmi->wgmi_ephemeral,
	    sizeof(wgmi->wgmi_ephemeral));

	wg_update_endpoint_if_necessary(wgp, src);

	/* The result of sending the response is deliberately ignored. */
	(void)wg_send_handshake_msg_resp(wg, wgp, wgs, wgmi);

	/* NOTE(review): false presumably selects the responder role -- confirm */
	wg_calculate_keys(wgs, false);
	wg_clear_states(wgs);

out:
	mutex_exit(wgp->wgp_lock);
	/* Drop the reference taken by wg_lookup_peer_by_pubkey. */
	wg_put_peer(wgp, &psref_peer);
}
1616
/*
 * wg_get_so_by_af(wg, af)
 *
 *	Return wg's socket for address family af: wg_so4 for AF_INET
 *	and wg_so6 for AF_INET6, each only when the corresponding
 *	protocol is compiled in.  Panic on any other address family.
 */
static struct socket *
wg_get_so_by_af(struct wg_softc *wg, const int af)
{

#ifdef INET
	if (af == AF_INET)
		return wg->wg_so4;
#endif
#ifdef INET6
	if (af == AF_INET6)
		return wg->wg_so6;
#endif
	/* Not a family this kernel was built to handle. */
	panic("wg: no such af: %d", af);
}
1634
/*
 * wg_get_so_by_peer(wgp, wgsa)
 *
 *	Return the socket of wgp's interface that matches the address
 *	family of endpoint address wgsa.
 */
static struct socket *
wg_get_so_by_peer(struct wg_peer *wgp, struct wg_sockaddr *wgsa)
{

	return wg_get_so_by_af(wgp->wgp_sc, wgsa_family(wgsa));
}
1641
1642static struct wg_sockaddr *
1643wg_get_endpoint_sa(struct wg_peer *wgp, struct psref *psref)
1644{
1645	struct wg_sockaddr *wgsa;
1646	int s;
1647
1648	s = pserialize_read_enter();
1649	wgsa = atomic_load_consume(&wgp->wgp_endpoint);
1650	psref_acquire(psref, &wgsa->wgsa_psref, wg_psref_class);
1651	pserialize_read_exit(s);
1652
1653	return wgsa;
1654}
1655
/*
 * wg_put_sa(wgp, wgsa, psref)
 *
 *	Release the passive reference psref previously acquired on
 *	endpoint address wgsa.  The wgp argument is not used here.
 */
static void
wg_put_sa(struct wg_peer *wgp, struct wg_sockaddr *wgsa, struct psref *psref)
{

	psref_release(psref, &wgsa->wgsa_psref, wg_psref_class);
}
1662
1663static int
1664wg_send_so(struct wg_peer *wgp, struct mbuf *m)
1665{
1666	int error;
1667	struct socket *so;
1668	struct psref psref;
1669	struct wg_sockaddr *wgsa;
1670
1671	wgsa = wg_get_endpoint_sa(wgp, &psref);
1672	so = wg_get_so_by_peer(wgp, wgsa);
1673	error = sosend(so, wgsatosa(wgsa), NULL, m, NULL, 0, curlwp);
1674	wg_put_sa(wgp, wgsa, &psref);
1675
1676	return error;
1677}
1678
1679static int
1680wg_send_handshake_msg_init(struct wg_softc *wg, struct wg_peer *wgp)
1681{
1682	int error;
1683	struct mbuf *m;
1684	struct wg_msg_init *wgmi;
1685	struct wg_session *wgs;
1686
1687	KASSERT(mutex_owned(wgp->wgp_lock));
1688
1689	wgs = wgp->wgp_session_unstable;
1690	/* XXX pull dispatch out into wg_task_send_init_message */
1691	switch (wgs->wgs_state) {
1692	case WGS_STATE_UNKNOWN:		/* new session initiated by us */
1693		wg_get_session_index(wg, wgs);
1694		break;
1695	case WGS_STATE_INIT_ACTIVE:	/* we're already initiating, stop */
1696		WG_TRACE("Session already initializing, skip starting new one");
1697		return EBUSY;
1698	case WGS_STATE_INIT_PASSIVE:	/* peer was trying -- XXX what now? */
1699		WG_TRACE("Session already initializing, destroying old states");
1700		wg_clear_states(wgs);
1701		/* keep session index */
1702		break;
1703	case WGS_STATE_ESTABLISHED:	/* can't happen */
1704		panic("unstable session can't be established");
1705		break;
1706	case WGS_STATE_DESTROYING:	/* rekey initiated by us too early */
1707		WG_TRACE("Session destroying");
1708		/* XXX should wait? */
1709		return EBUSY;
1710	}
1711	wgs->wgs_state = WGS_STATE_INIT_ACTIVE;
1712
1713	m = m_gethdr(M_WAIT, MT_DATA);
1714	if (sizeof(*wgmi) > MHLEN) {
1715		m_clget(m, M_WAIT);
1716		CTASSERT(sizeof(*wgmi) <= MCLBYTES);
1717	}
1718	m->m_pkthdr.len = m->m_len = sizeof(*wgmi);
1719	wgmi = mtod(m, struct wg_msg_init *);
1720	wg_fill_msg_init(wg, wgp, wgs, wgmi);
1721
1722	error = wg->wg_ops->send_hs_msg(wgp, m);
1723	if (error == 0) {
1724		WG_TRACE("init msg sent");
1725
1726		if (wgp->wgp_handshake_start_time == 0)
1727			wgp->wgp_handshake_start_time = time_uptime;
1728		callout_schedule(&wgp->wgp_handshake_timeout_timer,
1729		    MIN(wg_rekey_timeout, (unsigned)(INT_MAX / hz)) * hz);
1730	} else {
1731		wg_put_session_index(wg, wgs);
1732		/* Initiation failed; toss packet waiting for it if any.  */
1733		if ((m = atomic_swap_ptr(&wgp->wgp_pending, NULL)) != NULL)
1734			m_freem(m);
1735	}
1736
1737	return error;
1738}
1739
1740static void
1741wg_fill_msg_resp(struct wg_softc *wg, struct wg_peer *wgp,
1742    struct wg_session *wgs, struct wg_msg_resp *wgmr,
1743    const struct wg_msg_init *wgmi)
1744{
1745	uint8_t ckey[WG_CHAINING_KEY_LEN]; /* [W] 5.4.3: Cr */
1746	uint8_t hash[WG_HASH_LEN]; /* [W] 5.4.3: Hr */
1747	uint8_t cipher_key[WG_KDF_OUTPUT_LEN];
1748	uint8_t pubkey[WG_EPHEMERAL_KEY_LEN];
1749	uint8_t privkey[WG_EPHEMERAL_KEY_LEN];
1750
1751	KASSERT(mutex_owned(wgp->wgp_lock));
1752	KASSERT(wgs == wgp->wgp_session_unstable);
1753	KASSERT(wgs->wgs_state == WGS_STATE_INIT_PASSIVE);
1754
1755	memcpy(hash, wgs->wgs_handshake_hash, sizeof(hash));
1756	memcpy(ckey, wgs->wgs_chaining_key, sizeof(ckey));
1757
1758	wgmr->wgmr_type = htole32(WG_MSG_TYPE_RESP);
1759	wgmr->wgmr_sender = wgs->wgs_local_index;
1760	wgmr->wgmr_receiver = wgmi->wgmi_sender;
1761
1762	/* [W] 5.4.3 Second Message: Responder to Initiator */
1763
1764	/* [N] 2.2: "e" */
1765	/* Er^priv, Er^pub := DH-GENERATE() */
1766	wg_algo_generate_keypair(pubkey, privkey);
1767	/* Cr := KDF1(Cr, Er^pub) */
1768	wg_algo_kdf(ckey, NULL, NULL, ckey, pubkey, sizeof(pubkey));
1769	/* msg.ephemeral := Er^pub */
1770	memcpy(wgmr->wgmr_ephemeral, pubkey, sizeof(wgmr->wgmr_ephemeral));
1771	/* Hr := HASH(Hr || msg.ephemeral) */
1772	wg_algo_hash(hash, pubkey, sizeof(pubkey));
1773
1774	WG_DUMP_HASH("ckey", ckey);
1775	WG_DUMP_HASH("hash", hash);
1776
1777	/* [N] 2.2: "ee" */
1778	/* Cr := KDF1(Cr, DH(Er^priv, Ei^pub)) */
1779	wg_algo_dh_kdf(ckey, NULL, privkey, wgs->wgs_ephemeral_key_peer);
1780
1781	/* [N] 2.2: "se" */
1782	/* Cr := KDF1(Cr, DH(Er^priv, Si^pub)) */
1783	wg_algo_dh_kdf(ckey, NULL, privkey, wgp->wgp_pubkey);
1784
1785	/* [N] 9.2: "psk" */
1786    {
1787	uint8_t kdfout[WG_KDF_OUTPUT_LEN];
1788	/* Cr, r, k := KDF3(Cr, Q) */
1789	wg_algo_kdf(ckey, kdfout, cipher_key, ckey, wgp->wgp_psk,
1790	    sizeof(wgp->wgp_psk));
1791	/* Hr := HASH(Hr || r) */
1792	wg_algo_hash(hash, kdfout, sizeof(kdfout));
1793    }
1794
1795	/* msg.empty := AEAD(k, 0, e, Hr) */
1796	wg_algo_aead_enc(wgmr->wgmr_empty, sizeof(wgmr->wgmr_empty),
1797	    cipher_key, 0, NULL, 0, hash, sizeof(hash));
1798	/* Hr := HASH(Hr || msg.empty) */
1799	wg_algo_hash(hash, wgmr->wgmr_empty, sizeof(wgmr->wgmr_empty));
1800
1801	WG_DUMP_HASH("wgmr_empty", wgmr->wgmr_empty);
1802
1803	/* [W] 5.4.4: Cookie MACs */
1804	/* msg.mac1 := MAC(HASH(LABEL-MAC1 || Sm'^pub), msg_a) */
1805	wg_algo_mac_mac1(wgmr->wgmr_mac1, sizeof(wgmi->wgmi_mac1),
1806	    wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey),
1807	    (const uint8_t *)wgmr, offsetof(struct wg_msg_resp, wgmr_mac1));
1808	/* Need mac1 to decrypt a cookie from a cookie message */
1809	memcpy(wgp->wgp_last_sent_mac1, wgmr->wgmr_mac1,
1810	    sizeof(wgp->wgp_last_sent_mac1));
1811	wgp->wgp_last_sent_mac1_valid = true;
1812
1813	if (wgp->wgp_latest_cookie_time == 0 ||
1814	    (time_uptime - wgp->wgp_latest_cookie_time) >= WG_COOKIE_TIME)
1815		/* msg.mac2 := 0^16 */
1816		memset(wgmr->wgmr_mac2, 0, sizeof(wgmr->wgmr_mac2));
1817	else {
1818		/* msg.mac2 := MAC(Lm, msg_b) */
1819		wg_algo_mac(wgmr->wgmr_mac2, sizeof(wgmi->wgmi_mac2),
1820		    wgp->wgp_latest_cookie, WG_COOKIE_LEN,
1821		    (const uint8_t *)wgmr,
1822		    offsetof(struct wg_msg_resp, wgmr_mac2),
1823		    NULL, 0);
1824	}
1825
1826	memcpy(wgs->wgs_handshake_hash, hash, sizeof(hash));
1827	memcpy(wgs->wgs_chaining_key, ckey, sizeof(ckey));
1828	memcpy(wgs->wgs_ephemeral_key_pub, pubkey, sizeof(pubkey));
1829	memcpy(wgs->wgs_ephemeral_key_priv, privkey, sizeof(privkey));
1830	wgs->wgs_remote_index = wgmi->wgmi_sender;
1831	WG_DLOG("sender=%x\n", wgs->wgs_local_index);
1832	WG_DLOG("receiver=%x\n", wgs->wgs_remote_index);
1833}
1834
1835static void
1836wg_swap_sessions(struct wg_peer *wgp)
1837{
1838	struct wg_session *wgs, *wgs_prev;
1839
1840	KASSERT(mutex_owned(wgp->wgp_lock));
1841
1842	wgs = wgp->wgp_session_unstable;
1843	KASSERT(wgs->wgs_state == WGS_STATE_ESTABLISHED);
1844
1845	wgs_prev = wgp->wgp_session_stable;
1846	KASSERT(wgs_prev->wgs_state == WGS_STATE_ESTABLISHED ||
1847	    wgs_prev->wgs_state == WGS_STATE_UNKNOWN);
1848	atomic_store_release(&wgp->wgp_session_stable, wgs);
1849	wgp->wgp_session_unstable = wgs_prev;
1850}
1851
1852static void __noinline
1853wg_handle_msg_resp(struct wg_softc *wg, const struct wg_msg_resp *wgmr,
1854    const struct sockaddr *src)
1855{
1856	uint8_t ckey[WG_CHAINING_KEY_LEN]; /* [W] 5.4.3: Cr */
1857	uint8_t hash[WG_HASH_LEN]; /* [W] 5.4.3: Kr */
1858	uint8_t cipher_key[WG_KDF_OUTPUT_LEN];
1859	struct wg_peer *wgp;
1860	struct wg_session *wgs;
1861	struct psref psref;
1862	int error;
1863	uint8_t mac1[WG_MAC_LEN];
1864	struct wg_session *wgs_prev;
1865	struct mbuf *m;
1866
1867	wg_algo_mac_mac1(mac1, sizeof(mac1),
1868	    wg->wg_pubkey, sizeof(wg->wg_pubkey),
1869	    (const uint8_t *)wgmr, offsetof(struct wg_msg_resp, wgmr_mac1));
1870
1871	/*
1872	 * [W] 5.3: Denial of Service Mitigation & Cookies
1873	 * "the responder, ..., must always reject messages with an invalid
1874	 *  msg.mac1"
1875	 */
1876	if (!consttime_memequal(mac1, wgmr->wgmr_mac1, sizeof(mac1))) {
1877		WG_DLOG("mac1 is invalid\n");
1878		return;
1879	}
1880
1881	WG_TRACE("resp msg received");
1882	wgs = wg_lookup_session_by_index(wg, wgmr->wgmr_receiver, &psref);
1883	if (wgs == NULL) {
1884		WG_TRACE("No session found");
1885		return;
1886	}
1887
1888	wgp = wgs->wgs_peer;
1889
1890	mutex_enter(wgp->wgp_lock);
1891
1892	/* If we weren't waiting for a handshake response, drop it.  */
1893	if (wgs->wgs_state != WGS_STATE_INIT_ACTIVE) {
1894		WG_TRACE("peer sent spurious handshake response, ignoring");
1895		goto out;
1896	}
1897
1898	if (__predict_false(wg_is_underload(wg, wgp, WG_MSG_TYPE_RESP))) {
1899		WG_TRACE("under load");
1900		/*
1901		 * [W] 5.3: Denial of Service Mitigation & Cookies
1902		 * "the responder, ..., and when under load may reject messages
1903		 *  with an invalid msg.mac2.  If the responder receives a
1904		 *  message with a valid msg.mac1 yet with an invalid msg.mac2,
1905		 *  and is under load, it may respond with a cookie reply
1906		 *  message"
1907		 */
1908		uint8_t zero[WG_MAC_LEN] = {0};
1909		if (consttime_memequal(wgmr->wgmr_mac2, zero, sizeof(zero))) {
1910			WG_TRACE("sending a cookie message: no cookie included");
1911			(void)wg_send_cookie_msg(wg, wgp, wgmr->wgmr_sender,
1912			    wgmr->wgmr_mac1, src);
1913			goto out;
1914		}
1915		if (!wgp->wgp_last_sent_cookie_valid) {
1916			WG_TRACE("sending a cookie message: no cookie sent ever");
1917			(void)wg_send_cookie_msg(wg, wgp, wgmr->wgmr_sender,
1918			    wgmr->wgmr_mac1, src);
1919			goto out;
1920		}
1921		uint8_t mac2[WG_MAC_LEN];
1922		wg_algo_mac(mac2, sizeof(mac2), wgp->wgp_last_sent_cookie,
1923		    WG_COOKIE_LEN, (const uint8_t *)wgmr,
1924		    offsetof(struct wg_msg_resp, wgmr_mac2), NULL, 0);
1925		if (!consttime_memequal(mac2, wgmr->wgmr_mac2, sizeof(mac2))) {
1926			WG_DLOG("mac2 is invalid\n");
1927			goto out;
1928		}
1929		WG_TRACE("under load, but continue to sending");
1930	}
1931
1932	memcpy(hash, wgs->wgs_handshake_hash, sizeof(hash));
1933	memcpy(ckey, wgs->wgs_chaining_key, sizeof(ckey));
1934
1935	/*
1936	 * [W] 5.4.3 Second Message: Responder to Initiator
1937	 * "When the initiator receives this message, it does the same
1938	 *  operations so that its final state variables are identical,
1939	 *  replacing the operands of the DH function to produce equivalent
1940	 *  values."
1941	 *  Note that the following comments of operations are just copies of
1942	 *  the initiator's ones.
1943	 */
1944
1945	/* [N] 2.2: "e" */
1946	/* Cr := KDF1(Cr, Er^pub) */
1947	wg_algo_kdf(ckey, NULL, NULL, ckey, wgmr->wgmr_ephemeral,
1948	    sizeof(wgmr->wgmr_ephemeral));
1949	/* Hr := HASH(Hr || msg.ephemeral) */
1950	wg_algo_hash(hash, wgmr->wgmr_ephemeral, sizeof(wgmr->wgmr_ephemeral));
1951
1952	WG_DUMP_HASH("ckey", ckey);
1953	WG_DUMP_HASH("hash", hash);
1954
1955	/* [N] 2.2: "ee" */
1956	/* Cr := KDF1(Cr, DH(Er^priv, Ei^pub)) */
1957	wg_algo_dh_kdf(ckey, NULL, wgs->wgs_ephemeral_key_priv,
1958	    wgmr->wgmr_ephemeral);
1959
1960	/* [N] 2.2: "se" */
1961	/* Cr := KDF1(Cr, DH(Er^priv, Si^pub)) */
1962	wg_algo_dh_kdf(ckey, NULL, wg->wg_privkey, wgmr->wgmr_ephemeral);
1963
1964	/* [N] 9.2: "psk" */
1965    {
1966	uint8_t kdfout[WG_KDF_OUTPUT_LEN];
1967	/* Cr, r, k := KDF3(Cr, Q) */
1968	wg_algo_kdf(ckey, kdfout, cipher_key, ckey, wgp->wgp_psk,
1969	    sizeof(wgp->wgp_psk));
1970	/* Hr := HASH(Hr || r) */
1971	wg_algo_hash(hash, kdfout, sizeof(kdfout));
1972    }
1973
1974    {
1975	uint8_t out[sizeof(wgmr->wgmr_empty)]; /* for safety */
1976	/* msg.empty := AEAD(k, 0, e, Hr) */
1977	error = wg_algo_aead_dec(out, 0, cipher_key, 0, wgmr->wgmr_empty,
1978	    sizeof(wgmr->wgmr_empty), hash, sizeof(hash));
1979	WG_DUMP_HASH("wgmr_empty", wgmr->wgmr_empty);
1980	if (error != 0) {
1981		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
1982		    "%s: peer %s: wg_algo_aead_dec for empty message failed\n",
1983		    if_name(&wg->wg_if), wgp->wgp_name);
1984		goto out;
1985	}
1986	/* Hr := HASH(Hr || msg.empty) */
1987	wg_algo_hash(hash, wgmr->wgmr_empty, sizeof(wgmr->wgmr_empty));
1988    }
1989
1990	memcpy(wgs->wgs_handshake_hash, hash, sizeof(wgs->wgs_handshake_hash));
1991	memcpy(wgs->wgs_chaining_key, ckey, sizeof(wgs->wgs_chaining_key));
1992	wgs->wgs_remote_index = wgmr->wgmr_sender;
1993	WG_DLOG("receiver=%x\n", wgs->wgs_remote_index);
1994
1995	KASSERT(wgs->wgs_state == WGS_STATE_INIT_ACTIVE);
1996	wgs->wgs_state = WGS_STATE_ESTABLISHED;
1997	wgs->wgs_time_established = time_uptime;
1998	wgs->wgs_time_last_data_sent = 0;
1999	wgs->wgs_is_initiator = true;
2000	wg_calculate_keys(wgs, true);
2001	wg_clear_states(wgs);
2002	WG_TRACE("WGS_STATE_ESTABLISHED");
2003
2004	callout_stop(&wgp->wgp_handshake_timeout_timer);
2005
2006	wg_swap_sessions(wgp);
2007	KASSERT(wgs == wgp->wgp_session_stable);
2008	wgs_prev = wgp->wgp_session_unstable;
2009	getnanotime(&wgp->wgp_last_handshake_time);
2010	wgp->wgp_handshake_start_time = 0;
2011	wgp->wgp_last_sent_mac1_valid = false;
2012	wgp->wgp_last_sent_cookie_valid = false;
2013
2014	wg_schedule_rekey_timer(wgp);
2015
2016	wg_update_endpoint_if_necessary(wgp, src);
2017
2018	/*
2019	 * If we had a data packet queued up, send it; otherwise send a
2020	 * keepalive message -- either way we have to send something
2021	 * immediately or else the responder will never answer.
2022	 */
2023	if ((m = atomic_swap_ptr(&wgp->wgp_pending, NULL)) != NULL) {
2024		kpreempt_disable();
2025		const uint32_t h = curcpu()->ci_index; // pktq_rps_hash(m)
2026		M_SETCTX(m, wgp);
2027		if (__predict_false(!pktq_enqueue(wg_pktq, m, h))) {
2028			WGLOG(LOG_ERR, "%s: pktq full, dropping\n",
2029			    if_name(&wg->wg_if));
2030			m_freem(m);
2031		}
2032		kpreempt_enable();
2033	} else {
2034		wg_send_keepalive_msg(wgp, wgs);
2035	}
2036
2037	if (wgs_prev->wgs_state == WGS_STATE_ESTABLISHED) {
2038		/* Wait for wg_get_stable_session to drain.  */
2039		pserialize_perform(wgp->wgp_psz);
2040
2041		/* Transition ESTABLISHED->DESTROYING.  */
2042		wgs_prev->wgs_state = WGS_STATE_DESTROYING;
2043
2044		/* We can't destroy the old session immediately */
2045		wg_schedule_session_dtor_timer(wgp);
2046	} else {
2047		KASSERTMSG(wgs_prev->wgs_state == WGS_STATE_UNKNOWN,
2048		    "state=%d", wgs_prev->wgs_state);
2049	}
2050
2051out:
2052	mutex_exit(wgp->wgp_lock);
2053	wg_put_session(wgs, &psref);
2054}
2055
2056static int
2057wg_send_handshake_msg_resp(struct wg_softc *wg, struct wg_peer *wgp,
2058    struct wg_session *wgs, const struct wg_msg_init *wgmi)
2059{
2060	int error;
2061	struct mbuf *m;
2062	struct wg_msg_resp *wgmr;
2063
2064	KASSERT(mutex_owned(wgp->wgp_lock));
2065	KASSERT(wgs == wgp->wgp_session_unstable);
2066	KASSERT(wgs->wgs_state == WGS_STATE_INIT_PASSIVE);
2067
2068	m = m_gethdr(M_WAIT, MT_DATA);
2069	if (sizeof(*wgmr) > MHLEN) {
2070		m_clget(m, M_WAIT);
2071		CTASSERT(sizeof(*wgmr) <= MCLBYTES);
2072	}
2073	m->m_pkthdr.len = m->m_len = sizeof(*wgmr);
2074	wgmr = mtod(m, struct wg_msg_resp *);
2075	wg_fill_msg_resp(wg, wgp, wgs, wgmr, wgmi);
2076
2077	error = wg->wg_ops->send_hs_msg(wgp, m);
2078	if (error == 0)
2079		WG_TRACE("resp msg sent");
2080	return error;
2081}
2082
2083static struct wg_peer *
2084wg_lookup_peer_by_pubkey(struct wg_softc *wg,
2085    const uint8_t pubkey[WG_STATIC_KEY_LEN], struct psref *psref)
2086{
2087	struct wg_peer *wgp;
2088
2089	int s = pserialize_read_enter();
2090	wgp = thmap_get(wg->wg_peers_bypubkey, pubkey, WG_STATIC_KEY_LEN);
2091	if (wgp != NULL)
2092		wg_get_peer(wgp, psref);
2093	pserialize_read_exit(s);
2094
2095	return wgp;
2096}
2097
2098static void
2099wg_fill_msg_cookie(struct wg_softc *wg, struct wg_peer *wgp,
2100    struct wg_msg_cookie *wgmc, const uint32_t sender,
2101    const uint8_t mac1[WG_MAC_LEN], const struct sockaddr *src)
2102{
2103	uint8_t cookie[WG_COOKIE_LEN];
2104	uint8_t key[WG_HASH_LEN];
2105	uint8_t addr[sizeof(struct in6_addr)];
2106	size_t addrlen;
2107	uint16_t uh_sport; /* be */
2108
2109	KASSERT(mutex_owned(wgp->wgp_lock));
2110
2111	wgmc->wgmc_type = htole32(WG_MSG_TYPE_COOKIE);
2112	wgmc->wgmc_receiver = sender;
2113	cprng_fast(wgmc->wgmc_salt, sizeof(wgmc->wgmc_salt));
2114
2115	/*
2116	 * [W] 5.4.7: Under Load: Cookie Reply Message
2117	 * "The secret variable, Rm, changes every two minutes to a
2118	 * random value"
2119	 */
2120	if ((time_uptime - wgp->wgp_last_genrandval_time) > WG_RANDVAL_TIME) {
2121		wgp->wgp_randval = cprng_strong32();
2122		wgp->wgp_last_genrandval_time = time_uptime;
2123	}
2124
2125	switch (src->sa_family) {
2126	case AF_INET: {
2127		const struct sockaddr_in *sin = satocsin(src);
2128		addrlen = sizeof(sin->sin_addr);
2129		memcpy(addr, &sin->sin_addr, addrlen);
2130		uh_sport = sin->sin_port;
2131		break;
2132	    }
2133#ifdef INET6
2134	case AF_INET6: {
2135		const struct sockaddr_in6 *sin6 = satocsin6(src);
2136		addrlen = sizeof(sin6->sin6_addr);
2137		memcpy(addr, &sin6->sin6_addr, addrlen);
2138		uh_sport = sin6->sin6_port;
2139		break;
2140	    }
2141#endif
2142	default:
2143		panic("invalid af=%d", src->sa_family);
2144	}
2145
2146	wg_algo_mac(cookie, sizeof(cookie),
2147	    (const uint8_t *)&wgp->wgp_randval, sizeof(wgp->wgp_randval),
2148	    addr, addrlen, (const uint8_t *)&uh_sport, sizeof(uh_sport));
2149	wg_algo_mac_cookie(key, sizeof(key), wg->wg_pubkey,
2150	    sizeof(wg->wg_pubkey));
2151	wg_algo_xaead_enc(wgmc->wgmc_cookie, sizeof(wgmc->wgmc_cookie), key,
2152	    cookie, sizeof(cookie), mac1, WG_MAC_LEN, wgmc->wgmc_salt);
2153
2154	/* Need to store to calculate mac2 */
2155	memcpy(wgp->wgp_last_sent_cookie, cookie, sizeof(cookie));
2156	wgp->wgp_last_sent_cookie_valid = true;
2157}
2158
2159static int
2160wg_send_cookie_msg(struct wg_softc *wg, struct wg_peer *wgp,
2161    const uint32_t sender, const uint8_t mac1[WG_MAC_LEN],
2162    const struct sockaddr *src)
2163{
2164	int error;
2165	struct mbuf *m;
2166	struct wg_msg_cookie *wgmc;
2167
2168	KASSERT(mutex_owned(wgp->wgp_lock));
2169
2170	m = m_gethdr(M_WAIT, MT_DATA);
2171	if (sizeof(*wgmc) > MHLEN) {
2172		m_clget(m, M_WAIT);
2173		CTASSERT(sizeof(*wgmc) <= MCLBYTES);
2174	}
2175	m->m_pkthdr.len = m->m_len = sizeof(*wgmc);
2176	wgmc = mtod(m, struct wg_msg_cookie *);
2177	wg_fill_msg_cookie(wg, wgp, wgmc, sender, mac1, src);
2178
2179	error = wg->wg_ops->send_hs_msg(wgp, m);
2180	if (error == 0)
2181		WG_TRACE("cookie msg sent");
2182	return error;
2183}
2184
2185static bool
2186wg_is_underload(struct wg_softc *wg, struct wg_peer *wgp, int msgtype)
2187{
2188#ifdef WG_DEBUG_PARAMS
2189	if (wg_force_underload)
2190		return true;
2191#endif
2192
2193	/*
2194	 * XXX we don't have a means of a load estimation.  The purpose of
2195	 * the mechanism is a DoS mitigation, so we consider frequent handshake
2196	 * messages as (a kind of) load; if a message of the same type comes
2197	 * to a peer within 1 second, we consider we are under load.
2198	 */
2199	time_t last = wgp->wgp_last_msg_received_time[msgtype];
2200	wgp->wgp_last_msg_received_time[msgtype] = time_uptime;
2201	return (time_uptime - last) == 0;
2202}
2203
2204static void
2205wg_calculate_keys(struct wg_session *wgs, const bool initiator)
2206{
2207
2208	KASSERT(mutex_owned(wgs->wgs_peer->wgp_lock));
2209
2210	/*
2211	 * [W] 5.4.5: Ti^send = Tr^recv, Ti^recv = Tr^send := KDF2(Ci = Cr, e)
2212	 */
2213	if (initiator) {
2214		wg_algo_kdf(wgs->wgs_tkey_send, wgs->wgs_tkey_recv, NULL,
2215		    wgs->wgs_chaining_key, NULL, 0);
2216	} else {
2217		wg_algo_kdf(wgs->wgs_tkey_recv, wgs->wgs_tkey_send, NULL,
2218		    wgs->wgs_chaining_key, NULL, 0);
2219	}
2220	WG_DUMP_HASH("wgs_tkey_send", wgs->wgs_tkey_send);
2221	WG_DUMP_HASH("wgs_tkey_recv", wgs->wgs_tkey_recv);
2222}
2223
2224static uint64_t
2225wg_session_get_send_counter(struct wg_session *wgs)
2226{
2227#ifdef __HAVE_ATOMIC64_LOADSTORE
2228	return atomic_load_relaxed(&wgs->wgs_send_counter);
2229#else
2230	uint64_t send_counter;
2231
2232	mutex_enter(&wgs->wgs_send_counter_lock);
2233	send_counter = wgs->wgs_send_counter;
2234	mutex_exit(&wgs->wgs_send_counter_lock);
2235
2236	return send_counter;
2237#endif
2238}
2239
2240static uint64_t
2241wg_session_inc_send_counter(struct wg_session *wgs)
2242{
2243#ifdef __HAVE_ATOMIC64_LOADSTORE
2244	return atomic_inc_64_nv(&wgs->wgs_send_counter) - 1;
2245#else
2246	uint64_t send_counter;
2247
2248	mutex_enter(&wgs->wgs_send_counter_lock);
2249	send_counter = wgs->wgs_send_counter++;
2250	mutex_exit(&wgs->wgs_send_counter_lock);
2251
2252	return send_counter;
2253#endif
2254}
2255
2256static void
2257wg_clear_states(struct wg_session *wgs)
2258{
2259
2260	KASSERT(mutex_owned(wgs->wgs_peer->wgp_lock));
2261
2262	wgs->wgs_send_counter = 0;
2263	sliwin_reset(&wgs->wgs_recvwin->window);
2264
2265#define wgs_clear(v)	explicit_memset(wgs->wgs_##v, 0, sizeof(wgs->wgs_##v))
2266	wgs_clear(handshake_hash);
2267	wgs_clear(chaining_key);
2268	wgs_clear(ephemeral_key_pub);
2269	wgs_clear(ephemeral_key_priv);
2270	wgs_clear(ephemeral_key_peer);
2271#undef wgs_clear
2272}
2273
2274static struct wg_session *
2275wg_lookup_session_by_index(struct wg_softc *wg, const uint32_t index,
2276    struct psref *psref)
2277{
2278	struct wg_session *wgs;
2279
2280	int s = pserialize_read_enter();
2281	wgs = thmap_get(wg->wg_sessions_byindex, &index, sizeof index);
2282	if (wgs != NULL) {
2283		KASSERT(atomic_load_relaxed(&wgs->wgs_state) !=
2284		    WGS_STATE_UNKNOWN);
2285		psref_acquire(psref, &wgs->wgs_psref, wg_psref_class);
2286	}
2287	pserialize_read_exit(s);
2288
2289	return wgs;
2290}
2291
2292static void
2293wg_schedule_rekey_timer(struct wg_peer *wgp)
2294{
2295	int timeout = MIN(wg_rekey_after_time, (unsigned)(INT_MAX / hz));
2296
2297	callout_schedule(&wgp->wgp_rekey_timer, timeout * hz);
2298}
2299
2300static void
2301wg_send_keepalive_msg(struct wg_peer *wgp, struct wg_session *wgs)
2302{
2303	struct mbuf *m;
2304
2305	/*
2306	 * [W] 6.5 Passive Keepalive
2307	 * "A keepalive message is simply a transport data message with
2308	 *  a zero-length encapsulated encrypted inner-packet."
2309	 */
2310	m = m_gethdr(M_WAIT, MT_DATA);
2311	wg_send_data_msg(wgp, wgs, m);
2312}
2313
2314static bool
2315wg_need_to_send_init_message(struct wg_session *wgs)
2316{
2317	/*
2318	 * [W] 6.2 Transport Message Limits
2319	 * "if a peer is the initiator of a current secure session,
2320	 *  WireGuard will send a handshake initiation message to begin
2321	 *  a new secure session ... if after receiving a transport data
2322	 *  message, the current secure session is (REJECT-AFTER-TIME ���
2323	 *  KEEPALIVE-TIMEOUT ��� REKEY-TIMEOUT) seconds old and it has
2324	 *  not yet acted upon this event."
2325	 */
2326	return wgs->wgs_is_initiator && wgs->wgs_time_last_data_sent == 0 &&
2327	    (time_uptime - wgs->wgs_time_established) >=
2328	    (wg_reject_after_time - wg_keepalive_timeout - wg_rekey_timeout);
2329}
2330
2331static void
2332wg_schedule_peer_task(struct wg_peer *wgp, unsigned int task)
2333{
2334
2335	mutex_enter(wgp->wgp_intr_lock);
2336	WG_DLOG("tasks=%d, task=%d\n", wgp->wgp_tasks, task);
2337	if (wgp->wgp_tasks == 0)
2338		/*
2339		 * XXX If the current CPU is already loaded -- e.g., if
2340		 * there's already a bunch of handshakes queued up --
2341		 * consider tossing this over to another CPU to
2342		 * distribute the load.
2343		 */
2344		workqueue_enqueue(wg_wq, &wgp->wgp_work, NULL);
2345	wgp->wgp_tasks |= task;
2346	mutex_exit(wgp->wgp_intr_lock);
2347}
2348
2349static void
2350wg_change_endpoint(struct wg_peer *wgp, const struct sockaddr *new)
2351{
2352	struct wg_sockaddr *wgsa_prev;
2353
2354	WG_TRACE("Changing endpoint");
2355
2356	memcpy(wgp->wgp_endpoint0, new, new->sa_len);
2357	wgsa_prev = wgp->wgp_endpoint;
2358	atomic_store_release(&wgp->wgp_endpoint, wgp->wgp_endpoint0);
2359	wgp->wgp_endpoint0 = wgsa_prev;
2360	atomic_store_release(&wgp->wgp_endpoint_available, true);
2361
2362	wg_schedule_peer_task(wgp, WGP_TASK_ENDPOINT_CHANGED);
2363}
2364
2365static bool
2366wg_validate_inner_packet(const char *packet, size_t decrypted_len, int *af)
2367{
2368	uint16_t packet_len;
2369	const struct ip *ip;
2370
2371	if (__predict_false(decrypted_len < sizeof(struct ip)))
2372		return false;
2373
2374	ip = (const struct ip *)packet;
2375	if (ip->ip_v == 4)
2376		*af = AF_INET;
2377	else if (ip->ip_v == 6)
2378		*af = AF_INET6;
2379	else
2380		return false;
2381
2382	WG_DLOG("af=%d\n", *af);
2383
2384	switch (*af) {
2385#ifdef INET
2386	case AF_INET:
2387		packet_len = ntohs(ip->ip_len);
2388		break;
2389#endif
2390#ifdef INET6
2391	case AF_INET6: {
2392		const struct ip6_hdr *ip6;
2393
2394		if (__predict_false(decrypted_len < sizeof(struct ip6_hdr)))
2395			return false;
2396
2397		ip6 = (const struct ip6_hdr *)packet;
2398		packet_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
2399		break;
2400	}
2401#endif
2402	default:
2403		return false;
2404	}
2405
2406	WG_DLOG("packet_len=%u\n", packet_len);
2407	if (packet_len > decrypted_len)
2408		return false;
2409
2410	return true;
2411}
2412
2413static bool
2414wg_validate_route(struct wg_softc *wg, struct wg_peer *wgp_expected,
2415    int af, char *packet)
2416{
2417	struct sockaddr_storage ss;
2418	struct sockaddr *sa;
2419	struct psref psref;
2420	struct wg_peer *wgp;
2421	bool ok;
2422
2423	/*
2424	 * II CRYPTOKEY ROUTING
2425	 * "it will only accept it if its source IP resolves in the
2426	 *  table to the public key used in the secure session for
2427	 *  decrypting it."
2428	 */
2429
2430	if (af == AF_INET) {
2431		const struct ip *ip = (const struct ip *)packet;
2432		struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
2433		sockaddr_in_init(sin, &ip->ip_src, 0);
2434		sa = sintosa(sin);
2435#ifdef INET6
2436	} else {
2437		const struct ip6_hdr *ip6 = (const struct ip6_hdr *)packet;
2438		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
2439		sockaddr_in6_init(sin6, &ip6->ip6_src, 0, 0, 0);
2440		sa = sin6tosa(sin6);
2441#endif
2442	}
2443
2444	wgp = wg_pick_peer_by_sa(wg, sa, &psref);
2445	ok = (wgp == wgp_expected);
2446	if (wgp != NULL)
2447		wg_put_peer(wgp, &psref);
2448
2449	return ok;
2450}
2451
2452static void
2453wg_session_dtor_timer(void *arg)
2454{
2455	struct wg_peer *wgp = arg;
2456
2457	WG_TRACE("enter");
2458
2459	wg_schedule_peer_task(wgp, WGP_TASK_DESTROY_PREV_SESSION);
2460}
2461
2462static void
2463wg_schedule_session_dtor_timer(struct wg_peer *wgp)
2464{
2465
2466	/* 1 second grace period */
2467	callout_schedule(&wgp->wgp_session_dtor_timer, hz);
2468}
2469
2470static bool
2471sockaddr_port_match(const struct sockaddr *sa1, const struct sockaddr *sa2)
2472{
2473	if (sa1->sa_family != sa2->sa_family)
2474		return false;
2475
2476	switch (sa1->sa_family) {
2477#ifdef INET
2478	case AF_INET:
2479		return satocsin(sa1)->sin_port == satocsin(sa2)->sin_port;
2480#endif
2481#ifdef INET6
2482	case AF_INET6:
2483		return satocsin6(sa1)->sin6_port == satocsin6(sa2)->sin6_port;
2484#endif
2485	default:
2486		return false;
2487	}
2488}
2489
2490static void
2491wg_update_endpoint_if_necessary(struct wg_peer *wgp,
2492    const struct sockaddr *src)
2493{
2494	struct wg_sockaddr *wgsa;
2495	struct psref psref;
2496
2497	wgsa = wg_get_endpoint_sa(wgp, &psref);
2498
2499#ifdef WG_DEBUG_LOG
2500	char oldaddr[128], newaddr[128];
2501	sockaddr_format(wgsatosa(wgsa), oldaddr, sizeof(oldaddr));
2502	sockaddr_format(src, newaddr, sizeof(newaddr));
2503	WG_DLOG("old=%s, new=%s\n", oldaddr, newaddr);
2504#endif
2505
2506	/*
2507	 * III: "Since the packet has authenticated correctly, the source IP of
2508	 * the outer UDP/IP packet is used to update the endpoint for peer..."
2509	 */
2510	if (__predict_false(sockaddr_cmp(src, wgsatosa(wgsa)) != 0 ||
2511		!sockaddr_port_match(src, wgsatosa(wgsa)))) {
2512		/* XXX We can't change the endpoint twice in a short period */
2513		if (atomic_swap_uint(&wgp->wgp_endpoint_changing, 1) == 0) {
2514			wg_change_endpoint(wgp, src);
2515		}
2516	}
2517
2518	wg_put_sa(wgp, wgsa, &psref);
2519}
2520
2521static void __noinline
2522wg_handle_msg_data(struct wg_softc *wg, struct mbuf *m,
2523    const struct sockaddr *src)
2524{
2525	struct wg_msg_data *wgmd;
2526	char *encrypted_buf = NULL, *decrypted_buf;
2527	size_t encrypted_len, decrypted_len;
2528	struct wg_session *wgs;
2529	struct wg_peer *wgp;
2530	int state;
2531	size_t mlen;
2532	struct psref psref;
2533	int error, af;
2534	bool success, free_encrypted_buf = false, ok;
2535	struct mbuf *n;
2536
2537	KASSERT(m->m_len >= sizeof(struct wg_msg_data));
2538	wgmd = mtod(m, struct wg_msg_data *);
2539
2540	KASSERT(wgmd->wgmd_type == htole32(WG_MSG_TYPE_DATA));
2541	WG_TRACE("data");
2542
2543	/* Find the putative session, or drop.  */
2544	wgs = wg_lookup_session_by_index(wg, wgmd->wgmd_receiver, &psref);
2545	if (wgs == NULL) {
2546		WG_TRACE("No session found");
2547		m_freem(m);
2548		return;
2549	}
2550
2551	/*
2552	 * We are only ready to handle data when in INIT_PASSIVE,
2553	 * ESTABLISHED, or DESTROYING.  All transitions out of that
2554	 * state dissociate the session index and drain psrefs.
2555	 */
2556	state = atomic_load_relaxed(&wgs->wgs_state);
2557	switch (state) {
2558	case WGS_STATE_UNKNOWN:
2559		panic("wg session %p in unknown state has session index %u",
2560		    wgs, wgmd->wgmd_receiver);
2561	case WGS_STATE_INIT_ACTIVE:
2562		WG_TRACE("not yet ready for data");
2563		goto out;
2564	case WGS_STATE_INIT_PASSIVE:
2565	case WGS_STATE_ESTABLISHED:
2566	case WGS_STATE_DESTROYING:
2567		break;
2568	}
2569
2570	/*
2571	 * Get the peer, for rate-limited logs (XXX MPSAFE, dtrace) and
2572	 * to update the endpoint if authentication succeeds.
2573	 */
2574	wgp = wgs->wgs_peer;
2575
2576	/*
2577	 * Reject outrageously wrong sequence numbers before doing any
2578	 * crypto work or taking any locks.
2579	 */
2580	error = sliwin_check_fast(&wgs->wgs_recvwin->window,
2581	    le64toh(wgmd->wgmd_counter));
2582	if (error) {
2583		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
2584		    "%s: peer %s: out-of-window packet: %"PRIu64"\n",
2585		    if_name(&wg->wg_if), wgp->wgp_name,
2586		    le64toh(wgmd->wgmd_counter));
2587		goto out;
2588	}
2589
2590	/* Ensure the payload and authenticator are contiguous.  */
2591	mlen = m_length(m);
2592	encrypted_len = mlen - sizeof(*wgmd);
2593	if (encrypted_len < WG_AUTHTAG_LEN) {
2594		WG_DLOG("Short encrypted_len: %lu\n", encrypted_len);
2595		goto out;
2596	}
2597	success = m_ensure_contig(&m, sizeof(*wgmd) + encrypted_len);
2598	if (success) {
2599		encrypted_buf = mtod(m, char *) + sizeof(*wgmd);
2600	} else {
2601		encrypted_buf = kmem_intr_alloc(encrypted_len, KM_NOSLEEP);
2602		if (encrypted_buf == NULL) {
2603			WG_DLOG("failed to allocate encrypted_buf\n");
2604			goto out;
2605		}
2606		m_copydata(m, sizeof(*wgmd), encrypted_len, encrypted_buf);
2607		free_encrypted_buf = true;
2608	}
2609	/* m_ensure_contig may change m regardless of its result */
2610	KASSERT(m->m_len >= sizeof(*wgmd));
2611	wgmd = mtod(m, struct wg_msg_data *);
2612
2613	/*
2614	 * Get a buffer for the plaintext.  Add WG_AUTHTAG_LEN to avoid
2615	 * a zero-length buffer (XXX).  Drop if plaintext is longer
2616	 * than MCLBYTES (XXX).
2617	 */
2618	decrypted_len = encrypted_len - WG_AUTHTAG_LEN;
2619	if (decrypted_len > MCLBYTES) {
2620		/* FIXME handle larger data than MCLBYTES */
2621		WG_DLOG("couldn't handle larger data than MCLBYTES\n");
2622		goto out;
2623	}
2624	n = wg_get_mbuf(0, decrypted_len + WG_AUTHTAG_LEN);
2625	if (n == NULL) {
2626		WG_DLOG("wg_get_mbuf failed\n");
2627		goto out;
2628	}
2629	decrypted_buf = mtod(n, char *);
2630
2631	/* Decrypt and verify the packet.  */
2632	WG_DLOG("mlen=%lu, encrypted_len=%lu\n", mlen, encrypted_len);
2633	error = wg_algo_aead_dec(decrypted_buf,
2634	    encrypted_len - WG_AUTHTAG_LEN /* can be 0 */,
2635	    wgs->wgs_tkey_recv, le64toh(wgmd->wgmd_counter), encrypted_buf,
2636	    encrypted_len, NULL, 0);
2637	if (error != 0) {
2638		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
2639		    "%s: peer %s: failed to wg_algo_aead_dec\n",
2640		    if_name(&wg->wg_if), wgp->wgp_name);
2641		m_freem(n);
2642		goto out;
2643	}
2644	WG_DLOG("outsize=%u\n", (u_int)decrypted_len);
2645
2646	/* Packet is genuine.  Reject it if a replay or just too old.  */
2647	mutex_enter(&wgs->wgs_recvwin->lock);
2648	error = sliwin_update(&wgs->wgs_recvwin->window,
2649	    le64toh(wgmd->wgmd_counter));
2650	mutex_exit(&wgs->wgs_recvwin->lock);
2651	if (error) {
2652		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
2653		    "%s: peer %s: replay or out-of-window packet: %"PRIu64"\n",
2654		    if_name(&wg->wg_if), wgp->wgp_name,
2655		    le64toh(wgmd->wgmd_counter));
2656		m_freem(n);
2657		goto out;
2658	}
2659
2660	/* We're done with m now; free it and chuck the pointers.  */
2661	m_freem(m);
2662	m = NULL;
2663	wgmd = NULL;
2664
2665	/*
2666	 * Validate the encapsulated packet header and get the address
2667	 * family, or drop.
2668	 */
2669	ok = wg_validate_inner_packet(decrypted_buf, decrypted_len, &af);
2670	if (!ok) {
2671		m_freem(n);
2672		goto out;
2673	}
2674
2675	/*
2676	 * The packet is genuine.  Update the peer's endpoint if the
2677	 * source address changed.
2678	 *
2679	 * XXX How to prevent DoS by replaying genuine packets from the
2680	 * wrong source address?
2681	 */
2682	wg_update_endpoint_if_necessary(wgp, src);
2683
2684	/* Submit it into our network stack if routable.  */
2685	ok = wg_validate_route(wg, wgp, af, decrypted_buf);
2686	if (ok) {
2687		wg->wg_ops->input(&wg->wg_if, n, af);
2688	} else {
2689		char addrstr[INET6_ADDRSTRLEN];
2690		memset(addrstr, 0, sizeof(addrstr));
2691		if (af == AF_INET) {
2692			const struct ip *ip = (const struct ip *)decrypted_buf;
2693			IN_PRINT(addrstr, &ip->ip_src);
2694#ifdef INET6
2695		} else if (af == AF_INET6) {
2696			const struct ip6_hdr *ip6 =
2697			    (const struct ip6_hdr *)decrypted_buf;
2698			IN6_PRINT(addrstr, &ip6->ip6_src);
2699#endif
2700		}
2701		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
2702		    "%s: peer %s: invalid source address (%s)\n",
2703		    if_name(&wg->wg_if), wgp->wgp_name, addrstr);
2704		m_freem(n);
2705		/*
2706		 * The inner address is invalid however the session is valid
2707		 * so continue the session processing below.
2708		 */
2709	}
2710	n = NULL;
2711
2712	/* Update the state machine if necessary.  */
2713	if (__predict_false(state == WGS_STATE_INIT_PASSIVE)) {
2714		/*
2715		 * We were waiting for the initiator to send their
2716		 * first data transport message, and that has happened.
2717		 * Schedule a task to establish this session.
2718		 */
2719		wg_schedule_peer_task(wgp, WGP_TASK_ESTABLISH_SESSION);
2720	} else {
2721		if (__predict_false(wg_need_to_send_init_message(wgs))) {
2722			wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
2723		}
2724		/*
2725		 * [W] 6.5 Passive Keepalive
2726		 * "If a peer has received a validly-authenticated transport
2727		 *  data message (section 5.4.6), but does not have any packets
2728		 *  itself to send back for KEEPALIVE-TIMEOUT seconds, it sends
2729		 *  a keepalive message."
2730		 */
2731		WG_DLOG("time_uptime=%ju wgs_time_last_data_sent=%ju\n",
2732		    (uintmax_t)time_uptime,
2733		    (uintmax_t)wgs->wgs_time_last_data_sent);
2734		if ((time_uptime - wgs->wgs_time_last_data_sent) >=
2735		    wg_keepalive_timeout) {
2736			WG_TRACE("Schedule sending keepalive message");
2737			/*
2738			 * We can't send a keepalive message here to avoid
2739			 * a deadlock;  we already hold the solock of a socket
2740			 * that is used to send the message.
2741			 */
2742			wg_schedule_peer_task(wgp,
2743			    WGP_TASK_SEND_KEEPALIVE_MESSAGE);
2744		}
2745	}
2746out:
2747	wg_put_session(wgs, &psref);
2748	if (m != NULL)
2749		m_freem(m);
2750	if (free_encrypted_buf)
2751		kmem_intr_free(encrypted_buf, encrypted_len);
2752}
2753
2754static void __noinline
2755wg_handle_msg_cookie(struct wg_softc *wg, const struct wg_msg_cookie *wgmc)
2756{
2757	struct wg_session *wgs;
2758	struct wg_peer *wgp;
2759	struct psref psref;
2760	int error;
2761	uint8_t key[WG_HASH_LEN];
2762	uint8_t cookie[WG_COOKIE_LEN];
2763
2764	WG_TRACE("cookie msg received");
2765
2766	/* Find the putative session.  */
2767	wgs = wg_lookup_session_by_index(wg, wgmc->wgmc_receiver, &psref);
2768	if (wgs == NULL) {
2769		WG_TRACE("No session found");
2770		return;
2771	}
2772
2773	/* Lock the peer so we can update the cookie state.  */
2774	wgp = wgs->wgs_peer;
2775	mutex_enter(wgp->wgp_lock);
2776
2777	if (!wgp->wgp_last_sent_mac1_valid) {
2778		WG_TRACE("No valid mac1 sent (or expired)");
2779		goto out;
2780	}
2781
2782	/* Decrypt the cookie and store it for later handshake retry.  */
2783	wg_algo_mac_cookie(key, sizeof(key), wgp->wgp_pubkey,
2784	    sizeof(wgp->wgp_pubkey));
2785	error = wg_algo_xaead_dec(cookie, sizeof(cookie), key,
2786	    wgmc->wgmc_cookie, sizeof(wgmc->wgmc_cookie),
2787	    wgp->wgp_last_sent_mac1, sizeof(wgp->wgp_last_sent_mac1),
2788	    wgmc->wgmc_salt);
2789	if (error != 0) {
2790		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
2791		    "%s: peer %s: wg_algo_aead_dec for cookie failed: "
2792		    "error=%d\n", if_name(&wg->wg_if), wgp->wgp_name, error);
2793		goto out;
2794	}
2795	/*
2796	 * [W] 6.6: Interaction with Cookie Reply System
2797	 * "it should simply store the decrypted cookie value from the cookie
2798	 *  reply message, and wait for the expiration of the REKEY-TIMEOUT
2799	 *  timer for retrying a handshake initiation message."
2800	 */
2801	wgp->wgp_latest_cookie_time = time_uptime;
2802	memcpy(wgp->wgp_latest_cookie, cookie, sizeof(wgp->wgp_latest_cookie));
2803out:
2804	mutex_exit(wgp->wgp_lock);
2805	wg_put_session(wgs, &psref);
2806}
2807
2808static struct mbuf *
2809wg_validate_msg_header(struct wg_softc *wg, struct mbuf *m)
2810{
2811	struct wg_msg wgm;
2812	size_t mbuflen;
2813	size_t msglen;
2814
2815	/*
2816	 * Get the mbuf chain length.  It is already guaranteed, by
2817	 * wg_overudp_cb, to be large enough for a struct wg_msg.
2818	 */
2819	mbuflen = m_length(m);
2820	KASSERT(mbuflen >= sizeof(struct wg_msg));
2821
2822	/*
2823	 * Copy the message header (32-bit message type) out -- we'll
2824	 * worry about contiguity and alignment later.
2825	 */
2826	m_copydata(m, 0, sizeof(wgm), &wgm);
2827	switch (le32toh(wgm.wgm_type)) {
2828	case WG_MSG_TYPE_INIT:
2829		msglen = sizeof(struct wg_msg_init);
2830		break;
2831	case WG_MSG_TYPE_RESP:
2832		msglen = sizeof(struct wg_msg_resp);
2833		break;
2834	case WG_MSG_TYPE_COOKIE:
2835		msglen = sizeof(struct wg_msg_cookie);
2836		break;
2837	case WG_MSG_TYPE_DATA:
2838		msglen = sizeof(struct wg_msg_data);
2839		break;
2840	default:
2841		WG_LOG_RATECHECK(&wg->wg_ppsratecheck, LOG_DEBUG,
2842		    "%s: Unexpected msg type: %u\n", if_name(&wg->wg_if),
2843		    le32toh(wgm.wgm_type));
2844		goto error;
2845	}
2846
2847	/* Verify the mbuf chain is long enough for this type of message.  */
2848	if (__predict_false(mbuflen < msglen)) {
2849		WG_DLOG("Invalid msg size: mbuflen=%lu type=%u\n", mbuflen,
2850		    le32toh(wgm.wgm_type));
2851		goto error;
2852	}
2853
2854	/* Make the message header contiguous if necessary.  */
2855	if (__predict_false(m->m_len < msglen)) {
2856		m = m_pullup(m, msglen);
2857		if (m == NULL)
2858			return NULL;
2859	}
2860
2861	return m;
2862
2863error:
2864	m_freem(m);
2865	return NULL;
2866}
2867
2868static void
2869wg_handle_packet(struct wg_softc *wg, struct mbuf *m,
2870    const struct sockaddr *src)
2871{
2872	struct wg_msg *wgm;
2873
2874	KASSERT(curlwp->l_pflag & LP_BOUND);
2875
2876	m = wg_validate_msg_header(wg, m);
2877	if (__predict_false(m == NULL))
2878		return;
2879
2880	KASSERT(m->m_len >= sizeof(struct wg_msg));
2881	wgm = mtod(m, struct wg_msg *);
2882	switch (le32toh(wgm->wgm_type)) {
2883	case WG_MSG_TYPE_INIT:
2884		wg_handle_msg_init(wg, (struct wg_msg_init *)wgm, src);
2885		break;
2886	case WG_MSG_TYPE_RESP:
2887		wg_handle_msg_resp(wg, (struct wg_msg_resp *)wgm, src);
2888		break;
2889	case WG_MSG_TYPE_COOKIE:
2890		wg_handle_msg_cookie(wg, (struct wg_msg_cookie *)wgm);
2891		break;
2892	case WG_MSG_TYPE_DATA:
2893		wg_handle_msg_data(wg, m, src);
2894		/* wg_handle_msg_data frees m for us */
2895		return;
2896	default:
2897		panic("invalid message type: %d", le32toh(wgm->wgm_type));
2898	}
2899
2900	m_freem(m);
2901}
2902
2903static void
2904wg_receive_packets(struct wg_softc *wg, const int af)
2905{
2906
2907	for (;;) {
2908		int error, flags;
2909		struct socket *so;
2910		struct mbuf *m = NULL;
2911		struct uio dummy_uio;
2912		struct mbuf *paddr = NULL;
2913		struct sockaddr *src;
2914
2915		so = wg_get_so_by_af(wg, af);
2916		flags = MSG_DONTWAIT;
2917		dummy_uio.uio_resid = 1000000000;
2918
2919		error = so->so_receive(so, &paddr, &dummy_uio, &m, NULL,
2920		    &flags);
2921		if (error || m == NULL) {
2922			//if (error == EWOULDBLOCK)
2923			return;
2924		}
2925
2926		KASSERT(paddr != NULL);
2927		KASSERT(paddr->m_len >= sizeof(struct sockaddr));
2928		src = mtod(paddr, struct sockaddr *);
2929
2930		wg_handle_packet(wg, m, src);
2931	}
2932}
2933
2934static void
2935wg_get_peer(struct wg_peer *wgp, struct psref *psref)
2936{
2937
2938	psref_acquire(psref, &wgp->wgp_psref, wg_psref_class);
2939}
2940
2941static void
2942wg_put_peer(struct wg_peer *wgp, struct psref *psref)
2943{
2944
2945	psref_release(psref, &wgp->wgp_psref, wg_psref_class);
2946}
2947
2948static void
2949wg_task_send_init_message(struct wg_softc *wg, struct wg_peer *wgp)
2950{
2951	struct wg_session *wgs;
2952
2953	WG_TRACE("WGP_TASK_SEND_INIT_MESSAGE");
2954
2955	KASSERT(mutex_owned(wgp->wgp_lock));
2956
2957	if (!atomic_load_acquire(&wgp->wgp_endpoint_available)) {
2958		WGLOG(LOG_DEBUG, "%s: No endpoint available\n",
2959		    if_name(&wg->wg_if));
2960		/* XXX should do something? */
2961		return;
2962	}
2963
2964	wgs = wgp->wgp_session_stable;
2965	if (wgs->wgs_state == WGS_STATE_UNKNOWN) {
2966		/* XXX What if the unstable session is already INIT_ACTIVE?  */
2967		wg_send_handshake_msg_init(wg, wgp);
2968	} else {
2969		/* rekey */
2970		wgs = wgp->wgp_session_unstable;
2971		if (wgs->wgs_state != WGS_STATE_INIT_ACTIVE)
2972			wg_send_handshake_msg_init(wg, wgp);
2973	}
2974}
2975
2976static void
2977wg_task_retry_handshake(struct wg_softc *wg, struct wg_peer *wgp)
2978{
2979	struct wg_session *wgs;
2980
2981	WG_TRACE("WGP_TASK_RETRY_HANDSHAKE");
2982
2983	KASSERT(mutex_owned(wgp->wgp_lock));
2984	KASSERT(wgp->wgp_handshake_start_time != 0);
2985
2986	wgs = wgp->wgp_session_unstable;
2987	if (wgs->wgs_state != WGS_STATE_INIT_ACTIVE)
2988		return;
2989
2990	/*
2991	 * XXX no real need to assign a new index here, but we do need
2992	 * to transition to UNKNOWN temporarily
2993	 */
2994	wg_put_session_index(wg, wgs);
2995
2996	/* [W] 6.4 Handshake Initiation Retransmission */
2997	if ((time_uptime - wgp->wgp_handshake_start_time) >
2998	    wg_rekey_attempt_time) {
2999		/* Give up handshaking */
3000		wgp->wgp_handshake_start_time = 0;
3001		WG_TRACE("give up");
3002
3003		/*
3004		 * If a new data packet comes, handshaking will be retried
3005		 * and a new session would be established at that time,
3006		 * however we don't want to send pending packets then.
3007		 */
3008		wg_purge_pending_packets(wgp);
3009		return;
3010	}
3011
3012	wg_task_send_init_message(wg, wgp);
3013}
3014
/*
 * wg_task_establish_session(wg, wgp)
 *
 *	Peer task: promote the unstable session from INIT_PASSIVE to
 *	ESTABLISHED, swap it into the stable slot, hand any packet
 *	parked in wgp->wgp_pending to wg_pktq, and retire the session
 *	that occupied the stable slot before the swap.
 *
 *	Caller must hold wgp->wgp_lock.
 */
static void
wg_task_establish_session(struct wg_softc *wg, struct wg_peer *wgp)
{
	struct wg_session *wgs, *wgs_prev;
	struct mbuf *m;

	KASSERT(mutex_owned(wgp->wgp_lock));

	/* Only an unstable session in INIT_PASSIVE can be promoted.  */
	wgs = wgp->wgp_session_unstable;
	if (wgs->wgs_state != WGS_STATE_INIT_PASSIVE)
		/* XXX Can this happen?  */
		return;

	wgs->wgs_state = WGS_STATE_ESTABLISHED;
	wgs->wgs_time_established = time_uptime;
	wgs->wgs_time_last_data_sent = 0;
	wgs->wgs_is_initiator = false;
	WG_TRACE("WGS_STATE_ESTABLISHED");

	/*
	 * After the swap the freshly established session is the stable
	 * one and wgs_prev refers to the session that used to be stable.
	 */
	wg_swap_sessions(wgp);
	KASSERT(wgs == wgp->wgp_session_stable);
	wgs_prev = wgp->wgp_session_unstable;
	/* Record completion and reset the per-handshake bookkeeping.  */
	getnanotime(&wgp->wgp_last_handshake_time);
	wgp->wgp_handshake_start_time = 0;
	wgp->wgp_last_sent_mac1_valid = false;
	wgp->wgp_last_sent_cookie_valid = false;

	/* If we had a data packet queued up, send it.  */
	if ((m = atomic_swap_ptr(&wgp->wgp_pending, NULL)) != NULL) {
		/* Preemption is disabled while the CPU index is in use.  */
		kpreempt_disable();
		const uint32_t h = curcpu()->ci_index; // pktq_rps_hash(m)
		M_SETCTX(m, wgp);
		if (__predict_false(!pktq_enqueue(wg_pktq, m, h))) {
			WGLOG(LOG_ERR, "%s: pktq full, dropping\n",
			    if_name(&wg->wg_if));
			m_freem(m);
		}
		kpreempt_enable();
	}

	if (wgs_prev->wgs_state == WGS_STATE_ESTABLISHED) {
		/* Wait for wg_get_stable_session to drain.  */
		pserialize_perform(wgp->wgp_psz);

		/* Transition ESTABLISHED->DESTROYING.  */
		wgs_prev->wgs_state = WGS_STATE_DESTROYING;

		/* We can't destroy the old session immediately */
		wg_schedule_session_dtor_timer(wgp);
	} else {
		/* The previous slot was unused; just reset its contents.  */
		KASSERTMSG(wgs_prev->wgs_state == WGS_STATE_UNKNOWN,
		    "state=%d", wgs_prev->wgs_state);
		wg_clear_states(wgs_prev);
		wgs_prev->wgs_state = WGS_STATE_UNKNOWN;
	}
}
3071
/*
 * wg_task_endpoint_changed(wg, wgp)
 *
 *	Peer task: if wgp->wgp_endpoint_changing is set, wait for
 *	pserialize readers, re-initialize the psref target of
 *	wgp->wgp_endpoint0, and clear the flag.
 *
 *	Caller must hold wgp->wgp_lock; the lock is dropped and
 *	re-taken around the psref target destroy/init pair.
 */
static void
wg_task_endpoint_changed(struct wg_softc *wg, struct wg_peer *wgp)
{

	WG_TRACE("WGP_TASK_ENDPOINT_CHANGED");

	KASSERT(mutex_owned(wgp->wgp_lock));

	if (atomic_load_relaxed(&wgp->wgp_endpoint_changing)) {
		pserialize_perform(wgp->wgp_psz);
		/*
		 * NOTE(review): wgp_lock is released here presumably
		 * because psref_target_destroy may wait for outstanding
		 * references -- confirm against psref(9).
		 */
		mutex_exit(wgp->wgp_lock);
		psref_target_destroy(&wgp->wgp_endpoint0->wgsa_psref,
		    wg_psref_class);
		psref_target_init(&wgp->wgp_endpoint0->wgsa_psref,
		    wg_psref_class);
		mutex_enter(wgp->wgp_lock);
		/* Release store: the re-initialized target is published first.  */
		atomic_store_release(&wgp->wgp_endpoint_changing, 0);
	}
}
3091
3092static void
3093wg_task_send_keepalive_message(struct wg_softc *wg, struct wg_peer *wgp)
3094{
3095	struct wg_session *wgs;
3096
3097	WG_TRACE("WGP_TASK_SEND_KEEPALIVE_MESSAGE");
3098
3099	KASSERT(mutex_owned(wgp->wgp_lock));
3100
3101	wgs = wgp->wgp_session_stable;
3102	if (wgs->wgs_state != WGS_STATE_ESTABLISHED)
3103		return;
3104
3105	wg_send_keepalive_msg(wgp, wgs);
3106}
3107
3108static void
3109wg_task_destroy_prev_session(struct wg_softc *wg, struct wg_peer *wgp)
3110{
3111	struct wg_session *wgs;
3112
3113	WG_TRACE("WGP_TASK_DESTROY_PREV_SESSION");
3114
3115	KASSERT(mutex_owned(wgp->wgp_lock));
3116
3117	wgs = wgp->wgp_session_unstable;
3118	if (wgs->wgs_state == WGS_STATE_DESTROYING) {
3119		wg_put_session_index(wg, wgs);
3120	}
3121}
3122
3123static void
3124wg_peer_work(struct work *wk, void *cookie)
3125{
3126	struct wg_peer *wgp = container_of(wk, struct wg_peer, wgp_work);
3127	struct wg_softc *wg = wgp->wgp_sc;
3128	unsigned int tasks;
3129
3130	mutex_enter(wgp->wgp_intr_lock);
3131	while ((tasks = wgp->wgp_tasks) != 0) {
3132		wgp->wgp_tasks = 0;
3133		mutex_exit(wgp->wgp_intr_lock);
3134
3135		mutex_enter(wgp->wgp_lock);
3136		if (ISSET(tasks, WGP_TASK_SEND_INIT_MESSAGE))
3137			wg_task_send_init_message(wg, wgp);
3138		if (ISSET(tasks, WGP_TASK_RETRY_HANDSHAKE))
3139			wg_task_retry_handshake(wg, wgp);
3140		if (ISSET(tasks, WGP_TASK_ESTABLISH_SESSION))
3141			wg_task_establish_session(wg, wgp);
3142		if (ISSET(tasks, WGP_TASK_ENDPOINT_CHANGED))
3143			wg_task_endpoint_changed(wg, wgp);
3144		if (ISSET(tasks, WGP_TASK_SEND_KEEPALIVE_MESSAGE))
3145			wg_task_send_keepalive_message(wg, wgp);
3146		if (ISSET(tasks, WGP_TASK_DESTROY_PREV_SESSION))
3147			wg_task_destroy_prev_session(wg, wgp);
3148		mutex_exit(wgp->wgp_lock);
3149
3150		mutex_enter(wgp->wgp_intr_lock);
3151	}
3152	mutex_exit(wgp->wgp_intr_lock);
3153}
3154
3155static void
3156wg_job(struct threadpool_job *job)
3157{
3158	struct wg_softc *wg = container_of(job, struct wg_softc, wg_job);
3159	int bound, upcalls;
3160
3161	mutex_enter(wg->wg_intr_lock);
3162	while ((upcalls = wg->wg_upcalls) != 0) {
3163		wg->wg_upcalls = 0;
3164		mutex_exit(wg->wg_intr_lock);
3165		bound = curlwp_bind();
3166		if (ISSET(upcalls, WG_UPCALL_INET))
3167			wg_receive_packets(wg, AF_INET);
3168		if (ISSET(upcalls, WG_UPCALL_INET6))
3169			wg_receive_packets(wg, AF_INET6);
3170		curlwp_bindx(bound);
3171		mutex_enter(wg->wg_intr_lock);
3172	}
3173	threadpool_job_done(job);
3174	mutex_exit(wg->wg_intr_lock);
3175}
3176
3177static int
3178wg_bind_port(struct wg_softc *wg, const uint16_t port)
3179{
3180	int error;
3181	uint16_t old_port = wg->wg_listen_port;
3182
3183	if (port != 0 && old_port == port)
3184		return 0;
3185
3186	struct sockaddr_in _sin, *sin = &_sin;
3187	sin->sin_len = sizeof(*sin);
3188	sin->sin_family = AF_INET;
3189	sin->sin_addr.s_addr = INADDR_ANY;
3190	sin->sin_port = htons(port);
3191
3192	error = sobind(wg->wg_so4, sintosa(sin), curlwp);
3193	if (error != 0)
3194		return error;
3195
3196#ifdef INET6
3197	struct sockaddr_in6 _sin6, *sin6 = &_sin6;
3198	sin6->sin6_len = sizeof(*sin6);
3199	sin6->sin6_family = AF_INET6;
3200	sin6->sin6_addr = in6addr_any;
3201	sin6->sin6_port = htons(port);
3202
3203	error = sobind(wg->wg_so6, sin6tosa(sin6), curlwp);
3204	if (error != 0)
3205		return error;
3206#endif
3207
3208	wg->wg_listen_port = port;
3209
3210	return 0;
3211}
3212
3213static void
3214wg_so_upcall(struct socket *so, void *cookie, int events, int waitflag)
3215{
3216	struct wg_softc *wg = cookie;
3217	int reason;
3218
3219	reason = (so->so_proto->pr_domain->dom_family == AF_INET) ?
3220	    WG_UPCALL_INET :
3221	    WG_UPCALL_INET6;
3222
3223	mutex_enter(wg->wg_intr_lock);
3224	wg->wg_upcalls |= reason;
3225	threadpool_schedule_job(wg->wg_threadpool, &wg->wg_job);
3226	mutex_exit(wg->wg_intr_lock);
3227}
3228
3229static int
3230wg_overudp_cb(struct mbuf **mp, int offset, struct socket *so,
3231    struct sockaddr *src, void *arg)
3232{
3233	struct wg_softc *wg = arg;
3234	struct wg_msg wgm;
3235	struct mbuf *m = *mp;
3236
3237	WG_TRACE("enter");
3238
3239	/* Verify the mbuf chain is long enough to have a wg msg header.  */
3240	KASSERT(offset <= m_length(m));
3241	if (__predict_false(m_length(m) - offset < sizeof(struct wg_msg))) {
3242		/* drop on the floor */
3243		m_freem(m);
3244		return -1;
3245	}
3246
3247	/*
3248	 * Copy the message header (32-bit message type) out -- we'll
3249	 * worry about contiguity and alignment later.
3250	 */
3251	m_copydata(m, offset, sizeof(struct wg_msg), &wgm);
3252	WG_DLOG("type=%d\n", le32toh(wgm.wgm_type));
3253
3254	/*
3255	 * Handle DATA packets promptly as they arrive.  Other packets
3256	 * may require expensive public-key crypto and are not as
3257	 * sensitive to latency, so defer them to the worker thread.
3258	 */
3259	switch (le32toh(wgm.wgm_type)) {
3260	case WG_MSG_TYPE_DATA:
3261		/* handle immediately */
3262		m_adj(m, offset);
3263		if (__predict_false(m->m_len < sizeof(struct wg_msg_data))) {
3264			m = m_pullup(m, sizeof(struct wg_msg_data));
3265			if (m == NULL)
3266				return -1;
3267		}
3268		wg_handle_msg_data(wg, m, src);
3269		*mp = NULL;
3270		return 1;
3271	case WG_MSG_TYPE_INIT:
3272	case WG_MSG_TYPE_RESP:
3273	case WG_MSG_TYPE_COOKIE:
3274		/* pass through to so_receive in wg_receive_packets */
3275		return 0;
3276	default:
3277		/* drop on the floor */
3278		m_freem(m);
3279		return -1;
3280	}
3281}
3282
3283static int
3284wg_socreate(struct wg_softc *wg, int af, struct socket **sop)
3285{
3286	int error;
3287	struct socket *so;
3288
3289	error = socreate(af, &so, SOCK_DGRAM, 0, curlwp, NULL);
3290	if (error != 0)
3291		return error;
3292
3293	solock(so);
3294	so->so_upcallarg = wg;
3295	so->so_upcall = wg_so_upcall;
3296	so->so_rcv.sb_flags |= SB_UPCALL;
3297	inpcb_register_overudp_cb(sotoinpcb(so), wg_overudp_cb, wg);
3298	sounlock(so);
3299
3300	*sop = so;
3301
3302	return 0;
3303}
3304
3305static bool
3306wg_session_hit_limits(struct wg_session *wgs)
3307{
3308
3309	/*
3310	 * [W] 6.2: Transport Message Limits
3311	 * "After REJECT-AFTER-MESSAGES transport data messages or after the
3312	 *  current secure session is REJECT-AFTER-TIME seconds old, whichever
3313	 *  comes first, WireGuard will refuse to send any more transport data
3314	 *  messages using the current secure session, ..."
3315	 */
3316	KASSERT(wgs->wgs_time_established != 0);
3317	if ((time_uptime - wgs->wgs_time_established) > wg_reject_after_time) {
3318		WG_DLOG("The session hits REJECT_AFTER_TIME\n");
3319		return true;
3320	} else if (wg_session_get_send_counter(wgs) >
3321	    wg_reject_after_messages) {
3322		WG_DLOG("The session hits REJECT_AFTER_MESSAGES\n");
3323		return true;
3324	}
3325
3326	return false;
3327}
3328
3329static void
3330wgintr(void *cookie)
3331{
3332	struct wg_peer *wgp;
3333	struct wg_session *wgs;
3334	struct mbuf *m;
3335	struct psref psref;
3336
3337	while ((m = pktq_dequeue(wg_pktq)) != NULL) {
3338		wgp = M_GETCTX(m, struct wg_peer *);
3339		if ((wgs = wg_get_stable_session(wgp, &psref)) == NULL) {
3340			WG_TRACE("no stable session");
3341			wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
3342			goto next0;
3343		}
3344		if (__predict_false(wg_session_hit_limits(wgs))) {
3345			WG_TRACE("stable session hit limits");
3346			wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
3347			goto next1;
3348		}
3349		wg_send_data_msg(wgp, wgs, m);
3350		m = NULL;	/* consumed */
3351next1:		wg_put_session(wgs, &psref);
3352next0:		if (m)
3353			m_freem(m);
3354		/* XXX Yield to avoid userland starvation?  */
3355	}
3356}
3357
/*
 * wg_rekey_timer(arg)
 *
 *	Callout handler for the peer's rekey timer (installed by
 *	wg_alloc_peer).  arg is the struct wg_peer; all it does is
 *	request the SEND_INIT_MESSAGE peer task.
 */
static void
wg_rekey_timer(void *arg)
{
	struct wg_peer *wgp = arg;

	wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
}
3365
3366static void
3367wg_purge_pending_packets(struct wg_peer *wgp)
3368{
3369	struct mbuf *m;
3370
3371	if ((m = atomic_swap_ptr(&wgp->wgp_pending, NULL)) != NULL)
3372		m_freem(m);
3373	pktq_barrier(wg_pktq);
3374}
3375
/*
 * wg_handshake_timeout_timer(arg)
 *
 *	Callout handler for the peer's handshake timeout timer
 *	(installed by wg_alloc_peer).  arg is the struct wg_peer; all
 *	it does is request the RETRY_HANDSHAKE peer task.
 */
static void
wg_handshake_timeout_timer(void *arg)
{
	struct wg_peer *wgp = arg;

	WG_TRACE("enter");

	wg_schedule_peer_task(wgp, WGP_TASK_RETRY_HANDSHAKE);
}
3385
3386static struct wg_peer *
3387wg_alloc_peer(struct wg_softc *wg)
3388{
3389	struct wg_peer *wgp;
3390
3391	wgp = kmem_zalloc(sizeof(*wgp), KM_SLEEP);
3392
3393	wgp->wgp_sc = wg;
3394	callout_init(&wgp->wgp_rekey_timer, CALLOUT_MPSAFE);
3395	callout_setfunc(&wgp->wgp_rekey_timer, wg_rekey_timer, wgp);
3396	callout_init(&wgp->wgp_handshake_timeout_timer, CALLOUT_MPSAFE);
3397	callout_setfunc(&wgp->wgp_handshake_timeout_timer,
3398	    wg_handshake_timeout_timer, wgp);
3399	callout_init(&wgp->wgp_session_dtor_timer, CALLOUT_MPSAFE);
3400	callout_setfunc(&wgp->wgp_session_dtor_timer,
3401	    wg_session_dtor_timer, wgp);
3402	PSLIST_ENTRY_INIT(wgp, wgp_peerlist_entry);
3403	wgp->wgp_endpoint_changing = false;
3404	wgp->wgp_endpoint_available = false;
3405	wgp->wgp_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
3406	wgp->wgp_intr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
3407	wgp->wgp_psz = pserialize_create();
3408	psref_target_init(&wgp->wgp_psref, wg_psref_class);
3409
3410	wgp->wgp_endpoint = kmem_zalloc(sizeof(*wgp->wgp_endpoint), KM_SLEEP);
3411	wgp->wgp_endpoint0 = kmem_zalloc(sizeof(*wgp->wgp_endpoint0), KM_SLEEP);
3412	psref_target_init(&wgp->wgp_endpoint->wgsa_psref, wg_psref_class);
3413	psref_target_init(&wgp->wgp_endpoint0->wgsa_psref, wg_psref_class);
3414
3415	struct wg_session *wgs;
3416	wgp->wgp_session_stable =
3417	    kmem_zalloc(sizeof(*wgp->wgp_session_stable), KM_SLEEP);
3418	wgp->wgp_session_unstable =
3419	    kmem_zalloc(sizeof(*wgp->wgp_session_unstable), KM_SLEEP);
3420	wgs = wgp->wgp_session_stable;
3421	wgs->wgs_peer = wgp;
3422	wgs->wgs_state = WGS_STATE_UNKNOWN;
3423	psref_target_init(&wgs->wgs_psref, wg_psref_class);
3424#ifndef __HAVE_ATOMIC64_LOADSTORE
3425	mutex_init(&wgs->wgs_send_counter_lock, MUTEX_DEFAULT, IPL_SOFTNET);
3426#endif
3427	wgs->wgs_recvwin = kmem_zalloc(sizeof(*wgs->wgs_recvwin), KM_SLEEP);
3428	mutex_init(&wgs->wgs_recvwin->lock, MUTEX_DEFAULT, IPL_SOFTNET);
3429
3430	wgs = wgp->wgp_session_unstable;
3431	wgs->wgs_peer = wgp;
3432	wgs->wgs_state = WGS_STATE_UNKNOWN;
3433	psref_target_init(&wgs->wgs_psref, wg_psref_class);
3434#ifndef __HAVE_ATOMIC64_LOADSTORE
3435	mutex_init(&wgs->wgs_send_counter_lock, MUTEX_DEFAULT, IPL_SOFTNET);
3436#endif
3437	wgs->wgs_recvwin = kmem_zalloc(sizeof(*wgs->wgs_recvwin), KM_SLEEP);
3438	mutex_init(&wgs->wgs_recvwin->lock, MUTEX_DEFAULT, IPL_SOFTNET);
3439
3440	return wgp;
3441}
3442
/*
 * wg_destroy_peer(wgp)
 *
 *	Tear down a peer and free everything wg_alloc_peer set up:
 *	remove its allowed-IP entries from the interface's radix
 *	tables, drop pending packets, halt its callouts, wait for its
 *	queued work, destroy both sessions, and release the endpoint
 *	buffers, pserialize state, locks, and the peer itself.
 *
 *	The visible callers (wg_destroy_all_peers and
 *	wg_destroy_peer_name) first unlink the peer from the
 *	interface's lookup structures and destroy wgp->wgp_psref.
 */
static void
wg_destroy_peer(struct wg_peer *wgp)
{
	struct wg_session *wgs;
	struct wg_softc *wg = wgp->wgp_sc;

	/* Prevent new packets from this peer on any source address.  */
	rw_enter(wg->wg_rwlock, RW_WRITER);
	for (int i = 0; i < wgp->wgp_n_allowedips; i++) {
		struct wg_allowedip *wga = &wgp->wgp_allowedips[i];
		struct radix_node_head *rnh = wg_rnh(wg, wga->wga_family);
		struct radix_node *rn;

		KASSERT(rnh != NULL);
		rn = rnh->rnh_deladdr(&wga->wga_sa_addr,
		    &wga->wga_sa_mask, rnh);
		if (rn == NULL) {
			/* Not fatal: log the address and carry on.  */
			char addrstr[128];
			sockaddr_format(&wga->wga_sa_addr, addrstr,
			    sizeof(addrstr));
			WGLOG(LOG_WARNING, "%s: Couldn't delete %s",
			    if_name(&wg->wg_if), addrstr);
		}
	}
	rw_exit(wg->wg_rwlock);

	/* Purge pending packets.  */
	wg_purge_pending_packets(wgp);

	/* Halt all packet processing and timeouts.  */
	callout_halt(&wgp->wgp_rekey_timer, NULL);
	callout_halt(&wgp->wgp_handshake_timeout_timer, NULL);
	callout_halt(&wgp->wgp_session_dtor_timer, NULL);

	/* Wait for any queued work to complete.  */
	workqueue_wait(wg_wq, &wgp->wgp_work);

	/*
	 * Unstable session: wg_destroy_session is called (under
	 * wgp_lock) only if the session is in use, i.e. not UNKNOWN;
	 * its receive window, locks and memory are released either way.
	 */
	wgs = wgp->wgp_session_unstable;
	if (wgs->wgs_state != WGS_STATE_UNKNOWN) {
		mutex_enter(wgp->wgp_lock);
		wg_destroy_session(wg, wgs);
		mutex_exit(wgp->wgp_lock);
	}
	mutex_destroy(&wgs->wgs_recvwin->lock);
	kmem_free(wgs->wgs_recvwin, sizeof(*wgs->wgs_recvwin));
#ifndef __HAVE_ATOMIC64_LOADSTORE
	mutex_destroy(&wgs->wgs_send_counter_lock);
#endif
	kmem_free(wgs, sizeof(*wgs));

	/* Stable session: same procedure.  */
	wgs = wgp->wgp_session_stable;
	if (wgs->wgs_state != WGS_STATE_UNKNOWN) {
		mutex_enter(wgp->wgp_lock);
		wg_destroy_session(wg, wgs);
		mutex_exit(wgp->wgp_lock);
	}
	mutex_destroy(&wgs->wgs_recvwin->lock);
	kmem_free(wgs->wgs_recvwin, sizeof(*wgs->wgs_recvwin));
#ifndef __HAVE_ATOMIC64_LOADSTORE
	mutex_destroy(&wgs->wgs_send_counter_lock);
#endif
	kmem_free(wgs, sizeof(*wgs));

	/* Endpoint buffers.  */
	psref_target_destroy(&wgp->wgp_endpoint->wgsa_psref, wg_psref_class);
	psref_target_destroy(&wgp->wgp_endpoint0->wgsa_psref, wg_psref_class);
	kmem_free(wgp->wgp_endpoint, sizeof(*wgp->wgp_endpoint));
	kmem_free(wgp->wgp_endpoint0, sizeof(*wgp->wgp_endpoint0));

	/* Synchronization objects, then the peer itself.  */
	pserialize_destroy(wgp->wgp_psz);
	mutex_obj_free(wgp->wgp_intr_lock);
	mutex_obj_free(wgp->wgp_lock);

	kmem_free(wgp, sizeof(*wgp));
}
3517
/*
 * wg_destroy_all_peers(wg)
 *
 *	Remove and destroy every peer of the interface, one per pass:
 *	under wg_lock, unlink the first peer found from the by-name and
 *	by-pubkey maps and from the peer list; then, with wg_lock
 *	released, destroy the peer, reclaim the staged thmap garbage,
 *	and start over.  Returns when a pass finds no peer.
 */
static void
wg_destroy_all_peers(struct wg_softc *wg)
{
	struct wg_peer *wgp, *wgp0 __diagused;
	void *garbage_byname, *garbage_bypubkey;

restart:
	garbage_byname = garbage_bypubkey = NULL;
	mutex_enter(wg->wg_lock);
	WG_PEER_WRITER_FOREACH(wgp, wg) {
		/* A peer is in the by-name map only if it has a name.  */
		if (wgp->wgp_name[0]) {
			wgp0 = thmap_del(wg->wg_peers_byname, wgp->wgp_name,
			    strlen(wgp->wgp_name));
			KASSERT(wgp0 == wgp);
			garbage_byname = thmap_stage_gc(wg->wg_peers_byname);
		}
		wgp0 = thmap_del(wg->wg_peers_bypubkey, wgp->wgp_pubkey,
		    sizeof(wgp->wgp_pubkey));
		KASSERT(wgp0 == wgp);
		garbage_bypubkey = thmap_stage_gc(wg->wg_peers_bypubkey);
		WG_PEER_WRITER_REMOVE(wgp);
		wg->wg_npeers--;
		/* Wait for pserialize readers before the entry goes away.  */
		mutex_enter(wgp->wgp_lock);
		pserialize_perform(wgp->wgp_psz);
		mutex_exit(wgp->wgp_lock);
		PSLIST_ENTRY_DESTROY(wgp, wgp_peerlist_entry);
		/* Only one peer per pass; the rest happens outside wg_lock.  */
		break;
	}
	mutex_exit(wg->wg_lock);

	/*
	 * No peer was found on this pass, so we are done.  (Relies on
	 * WG_PEER_WRITER_FOREACH leaving wgp NULL when the list is
	 * exhausted.)
	 */
	if (wgp == NULL)
		return;

	psref_target_destroy(&wgp->wgp_psref, wg_psref_class);

	wg_destroy_peer(wgp);
	/* garbage_byname is still NULL here if the peer had no name.  */
	thmap_gc(wg->wg_peers_byname, garbage_byname);
	thmap_gc(wg->wg_peers_bypubkey, garbage_bypubkey);

	goto restart;
}
3559
/*
 * wg_destroy_peer_name(wg, name)
 *
 *	Look up the peer registered under name, unlink it from the
 *	by-name and by-pubkey maps and from the peer list, and destroy
 *	it.  If that was the last peer, the interface's link state is
 *	changed to LINK_STATE_DOWN.
 *
 *	Returns 0 on success, or ENOENT if no peer has that name.
 */
static int
wg_destroy_peer_name(struct wg_softc *wg, const char *name)
{
	struct wg_peer *wgp, *wgp0 __diagused;
	/* Assigned only when a peer is found; not read otherwise.  */
	void *garbage_byname, *garbage_bypubkey;

	mutex_enter(wg->wg_lock);
	wgp = thmap_del(wg->wg_peers_byname, name, strlen(name));
	if (wgp != NULL) {
		wgp0 = thmap_del(wg->wg_peers_bypubkey, wgp->wgp_pubkey,
		    sizeof(wgp->wgp_pubkey));
		KASSERT(wgp0 == wgp);
		garbage_byname = thmap_stage_gc(wg->wg_peers_byname);
		garbage_bypubkey = thmap_stage_gc(wg->wg_peers_bypubkey);
		WG_PEER_WRITER_REMOVE(wgp);
		wg->wg_npeers--;
		if (wg->wg_npeers == 0)
			if_link_state_change(&wg->wg_if, LINK_STATE_DOWN);
		/* Wait for pserialize readers before the entry goes away.  */
		mutex_enter(wgp->wgp_lock);
		pserialize_perform(wgp->wgp_psz);
		mutex_exit(wgp->wgp_lock);
		PSLIST_ENTRY_DESTROY(wgp, wgp_peerlist_entry);
	}
	mutex_exit(wg->wg_lock);

	if (wgp == NULL)
		return ENOENT;

	/* The remaining teardown happens without wg_lock held.  */
	psref_target_destroy(&wgp->wgp_psref, wg_psref_class);

	wg_destroy_peer(wgp);
	thmap_gc(wg->wg_peers_byname, garbage_byname);
	thmap_gc(wg->wg_peers_bypubkey, garbage_bypubkey);

	return 0;
}
3596
3597static int
3598wg_if_attach(struct wg_softc *wg)
3599{
3600
3601	wg->wg_if.if_addrlen = 0;
3602	wg->wg_if.if_mtu = WG_MTU;
3603	wg->wg_if.if_flags = IFF_MULTICAST;
3604	wg->wg_if.if_extflags = IFEF_MPSAFE;
3605	wg->wg_if.if_ioctl = wg_ioctl;
3606	wg->wg_if.if_output = wg_output;
3607	wg->wg_if.if_init = wg_init;
3608#ifdef ALTQ
3609	wg->wg_if.if_start = wg_start;
3610#endif
3611	wg->wg_if.if_stop = wg_stop;
3612	wg->wg_if.if_type = IFT_OTHER;
3613	wg->wg_if.if_dlt = DLT_NULL;
3614	wg->wg_if.if_softc = wg;
3615#ifdef ALTQ
3616	IFQ_SET_READY(&wg->wg_if.if_snd);
3617#endif
3618	if_initialize(&wg->wg_if);
3619
3620	wg->wg_if.if_link_state = LINK_STATE_DOWN;
3621	if_alloc_sadl(&wg->wg_if);
3622	if_register(&wg->wg_if);
3623
3624	bpf_attach(&wg->wg_if, DLT_NULL, sizeof(uint32_t));
3625
3626	return 0;
3627}
3628
3629static void
3630wg_if_detach(struct wg_softc *wg)
3631{
3632	struct ifnet *ifp = &wg->wg_if;
3633
3634	bpf_detach(ifp);
3635	if_detach(ifp);
3636}
3637
/*
 * wg_clone_create(ifc, unit)
 *
 *	Interface cloner create routine: allocate a wg_softc, set up
 *	its peer list, lookup maps, locks and threadpool job, create
 *	the UDP socket and radix table for each configured address
 *	family, and attach the network interface.
 *
 *	Returns 0 on success.  On failure everything set up so far is
 *	undone through the fail* labels and the error is returned.
 */
static int
wg_clone_create(struct if_clone *ifc, int unit)
{
	struct wg_softc *wg;
	int error;

	wg_guarantee_initialized();

	/* Account for the new instance; paired with wg_count_dec below.  */
	error = wg_count_inc();
	if (error)
		return error;

	wg = kmem_zalloc(sizeof(*wg), KM_SLEEP);

	if_initname(&wg->wg_if, ifc->ifc_name, unit);

	PSLIST_INIT(&wg->wg_peers);
	wg->wg_peers_bypubkey = thmap_create(0, NULL, THMAP_NOCOPY);
	wg->wg_peers_byname = thmap_create(0, NULL, THMAP_NOCOPY);
	wg->wg_sessions_byindex = thmap_create(0, NULL, THMAP_NOCOPY);
	wg->wg_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	wg->wg_intr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
	wg->wg_rwlock = rw_obj_alloc();
	threadpool_job_init(&wg->wg_job, wg_job, wg->wg_intr_lock,
	    "%s", if_name(&wg->wg_if));
	wg->wg_ops = &wg_ops_rumpkernel;

	error = threadpool_get(&wg->wg_threadpool, PRI_NONE);
	if (error)
		goto fail0;

	/* One UDP socket and one allowed-IPs radix table per family.  */
#ifdef INET
	error = wg_socreate(wg, AF_INET, &wg->wg_so4);
	if (error)
		goto fail1;
	rn_inithead((void **)&wg->wg_rtable_ipv4,
	    offsetof(struct sockaddr_in, sin_addr) * NBBY);
#endif
#ifdef INET6
	error = wg_socreate(wg, AF_INET6, &wg->wg_so6);
	if (error)
		goto fail2;
	rn_inithead((void **)&wg->wg_rtable_ipv6,
	    offsetof(struct sockaddr_in6, sin6_addr) * NBBY);
#endif

	error = wg_if_attach(wg);
	if (error)
		goto fail3;

	return 0;

	/*
	 * Error unwinding, in reverse order of setup.  fail4 is not
	 * currently the target of any goto, hence __unused.
	 */
fail4: __unused
	wg_if_detach(wg);
fail3:	wg_destroy_all_peers(wg);
	/* Stop socket upcalls before cancelling the job they schedule.  */
#ifdef INET6
	solock(wg->wg_so6);
	wg->wg_so6->so_rcv.sb_flags &= ~SB_UPCALL;
	sounlock(wg->wg_so6);
#endif
#ifdef INET
	solock(wg->wg_so4);
	wg->wg_so4->so_rcv.sb_flags &= ~SB_UPCALL;
	sounlock(wg->wg_so4);
#endif
	mutex_enter(wg->wg_intr_lock);
	threadpool_cancel_job(wg->wg_threadpool, &wg->wg_job);
	mutex_exit(wg->wg_intr_lock);
#ifdef INET6
	if (wg->wg_rtable_ipv6 != NULL)
		free(wg->wg_rtable_ipv6, M_RTABLE);
	soclose(wg->wg_so6);
fail2:
#endif
#ifdef INET
	if (wg->wg_rtable_ipv4 != NULL)
		free(wg->wg_rtable_ipv4, M_RTABLE);
	soclose(wg->wg_so4);
fail1:
#endif
	threadpool_put(wg->wg_threadpool, PRI_NONE);
fail0:	threadpool_job_destroy(&wg->wg_job);
	rw_obj_free(wg->wg_rwlock);
	mutex_obj_free(wg->wg_intr_lock);
	mutex_obj_free(wg->wg_lock);
	thmap_destroy(wg->wg_sessions_byindex);
	thmap_destroy(wg->wg_peers_byname);
	thmap_destroy(wg->wg_peers_bypubkey);
	PSLIST_DESTROY(&wg->wg_peers);
	kmem_free(wg, sizeof(*wg));
	wg_count_dec();
	return error;
}
3731
/*
 * wg_clone_destroy(ifp)
 *
 *	Interface cloner destroy routine: detach the interface, destroy
 *	all peers, stop socket upcalls and the threadpool job, and
 *	release everything wg_clone_create set up.
 *
 *	Always returns 0.
 */
static int
wg_clone_destroy(struct ifnet *ifp)
{
	struct wg_softc *wg = container_of(ifp, struct wg_softc, wg_if);

#ifdef WG_RUMPKERNEL
	if (wg_user_mode(wg)) {
		rumpuser_wg_destroy(wg->wg_user);
		wg->wg_user = NULL;
	}
#endif

	wg_if_detach(wg);
	wg_destroy_all_peers(wg);
	/* Stop socket upcalls before cancelling the job they schedule.  */
#ifdef INET6
	solock(wg->wg_so6);
	wg->wg_so6->so_rcv.sb_flags &= ~SB_UPCALL;
	sounlock(wg->wg_so6);
#endif
#ifdef INET
	solock(wg->wg_so4);
	wg->wg_so4->so_rcv.sb_flags &= ~SB_UPCALL;
	sounlock(wg->wg_so4);
#endif
	mutex_enter(wg->wg_intr_lock);
	threadpool_cancel_job(wg->wg_threadpool, &wg->wg_job);
	mutex_exit(wg->wg_intr_lock);
	/* Per-family radix tables and sockets.  */
#ifdef INET6
	if (wg->wg_rtable_ipv6 != NULL)
		free(wg->wg_rtable_ipv6, M_RTABLE);
	soclose(wg->wg_so6);
#endif
#ifdef INET
	if (wg->wg_rtable_ipv4 != NULL)
		free(wg->wg_rtable_ipv4, M_RTABLE);
	soclose(wg->wg_so4);
#endif
	/* Remaining resources, in the same order as the create error path.  */
	threadpool_put(wg->wg_threadpool, PRI_NONE);
	threadpool_job_destroy(&wg->wg_job);
	rw_obj_free(wg->wg_rwlock);
	mutex_obj_free(wg->wg_intr_lock);
	mutex_obj_free(wg->wg_lock);
	thmap_destroy(wg->wg_sessions_byindex);
	thmap_destroy(wg->wg_peers_byname);
	thmap_destroy(wg->wg_peers_bypubkey);
	PSLIST_DESTROY(&wg->wg_peers);
	kmem_free(wg, sizeof(*wg));
	wg_count_dec();

	return 0;
}
3783
3784static struct wg_peer *
3785wg_pick_peer_by_sa(struct wg_softc *wg, const struct sockaddr *sa,
3786    struct psref *psref)
3787{
3788	struct radix_node_head *rnh;
3789	struct radix_node *rn;
3790	struct wg_peer *wgp = NULL;
3791	struct wg_allowedip *wga;
3792
3793#ifdef WG_DEBUG_LOG
3794	char addrstr[128];
3795	sockaddr_format(sa, addrstr, sizeof(addrstr));
3796	WG_DLOG("sa=%s\n", addrstr);
3797#endif
3798
3799	rw_enter(wg->wg_rwlock, RW_READER);
3800
3801	rnh = wg_rnh(wg, sa->sa_family);
3802	if (rnh == NULL)
3803		goto out;
3804
3805	rn = rnh->rnh_matchaddr(sa, rnh);
3806	if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
3807		goto out;
3808
3809	WG_TRACE("success");
3810
3811	wga = container_of(rn, struct wg_allowedip, wga_nodes[0]);
3812	wgp = wga->wga_peer;
3813	wg_get_peer(wgp, psref);
3814
3815out:
3816	rw_exit(wg->wg_rwlock);
3817	return wgp;
3818}
3819
3820static void
3821wg_fill_msg_data(struct wg_softc *wg, struct wg_peer *wgp,
3822    struct wg_session *wgs, struct wg_msg_data *wgmd)
3823{
3824
3825	memset(wgmd, 0, sizeof(*wgmd));
3826	wgmd->wgmd_type = htole32(WG_MSG_TYPE_DATA);
3827	wgmd->wgmd_receiver = wgs->wgs_remote_index;
3828	/* [W] 5.4.6: msg.counter := Nm^send */
3829	/* [W] 5.4.6: Nm^send := Nm^send + 1 */
3830	wgmd->wgmd_counter = htole64(wg_session_inc_send_counter(wgs));
3831	WG_DLOG("counter=%"PRIu64"\n", le64toh(wgmd->wgmd_counter));
3832}
3833
/*
 * wg_output(ifp, m, dst, rt)
 *
 *	if_output routine.  Picks the peer whose allowed IPs cover dst
 *	and then either
 *	  - queues the packet on wg_pktq (or, with ALTQ enabled, on the
 *	    interface send queue) when the peer has a stable session, or
 *	  - parks it as the peer's single pending packet and requests a
 *	    handshake when it does not; if a packet is already pending,
 *	    the new one is dropped.
 *
 *	Always consumes m.  Returns 0, the error from
 *	if_tunnel_check_nesting, EHOSTUNREACH if no peer matches,
 *	ENOBUFS if wg_pktq is full, or the ALTQ enqueue error.  Note
 *	that a packet dropped because one is already pending still
 *	yields 0.
 */
static int
wg_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    const struct rtentry *rt)
{
	struct wg_softc *wg = ifp->if_softc;
	struct wg_peer *wgp = NULL;
	struct wg_session *wgs = NULL;
	struct psref wgp_psref, wgs_psref;
	int bound;
	int error;

	/* Keep the LWP bound while psrefs are held; undone at out0.  */
	bound = curlwp_bind();

	/* TODO make the nest limit configurable via sysctl */
	error = if_tunnel_check_nesting(ifp, m, 1);
	if (error) {
		WGLOG(LOG_ERR,
		    "%s: tunneling loop detected and packet dropped\n",
		    if_name(&wg->wg_if));
		goto out0;
	}

#ifdef ALTQ
	bool altq = atomic_load_relaxed(&ifp->if_snd.altq_flags)
	    & ALTQF_ENABLED;
	if (altq)
		IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
#endif

	bpf_mtap_af(ifp, dst->sa_family, m, BPF_D_OUT);

	m->m_flags &= ~(M_BCAST|M_MCAST);

	wgp = wg_pick_peer_by_sa(wg, dst, &wgp_psref);
	if (wgp == NULL) {
		WG_TRACE("peer not found");
		error = EHOSTUNREACH;
		goto out0;
	}

	/* Clear checksum-offload flags. */
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;

	/* Check whether there's an established session.  */
	wgs = wg_get_stable_session(wgp, &wgs_psref);
	if (wgs == NULL) {
		/*
		 * No established session.  If we're the first to try
		 * sending data, schedule a handshake and queue the
		 * packet for when the handshake is done; otherwise
		 * just drop the packet and let the ongoing handshake
		 * attempt continue.  We could queue more data packets
		 * but it's not clear that's worthwhile.
		 */
		if (atomic_cas_ptr(&wgp->wgp_pending, NULL, m) == NULL) {
			m = NULL; /* consume */
			WG_TRACE("queued first packet; init handshake");
			wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
		} else {
			WG_TRACE("first packet already queued, dropping");
		}
		/* error is still 0 here; no session reference to release.  */
		goto out1;
	}

	/* There's an established session.  Toss it in the queue.  */
#ifdef ALTQ
	if (altq) {
		mutex_enter(ifp->if_snd.ifq_lock);
		/* Re-check under the queue lock.  */
		if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
			M_SETCTX(m, wgp);
			ALTQ_ENQUEUE(&ifp->if_snd, m, error);
			m = NULL; /* consume */
		}
		mutex_exit(ifp->if_snd.ifq_lock);
		if (m == NULL) {
			wg_start(ifp);
			goto out2;
		}
		/* ALTQ turned out to be disabled: use wg_pktq below.  */
	}
#endif
	/* Preemption is disabled while the CPU index is in use.  */
	kpreempt_disable();
	const uint32_t h = curcpu()->ci_index;	// pktq_rps_hash(m)
	M_SETCTX(m, wgp);
	if (__predict_false(!pktq_enqueue(wg_pktq, m, h))) {
		WGLOG(LOG_ERR, "%s: pktq full, dropping\n",
		    if_name(&wg->wg_if));
		error = ENOBUFS;
		goto out3;
	}
	m = NULL;		/* consumed */
	error = 0;
out3:	kpreempt_enable();

#ifdef ALTQ
out2:
#endif
	wg_put_session(wgs, &wgs_psref);
out1:	wg_put_peer(wgp, &wgp_psref);
	/* m is non-NULL here only if nobody took ownership of it.  */
out0:	if (m)
		m_freem(m);
	curlwp_bindx(bound);
	return error;
}
3938
3939static int
3940wg_send_udp(struct wg_peer *wgp, struct mbuf *m)
3941{
3942	struct psref psref;
3943	struct wg_sockaddr *wgsa;
3944	int error;
3945	struct socket *so;
3946
3947	wgsa = wg_get_endpoint_sa(wgp, &psref);
3948	so = wg_get_so_by_peer(wgp, wgsa);
3949	solock(so);
3950	if (wgsatosa(wgsa)->sa_family == AF_INET) {
3951		error = udp_send(so, m, wgsatosa(wgsa), NULL, curlwp);
3952	} else {
3953#ifdef INET6
3954		error = udp6_output(sotoinpcb(so), m, wgsatosin6(wgsa),
3955		    NULL, curlwp);
3956#else
3957		m_freem(m);
3958		error = EPFNOSUPPORT;
3959#endif
3960	}
3961	sounlock(so);
3962	wg_put_sa(wgp, wgsa, &psref);
3963
3964	return error;
3965}
3966
3967/* Inspired by pppoe_get_mbuf */
3968static struct mbuf *
3969wg_get_mbuf(size_t leading_len, size_t len)
3970{
3971	struct mbuf *m;
3972
3973	KASSERT(leading_len <= MCLBYTES);
3974	KASSERT(len <= MCLBYTES - leading_len);
3975
3976	m = m_gethdr(M_DONTWAIT, MT_DATA);
3977	if (m == NULL)
3978		return NULL;
3979	if (len + leading_len > MHLEN) {
3980		m_clget(m, M_DONTWAIT);
3981		if ((m->m_flags & M_EXT) == 0) {
3982			m_free(m);
3983			return NULL;
3984		}
3985	}
3986	m->m_data += leading_len;
3987	m->m_pkthdr.len = m->m_len = len;
3988
3989	return m;
3990}
3991
3992static int
3993wg_send_data_msg(struct wg_peer *wgp, struct wg_session *wgs,
3994    struct mbuf *m)
3995{
3996	struct wg_softc *wg = wgp->wgp_sc;
3997	int error;
3998	size_t inner_len, padded_len, encrypted_len;
3999	char *padded_buf = NULL;
4000	size_t mlen;
4001	struct wg_msg_data *wgmd;
4002	bool free_padded_buf = false;
4003	struct mbuf *n;
4004	size_t leading_len = max_hdr + sizeof(struct udphdr);
4005
4006	mlen = m_length(m);
4007	inner_len = mlen;
4008	padded_len = roundup(mlen, 16);
4009	encrypted_len = padded_len + WG_AUTHTAG_LEN;
4010	WG_DLOG("inner=%lu, padded=%lu, encrypted_len=%lu\n",
4011	    inner_len, padded_len, encrypted_len);
4012	if (mlen != 0) {
4013		bool success;
4014		success = m_ensure_contig(&m, padded_len);
4015		if (success) {
4016			padded_buf = mtod(m, char *);
4017		} else {
4018			padded_buf = kmem_intr_alloc(padded_len, KM_NOSLEEP);
4019			if (padded_buf == NULL) {
4020				error = ENOBUFS;
4021				goto end;
4022			}
4023			free_padded_buf = true;
4024			m_copydata(m, 0, mlen, padded_buf);
4025		}
4026		memset(padded_buf + mlen, 0, padded_len - inner_len);
4027	}
4028
4029	n = wg_get_mbuf(leading_len, sizeof(*wgmd) + encrypted_len);
4030	if (n == NULL) {
4031		error = ENOBUFS;
4032		goto end;
4033	}
4034	KASSERT(n->m_len >= sizeof(*wgmd));
4035	wgmd = mtod(n, struct wg_msg_data *);
4036	wg_fill_msg_data(wg, wgp, wgs, wgmd);
4037	/* [W] 5.4.6: AEAD(Tm^send, Nm^send, P, e) */
4038	wg_algo_aead_enc((char *)wgmd + sizeof(*wgmd), encrypted_len,
4039	    wgs->wgs_tkey_send, le64toh(wgmd->wgmd_counter),
4040	    padded_buf, padded_len,
4041	    NULL, 0);
4042
4043	error = wg->wg_ops->send_data_msg(wgp, n);
4044	if (error == 0) {
4045		struct ifnet *ifp = &wg->wg_if;
4046		if_statadd(ifp, if_obytes, mlen);
4047		if_statinc(ifp, if_opackets);
4048		if (wgs->wgs_is_initiator &&
4049		    wgs->wgs_time_last_data_sent == 0) {
4050			/*
4051			 * [W] 6.2 Transport Message Limits
4052			 * "if a peer is the initiator of a current secure
4053			 *  session, WireGuard will send a handshake initiation
4054			 *  message to begin a new secure session if, after
4055			 *  transmitting a transport data message, the current
4056			 *  secure session is REKEY-AFTER-TIME seconds old,"
4057			 */
4058			wg_schedule_rekey_timer(wgp);
4059		}
4060		wgs->wgs_time_last_data_sent = time_uptime;
4061		if (wg_session_get_send_counter(wgs) >=
4062		    wg_rekey_after_messages) {
4063			/*
4064			 * [W] 6.2 Transport Message Limits
4065			 * "WireGuard will try to create a new session, by
4066			 *  sending a handshake initiation message (section
4067			 *  5.4.2), after it has sent REKEY-AFTER-MESSAGES
4068			 *  transport data messages..."
4069			 */
4070			wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
4071		}
4072	}
4073end:
4074	m_freem(m);
4075	if (free_padded_buf)
4076		kmem_intr_free(padded_buf, padded_len);
4077	return error;
4078}
4079
4080static void
4081wg_input(struct ifnet *ifp, struct mbuf *m, const int af)
4082{
4083	pktqueue_t *pktq;
4084	size_t pktlen;
4085
4086	KASSERT(af == AF_INET || af == AF_INET6);
4087
4088	WG_TRACE("");
4089
4090	m_set_rcvif(m, ifp);
4091	pktlen = m->m_pkthdr.len;
4092
4093	bpf_mtap_af(ifp, af, m, BPF_D_IN);
4094
4095	switch (af) {
4096	case AF_INET:
4097		pktq = ip_pktq;
4098		break;
4099#ifdef INET6
4100	case AF_INET6:
4101		pktq = ip6_pktq;
4102		break;
4103#endif
4104	default:
4105		panic("invalid af=%d", af);
4106	}
4107
4108	kpreempt_disable();
4109	const u_int h = curcpu()->ci_index;
4110	if (__predict_true(pktq_enqueue(pktq, m, h))) {
4111		if_statadd(ifp, if_ibytes, pktlen);
4112		if_statinc(ifp, if_ipackets);
4113	} else {
4114		m_freem(m);
4115	}
4116	kpreempt_enable();
4117}
4118
4119static void
4120wg_calc_pubkey(uint8_t pubkey[WG_STATIC_KEY_LEN],
4121    const uint8_t privkey[WG_STATIC_KEY_LEN])
4122{
4123
4124	crypto_scalarmult_base(pubkey, privkey);
4125}
4126
4127static int
4128wg_rtable_add_route(struct wg_softc *wg, struct wg_allowedip *wga)
4129{
4130	struct radix_node_head *rnh;
4131	struct radix_node *rn;
4132	int error = 0;
4133
4134	rw_enter(wg->wg_rwlock, RW_WRITER);
4135	rnh = wg_rnh(wg, wga->wga_family);
4136	KASSERT(rnh != NULL);
4137	rn = rnh->rnh_addaddr(&wga->wga_sa_addr, &wga->wga_sa_mask, rnh,
4138	    wga->wga_nodes);
4139	rw_exit(wg->wg_rwlock);
4140
4141	if (rn == NULL)
4142		error = EEXIST;
4143
4144	return error;
4145}
4146
4147static int
4148wg_handle_prop_peer(struct wg_softc *wg, prop_dictionary_t peer,
4149    struct wg_peer **wgpp)
4150{
4151	int error = 0;
4152	const void *pubkey;
4153	size_t pubkey_len;
4154	const void *psk;
4155	size_t psk_len;
4156	const char *name = NULL;
4157
4158	if (prop_dictionary_get_string(peer, "name", &name)) {
4159		if (strlen(name) > WG_PEER_NAME_MAXLEN) {
4160			error = EINVAL;
4161			goto out;
4162		}
4163	}
4164
4165	if (!prop_dictionary_get_data(peer, "public_key",
4166		&pubkey, &pubkey_len)) {
4167		error = EINVAL;
4168		goto out;
4169	}
4170#ifdef WG_DEBUG_DUMP
4171    {
4172	char *hex = gethexdump(pubkey, pubkey_len);
4173	log(LOG_DEBUG, "pubkey=%p, pubkey_len=%lu\n%s\n",
4174	    pubkey, pubkey_len, hex);
4175	puthexdump(hex, pubkey, pubkey_len);
4176    }
4177#endif
4178
4179	struct wg_peer *wgp = wg_alloc_peer(wg);
4180	memcpy(wgp->wgp_pubkey, pubkey, sizeof(wgp->wgp_pubkey));
4181	if (name != NULL)
4182		strncpy(wgp->wgp_name, name, sizeof(wgp->wgp_name));
4183
4184	if (prop_dictionary_get_data(peer, "preshared_key", &psk, &psk_len)) {
4185		if (psk_len != sizeof(wgp->wgp_psk)) {
4186			error = EINVAL;
4187			goto out;
4188		}
4189		memcpy(wgp->wgp_psk, psk, sizeof(wgp->wgp_psk));
4190	}
4191
4192	const void *addr;
4193	size_t addr_len;
4194	struct wg_sockaddr *wgsa = wgp->wgp_endpoint;
4195
4196	if (!prop_dictionary_get_data(peer, "endpoint", &addr, &addr_len))
4197		goto skip_endpoint;
4198	if (addr_len < sizeof(*wgsatosa(wgsa)) ||
4199	    addr_len > sizeof(*wgsatoss(wgsa))) {
4200		error = EINVAL;
4201		goto out;
4202	}
4203	memcpy(wgsatoss(wgsa), addr, addr_len);
4204	switch (wgsa_family(wgsa)) {
4205	case AF_INET:
4206#ifdef INET6
4207	case AF_INET6:
4208#endif
4209		break;
4210	default:
4211		error = EPFNOSUPPORT;
4212		goto out;
4213	}
4214	if (addr_len != sockaddr_getsize_by_family(wgsa_family(wgsa))) {
4215		error = EINVAL;
4216		goto out;
4217	}
4218    {
4219	char addrstr[128];
4220	sockaddr_format(wgsatosa(wgsa), addrstr, sizeof(addrstr));
4221	WG_DLOG("addr=%s\n", addrstr);
4222    }
4223	wgp->wgp_endpoint_available = true;
4224
4225	prop_array_t allowedips;
4226skip_endpoint:
4227	allowedips = prop_dictionary_get(peer, "allowedips");
4228	if (allowedips == NULL)
4229		goto skip;
4230
4231	prop_object_iterator_t _it = prop_array_iterator(allowedips);
4232	prop_dictionary_t prop_allowedip;
4233	int j = 0;
4234	while ((prop_allowedip = prop_object_iterator_next(_it)) != NULL) {
4235		struct wg_allowedip *wga = &wgp->wgp_allowedips[j];
4236
4237		if (!prop_dictionary_get_int(prop_allowedip, "family",
4238			&wga->wga_family))
4239			continue;
4240		if (!prop_dictionary_get_data(prop_allowedip, "ip",
4241			&addr, &addr_len))
4242			continue;
4243		if (!prop_dictionary_get_uint8(prop_allowedip, "cidr",
4244			&wga->wga_cidr))
4245			continue;
4246
4247		switch (wga->wga_family) {
4248		case AF_INET: {
4249			struct sockaddr_in sin;
4250			char addrstr[128];
4251			struct in_addr mask;
4252			struct sockaddr_in sin_mask;
4253
4254			if (addr_len != sizeof(struct in_addr))
4255				return EINVAL;
4256			memcpy(&wga->wga_addr4, addr, addr_len);
4257
4258			sockaddr_in_init(&sin, (const struct in_addr *)addr,
4259			    0);
4260			sockaddr_copy(&wga->wga_sa_addr,
4261			    sizeof(sin), sintosa(&sin));
4262
4263			sockaddr_format(sintosa(&sin),
4264			    addrstr, sizeof(addrstr));
4265			WG_DLOG("addr=%s/%d\n", addrstr, wga->wga_cidr);
4266
4267			in_len2mask(&mask, wga->wga_cidr);
4268			sockaddr_in_init(&sin_mask, &mask, 0);
4269			sockaddr_copy(&wga->wga_sa_mask,
4270			    sizeof(sin_mask), sintosa(&sin_mask));
4271
4272			break;
4273		    }
4274#ifdef INET6
4275		case AF_INET6: {
4276			struct sockaddr_in6 sin6;
4277			char addrstr[128];
4278			struct in6_addr mask;
4279			struct sockaddr_in6 sin6_mask;
4280
4281			if (addr_len != sizeof(struct in6_addr))
4282				return EINVAL;
4283			memcpy(&wga->wga_addr6, addr, addr_len);
4284
4285			sockaddr_in6_init(&sin6, (const struct in6_addr *)addr,
4286			    0, 0, 0);
4287			sockaddr_copy(&wga->wga_sa_addr,
4288			    sizeof(sin6), sin6tosa(&sin6));
4289
4290			sockaddr_format(sin6tosa(&sin6),
4291			    addrstr, sizeof(addrstr));
4292			WG_DLOG("addr=%s/%d\n", addrstr, wga->wga_cidr);
4293
4294			in6_prefixlen2mask(&mask, wga->wga_cidr);
4295			sockaddr_in6_init(&sin6_mask, &mask, 0, 0, 0);
4296			sockaddr_copy(&wga->wga_sa_mask,
4297			    sizeof(sin6_mask), sin6tosa(&sin6_mask));
4298
4299			break;
4300		    }
4301#endif
4302		default:
4303			error = EINVAL;
4304			goto out;
4305		}
4306		wga->wga_peer = wgp;
4307
4308		error = wg_rtable_add_route(wg, wga);
4309		if (error != 0)
4310			goto out;
4311
4312		j++;
4313	}
4314	wgp->wgp_n_allowedips = j;
4315skip:
4316	*wgpp = wgp;
4317out:
4318	return error;
4319}
4320
4321static int
4322wg_alloc_prop_buf(char **_buf, struct ifdrv *ifd)
4323{
4324	int error;
4325	char *buf;
4326
4327	WG_DLOG("buf=%p, len=%lu\n", ifd->ifd_data, ifd->ifd_len);
4328	if (ifd->ifd_len >= WG_MAX_PROPLEN)
4329		return E2BIG;
4330	buf = kmem_alloc(ifd->ifd_len + 1, KM_SLEEP);
4331	error = copyin(ifd->ifd_data, buf, ifd->ifd_len);
4332	if (error != 0)
4333		return error;
4334	buf[ifd->ifd_len] = '\0';
4335#ifdef WG_DEBUG_DUMP
4336	log(LOG_DEBUG, "%.*s\n",
4337	    (int)MIN(INT_MAX, ifd->ifd_len),
4338	    (const char *)buf);
4339#endif
4340	*_buf = buf;
4341	return 0;
4342}
4343
4344static int
4345wg_ioctl_set_private_key(struct wg_softc *wg, struct ifdrv *ifd)
4346{
4347	int error;
4348	prop_dictionary_t prop_dict;
4349	char *buf = NULL;
4350	const void *privkey;
4351	size_t privkey_len;
4352
4353	error = wg_alloc_prop_buf(&buf, ifd);
4354	if (error != 0)
4355		return error;
4356	error = EINVAL;
4357	prop_dict = prop_dictionary_internalize(buf);
4358	if (prop_dict == NULL)
4359		goto out;
4360	if (!prop_dictionary_get_data(prop_dict, "private_key",
4361		&privkey, &privkey_len))
4362		goto out;
4363#ifdef WG_DEBUG_DUMP
4364    {
4365	char *hex = gethexdump(privkey, privkey_len);
4366	log(LOG_DEBUG, "privkey=%p, privkey_len=%lu\n%s\n",
4367	    privkey, privkey_len, hex);
4368	puthexdump(hex, privkey, privkey_len);
4369    }
4370#endif
4371	if (privkey_len != WG_STATIC_KEY_LEN)
4372		goto out;
4373	memcpy(wg->wg_privkey, privkey, WG_STATIC_KEY_LEN);
4374	wg_calc_pubkey(wg->wg_pubkey, wg->wg_privkey);
4375	error = 0;
4376
4377out:
4378	kmem_free(buf, ifd->ifd_len + 1);
4379	return error;
4380}
4381
4382static int
4383wg_ioctl_set_listen_port(struct wg_softc *wg, struct ifdrv *ifd)
4384{
4385	int error;
4386	prop_dictionary_t prop_dict;
4387	char *buf = NULL;
4388	uint16_t port;
4389
4390	error = wg_alloc_prop_buf(&buf, ifd);
4391	if (error != 0)
4392		return error;
4393	error = EINVAL;
4394	prop_dict = prop_dictionary_internalize(buf);
4395	if (prop_dict == NULL)
4396		goto out;
4397	if (!prop_dictionary_get_uint16(prop_dict, "listen_port", &port))
4398		goto out;
4399
4400	error = wg->wg_ops->bind_port(wg, (uint16_t)port);
4401
4402out:
4403	kmem_free(buf, ifd->ifd_len + 1);
4404	return error;
4405}
4406
4407static int
4408wg_ioctl_add_peer(struct wg_softc *wg, struct ifdrv *ifd)
4409{
4410	int error;
4411	prop_dictionary_t prop_dict;
4412	char *buf = NULL;
4413	struct wg_peer *wgp = NULL, *wgp0 __diagused;
4414
4415	error = wg_alloc_prop_buf(&buf, ifd);
4416	if (error != 0)
4417		return error;
4418	error = EINVAL;
4419	prop_dict = prop_dictionary_internalize(buf);
4420	if (prop_dict == NULL)
4421		goto out;
4422
4423	error = wg_handle_prop_peer(wg, prop_dict, &wgp);
4424	if (error != 0)
4425		goto out;
4426
4427	mutex_enter(wg->wg_lock);
4428	if (thmap_get(wg->wg_peers_bypubkey, wgp->wgp_pubkey,
4429		sizeof(wgp->wgp_pubkey)) != NULL ||
4430	    (wgp->wgp_name[0] &&
4431		thmap_get(wg->wg_peers_byname, wgp->wgp_name,
4432		    strlen(wgp->wgp_name)) != NULL)) {
4433		mutex_exit(wg->wg_lock);
4434		wg_destroy_peer(wgp);
4435		error = EEXIST;
4436		goto out;
4437	}
4438	wgp0 = thmap_put(wg->wg_peers_bypubkey, wgp->wgp_pubkey,
4439	    sizeof(wgp->wgp_pubkey), wgp);
4440	KASSERT(wgp0 == wgp);
4441	if (wgp->wgp_name[0]) {
4442		wgp0 = thmap_put(wg->wg_peers_byname, wgp->wgp_name,
4443		    strlen(wgp->wgp_name), wgp);
4444		KASSERT(wgp0 == wgp);
4445	}
4446	WG_PEER_WRITER_INSERT_HEAD(wgp, wg);
4447	wg->wg_npeers++;
4448	mutex_exit(wg->wg_lock);
4449
4450	if_link_state_change(&wg->wg_if, LINK_STATE_UP);
4451
4452out:
4453	kmem_free(buf, ifd->ifd_len + 1);
4454	return error;
4455}
4456
4457static int
4458wg_ioctl_delete_peer(struct wg_softc *wg, struct ifdrv *ifd)
4459{
4460	int error;
4461	prop_dictionary_t prop_dict;
4462	char *buf = NULL;
4463	const char *name;
4464
4465	error = wg_alloc_prop_buf(&buf, ifd);
4466	if (error != 0)
4467		return error;
4468	error = EINVAL;
4469	prop_dict = prop_dictionary_internalize(buf);
4470	if (prop_dict == NULL)
4471		goto out;
4472
4473	if (!prop_dictionary_get_string(prop_dict, "name", &name))
4474		goto out;
4475	if (strlen(name) > WG_PEER_NAME_MAXLEN)
4476		goto out;
4477
4478	error = wg_destroy_peer_name(wg, name);
4479out:
4480	kmem_free(buf, ifd->ifd_len + 1);
4481	return error;
4482}
4483
4484static bool
4485wg_is_authorized(struct wg_softc *wg, u_long cmd)
4486{
4487	int au = cmd == SIOCGDRVSPEC ?
4488	    KAUTH_REQ_NETWORK_INTERFACE_WG_GETPRIV :
4489	    KAUTH_REQ_NETWORK_INTERFACE_WG_SETPRIV;
4490	return kauth_authorize_network(kauth_cred_get(),
4491	    KAUTH_NETWORK_INTERFACE_WG, au, &wg->wg_if,
4492	    (void *)cmd, NULL) == 0;
4493}
4494
4495static int
4496wg_ioctl_get(struct wg_softc *wg, struct ifdrv *ifd)
4497{
4498	int error = ENOMEM;
4499	prop_dictionary_t prop_dict;
4500	prop_array_t peers = NULL;
4501	char *buf;
4502	struct wg_peer *wgp;
4503	int s, i;
4504
4505	prop_dict = prop_dictionary_create();
4506	if (prop_dict == NULL)
4507		goto error;
4508
4509	if (wg_is_authorized(wg, SIOCGDRVSPEC)) {
4510		if (!prop_dictionary_set_data(prop_dict, "private_key",
4511			wg->wg_privkey, WG_STATIC_KEY_LEN))
4512			goto error;
4513	}
4514
4515	if (wg->wg_listen_port != 0) {
4516		if (!prop_dictionary_set_uint16(prop_dict, "listen_port",
4517			wg->wg_listen_port))
4518			goto error;
4519	}
4520
4521	if (wg->wg_npeers == 0)
4522		goto skip_peers;
4523
4524	peers = prop_array_create();
4525	if (peers == NULL)
4526		goto error;
4527
4528	s = pserialize_read_enter();
4529	i = 0;
4530	WG_PEER_READER_FOREACH(wgp, wg) {
4531		struct wg_sockaddr *wgsa;
4532		struct psref wgp_psref, wgsa_psref;
4533		prop_dictionary_t prop_peer;
4534
4535		wg_get_peer(wgp, &wgp_psref);
4536		pserialize_read_exit(s);
4537
4538		prop_peer = prop_dictionary_create();
4539		if (prop_peer == NULL)
4540			goto next;
4541
4542		if (strlen(wgp->wgp_name) > 0) {
4543			if (!prop_dictionary_set_string(prop_peer, "name",
4544				wgp->wgp_name))
4545				goto next;
4546		}
4547
4548		if (!prop_dictionary_set_data(prop_peer, "public_key",
4549			wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey)))
4550			goto next;
4551
4552		uint8_t psk_zero[WG_PRESHARED_KEY_LEN] = {0};
4553		if (!consttime_memequal(wgp->wgp_psk, psk_zero,
4554			sizeof(wgp->wgp_psk))) {
4555			if (wg_is_authorized(wg, SIOCGDRVSPEC)) {
4556				if (!prop_dictionary_set_data(prop_peer,
4557					"preshared_key",
4558					wgp->wgp_psk, sizeof(wgp->wgp_psk)))
4559					goto next;
4560			}
4561		}
4562
4563		wgsa = wg_get_endpoint_sa(wgp, &wgsa_psref);
4564		CTASSERT(AF_UNSPEC == 0);
4565		if (wgsa_family(wgsa) != 0 /*AF_UNSPEC*/ &&
4566		    !prop_dictionary_set_data(prop_peer, "endpoint",
4567			wgsatoss(wgsa),
4568			sockaddr_getsize_by_family(wgsa_family(wgsa)))) {
4569			wg_put_sa(wgp, wgsa, &wgsa_psref);
4570			goto next;
4571		}
4572		wg_put_sa(wgp, wgsa, &wgsa_psref);
4573
4574		const struct timespec *t = &wgp->wgp_last_handshake_time;
4575
4576		if (!prop_dictionary_set_uint64(prop_peer,
4577			"last_handshake_time_sec", (uint64_t)t->tv_sec))
4578			goto next;
4579		if (!prop_dictionary_set_uint32(prop_peer,
4580			"last_handshake_time_nsec", (uint32_t)t->tv_nsec))
4581			goto next;
4582
4583		if (wgp->wgp_n_allowedips == 0)
4584			goto skip_allowedips;
4585
4586		prop_array_t allowedips = prop_array_create();
4587		if (allowedips == NULL)
4588			goto next;
4589		for (int j = 0; j < wgp->wgp_n_allowedips; j++) {
4590			struct wg_allowedip *wga = &wgp->wgp_allowedips[j];
4591			prop_dictionary_t prop_allowedip;
4592
4593			prop_allowedip = prop_dictionary_create();
4594			if (prop_allowedip == NULL)
4595				break;
4596
4597			if (!prop_dictionary_set_int(prop_allowedip, "family",
4598				wga->wga_family))
4599				goto _next;
4600			if (!prop_dictionary_set_uint8(prop_allowedip, "cidr",
4601				wga->wga_cidr))
4602				goto _next;
4603
4604			switch (wga->wga_family) {
4605			case AF_INET:
4606				if (!prop_dictionary_set_data(prop_allowedip,
4607					"ip", &wga->wga_addr4,
4608					sizeof(wga->wga_addr4)))
4609					goto _next;
4610				break;
4611#ifdef INET6
4612			case AF_INET6:
4613				if (!prop_dictionary_set_data(prop_allowedip,
4614					"ip", &wga->wga_addr6,
4615					sizeof(wga->wga_addr6)))
4616					goto _next;
4617				break;
4618#endif
4619			default:
4620				break;
4621			}
4622			prop_array_set(allowedips, j, prop_allowedip);
4623		_next:
4624			prop_object_release(prop_allowedip);
4625		}
4626		prop_dictionary_set(prop_peer, "allowedips", allowedips);
4627		prop_object_release(allowedips);
4628
4629	skip_allowedips:
4630
4631		prop_array_set(peers, i, prop_peer);
4632	next:
4633		if (prop_peer)
4634			prop_object_release(prop_peer);
4635		i++;
4636
4637		s = pserialize_read_enter();
4638		wg_put_peer(wgp, &wgp_psref);
4639	}
4640	pserialize_read_exit(s);
4641
4642	prop_dictionary_set(prop_dict, "peers", peers);
4643	prop_object_release(peers);
4644	peers = NULL;
4645
4646skip_peers:
4647	buf = prop_dictionary_externalize(prop_dict);
4648	if (buf == NULL)
4649		goto error;
4650	if (ifd->ifd_len < (strlen(buf) + 1)) {
4651		error = EINVAL;
4652		goto error;
4653	}
4654	error = copyout(buf, ifd->ifd_data, strlen(buf) + 1);
4655
4656	free(buf, 0);
4657error:
4658	if (peers != NULL)
4659		prop_object_release(peers);
4660	if (prop_dict != NULL)
4661		prop_object_release(prop_dict);
4662
4663	return error;
4664}
4665
4666static int
4667wg_ioctl(struct ifnet *ifp, u_long cmd, void *data)
4668{
4669	struct wg_softc *wg = ifp->if_softc;
4670	struct ifreq *ifr = data;
4671	struct ifaddr *ifa = data;
4672	struct ifdrv *ifd = data;
4673	int error = 0;
4674
4675	switch (cmd) {
4676	case SIOCINITIFADDR:
4677		if (ifa->ifa_addr->sa_family != AF_LINK &&
4678		    (ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
4679		    (IFF_UP | IFF_RUNNING)) {
4680			ifp->if_flags |= IFF_UP;
4681			error = if_init(ifp);
4682		}
4683		return error;
4684	case SIOCADDMULTI:
4685	case SIOCDELMULTI:
4686		switch (ifr->ifr_addr.sa_family) {
4687		case AF_INET:	/* IP supports Multicast */
4688			break;
4689#ifdef INET6
4690		case AF_INET6:	/* IP6 supports Multicast */
4691			break;
4692#endif
4693		default:  /* Other protocols doesn't support Multicast */
4694			error = EAFNOSUPPORT;
4695			break;
4696		}
4697		return error;
4698	case SIOCSDRVSPEC:
4699		if (!wg_is_authorized(wg, cmd)) {
4700			return EPERM;
4701		}
4702		switch (ifd->ifd_cmd) {
4703		case WG_IOCTL_SET_PRIVATE_KEY:
4704			error = wg_ioctl_set_private_key(wg, ifd);
4705			break;
4706		case WG_IOCTL_SET_LISTEN_PORT:
4707			error = wg_ioctl_set_listen_port(wg, ifd);
4708			break;
4709		case WG_IOCTL_ADD_PEER:
4710			error = wg_ioctl_add_peer(wg, ifd);
4711			break;
4712		case WG_IOCTL_DELETE_PEER:
4713			error = wg_ioctl_delete_peer(wg, ifd);
4714			break;
4715		default:
4716			error = EINVAL;
4717			break;
4718		}
4719		return error;
4720	case SIOCGDRVSPEC:
4721		return wg_ioctl_get(wg, ifd);
4722	case SIOCSIFFLAGS:
4723		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
4724			break;
4725		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
4726		case IFF_RUNNING:
4727			/*
4728			 * If interface is marked down and it is running,
4729			 * then stop and disable it.
4730			 */
4731			if_stop(ifp, 1);
4732			break;
4733		case IFF_UP:
4734			/*
4735			 * If interface is marked up and it is stopped, then
4736			 * start it.
4737			 */
4738			error = if_init(ifp);
4739			break;
4740		default:
4741			break;
4742		}
4743		return error;
4744#ifdef WG_RUMPKERNEL
4745	case SIOCSLINKSTR:
4746		error = wg_ioctl_linkstr(wg, ifd);
4747		if (error == 0)
4748			wg->wg_ops = &wg_ops_rumpuser;
4749		return error;
4750#endif
4751	default:
4752		break;
4753	}
4754
4755	error = ifioctl_common(ifp, cmd, data);
4756
4757#ifdef WG_RUMPKERNEL
4758	if (!wg_user_mode(wg))
4759		return error;
4760
4761	/* Do the same to the corresponding tun device on the host */
4762	/*
4763	 * XXX Actually the command has not been handled yet.  It
4764	 *     will be handled via pr_ioctl form doifioctl later.
4765	 */
4766	switch (cmd) {
4767	case SIOCAIFADDR:
4768	case SIOCDIFADDR: {
4769		struct in_aliasreq _ifra = *(const struct in_aliasreq *)data;
4770		struct in_aliasreq *ifra = &_ifra;
4771		KASSERT(error == ENOTTY);
4772		strncpy(ifra->ifra_name, rumpuser_wg_get_tunname(wg->wg_user),
4773		    IFNAMSIZ);
4774		error = rumpuser_wg_ioctl(wg->wg_user, cmd, ifra, AF_INET);
4775		if (error == 0)
4776			error = ENOTTY;
4777		break;
4778	}
4779#ifdef INET6
4780	case SIOCAIFADDR_IN6:
4781	case SIOCDIFADDR_IN6: {
4782		struct in6_aliasreq _ifra = *(const struct in6_aliasreq *)data;
4783		struct in6_aliasreq *ifra = &_ifra;
4784		KASSERT(error == ENOTTY);
4785		strncpy(ifra->ifra_name, rumpuser_wg_get_tunname(wg->wg_user),
4786		    IFNAMSIZ);
4787		error = rumpuser_wg_ioctl(wg->wg_user, cmd, ifra, AF_INET6);
4788		if (error == 0)
4789			error = ENOTTY;
4790		break;
4791	}
4792#endif
4793	}
4794#endif /* WG_RUMPKERNEL */
4795
4796	return error;
4797}
4798
4799static int
4800wg_init(struct ifnet *ifp)
4801{
4802
4803	ifp->if_flags |= IFF_RUNNING;
4804
4805	/* TODO flush pending packets. */
4806	return 0;
4807}
4808
#ifdef ALTQ
/*
 * wg_start(ifp)
 *
 *	Drain the send queue of ifp, moving each packet onto wg_pktq.
 *	A packet that wg_pktq does not accept is logged and freed.
 */
static void
wg_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	IFQ_DEQUEUE(&ifp->if_snd, pkt);
	while (pkt != NULL) {
		/* Stay on this CPU while its index is used as the hash. */
		kpreempt_disable();
		const uint32_t cpuidx = curcpu()->ci_index; // pktq_rps_hash(m)
		const bool queued = pktq_enqueue(wg_pktq, pkt, cpuidx);
		if (__predict_false(!queued)) {
			WGLOG(LOG_ERR, "%s: pktq full, dropping\n",
			    if_name(ifp));
			m_freem(pkt);
		}
		kpreempt_enable();

		IFQ_DEQUEUE(&ifp->if_snd, pkt);
	}
}
#endif
4831
4832static void
4833wg_stop(struct ifnet *ifp, int disable)
4834{
4835
4836	KASSERT((ifp->if_flags & IFF_RUNNING) != 0);
4837	ifp->if_flags &= ~IFF_RUNNING;
4838
4839	/* Need to do something? */
4840}
4841
#ifdef WG_DEBUG_PARAMS
/*
 * sysctl_net_wg_setup
 *
 *	Create the "wg" sysctl node under CTL_NET and, below it, one
 *	read-write leaf for each tunable protocol parameter.  If the
 *	parent node cannot be created, no leaves are attempted.
 */
SYSCTL_SETUP(sysctl_net_wg_setup, "sysctl net.wg setup")
{
	const struct sysctlnode *node = NULL;
	int error;

	error = sysctl_createv(clog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "wg",
	    SYSCTL_DESCR("wg(4)"),
	    NULL, 0, NULL, 0,
	    CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0 || node == NULL) {
		/* Without the parent there is nowhere to hang the leaves. */
		return;
	}

	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_QUAD, "rekey_after_messages",
	    SYSCTL_DESCR("session lifetime by messages"),
	    NULL, 0, &wg_rekey_after_messages, 0, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "rekey_after_time",
	    SYSCTL_DESCR("session lifetime"),
	    NULL, 0, &wg_rekey_after_time, 0, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "rekey_timeout",
	    SYSCTL_DESCR("session handshake retry time"),
	    NULL, 0, &wg_rekey_timeout, 0, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "rekey_attempt_time",
	    SYSCTL_DESCR("session handshake timeout"),
	    NULL, 0, &wg_rekey_attempt_time, 0, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "keepalive_timeout",
	    SYSCTL_DESCR("keepalive timeout"),
	    NULL, 0, &wg_keepalive_timeout, 0, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_BOOL, "force_underload",
	    SYSCTL_DESCR("force to determine under load"),
	    NULL, 0, &wg_force_underload, 0, CTL_CREATE, CTL_EOL);
}
#endif
4885
4886#ifdef WG_RUMPKERNEL
4887static bool
4888wg_user_mode(struct wg_softc *wg)
4889{
4890
4891	return wg->wg_user != NULL;
4892}
4893
4894static int
4895wg_ioctl_linkstr(struct wg_softc *wg, struct ifdrv *ifd)
4896{
4897	struct ifnet *ifp = &wg->wg_if;
4898	int error;
4899
4900	if (ifp->if_flags & IFF_UP)
4901		return EBUSY;
4902
4903	if (ifd->ifd_cmd == IFLINKSTR_UNSET) {
4904		/* XXX do nothing */
4905		return 0;
4906	} else if (ifd->ifd_cmd != 0) {
4907		return EINVAL;
4908	} else if (wg->wg_user != NULL) {
4909		return EBUSY;
4910	}
4911
4912	/* Assume \0 included */
4913	if (ifd->ifd_len > IFNAMSIZ) {
4914		return E2BIG;
4915	} else if (ifd->ifd_len < 1) {
4916		return EINVAL;
4917	}
4918
4919	char tun_name[IFNAMSIZ];
4920	error = copyinstr(ifd->ifd_data, tun_name, ifd->ifd_len, NULL);
4921	if (error != 0)
4922		return error;
4923
4924	if (strncmp(tun_name, "tun", 3) != 0)
4925		return EINVAL;
4926
4927	error = rumpuser_wg_create(tun_name, wg, &wg->wg_user);
4928
4929	return error;
4930}
4931
4932static int
4933wg_send_user(struct wg_peer *wgp, struct mbuf *m)
4934{
4935	int error;
4936	struct psref psref;
4937	struct wg_sockaddr *wgsa;
4938	struct wg_softc *wg = wgp->wgp_sc;
4939	struct iovec iov[1];
4940
4941	wgsa = wg_get_endpoint_sa(wgp, &psref);
4942
4943	iov[0].iov_base = mtod(m, void *);
4944	iov[0].iov_len = m->m_len;
4945
4946	/* Send messages to a peer via an ordinary socket. */
4947	error = rumpuser_wg_send_peer(wg->wg_user, wgsatosa(wgsa), iov, 1);
4948
4949	wg_put_sa(wgp, wgsa, &psref);
4950
4951	m_freem(m);
4952
4953	return error;
4954}
4955
4956static void
4957wg_input_user(struct ifnet *ifp, struct mbuf *m, const int af)
4958{
4959	struct wg_softc *wg = ifp->if_softc;
4960	struct iovec iov[2];
4961	struct sockaddr_storage ss;
4962
4963	KASSERT(af == AF_INET || af == AF_INET6);
4964
4965	WG_TRACE("");
4966
4967	if (af == AF_INET) {
4968		struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
4969		struct ip *ip;
4970
4971		KASSERT(m->m_len >= sizeof(struct ip));
4972		ip = mtod(m, struct ip *);
4973		sockaddr_in_init(sin, &ip->ip_dst, 0);
4974	} else {
4975		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
4976		struct ip6_hdr *ip6;
4977
4978		KASSERT(m->m_len >= sizeof(struct ip6_hdr));
4979		ip6 = mtod(m, struct ip6_hdr *);
4980		sockaddr_in6_init(sin6, &ip6->ip6_dst, 0, 0, 0);
4981	}
4982
4983	iov[0].iov_base = &ss;
4984	iov[0].iov_len = ss.ss_len;
4985	iov[1].iov_base = mtod(m, void *);
4986	iov[1].iov_len = m->m_len;
4987
4988	WG_DUMP_BUF(iov[1].iov_base, iov[1].iov_len);
4989
4990	/* Send decrypted packets to users via a tun. */
4991	rumpuser_wg_send_user(wg->wg_user, iov, 2);
4992
4993	m_freem(m);
4994}
4995
4996static int
4997wg_bind_port_user(struct wg_softc *wg, const uint16_t port)
4998{
4999	int error;
5000	uint16_t old_port = wg->wg_listen_port;
5001
5002	if (port != 0 && old_port == port)
5003		return 0;
5004
5005	error = rumpuser_wg_sock_bind(wg->wg_user, port);
5006	if (error == 0)
5007		wg->wg_listen_port = port;
5008	return error;
5009}
5010
5011/*
5012 * Receive user packets.
5013 */
5014void
5015rumpkern_wg_recv_user(struct wg_softc *wg, struct iovec *iov, size_t iovlen)
5016{
5017	struct ifnet *ifp = &wg->wg_if;
5018	struct mbuf *m;
5019	const struct sockaddr *dst;
5020
5021	WG_TRACE("");
5022
5023	dst = iov[0].iov_base;
5024
5025	m = m_gethdr(M_DONTWAIT, MT_DATA);
5026	if (m == NULL)
5027		return;
5028	m->m_len = m->m_pkthdr.len = 0;
5029	m_copyback(m, 0, iov[1].iov_len, iov[1].iov_base);
5030
5031	WG_DLOG("iov_len=%lu\n", iov[1].iov_len);
5032	WG_DUMP_BUF(iov[1].iov_base, iov[1].iov_len);
5033
5034	(void)wg_output(ifp, m, dst, NULL);
5035}
5036
5037/*
5038 * Receive packets from a peer.
5039 */
5040void
5041rumpkern_wg_recv_peer(struct wg_softc *wg, struct iovec *iov, size_t iovlen)
5042{
5043	struct mbuf *m;
5044	const struct sockaddr *src;
5045	int bound;
5046
5047	WG_TRACE("");
5048
5049	src = iov[0].iov_base;
5050
5051	m = m_gethdr(M_DONTWAIT, MT_DATA);
5052	if (m == NULL)
5053		return;
5054	m->m_len = m->m_pkthdr.len = 0;
5055	m_copyback(m, 0, iov[1].iov_len, iov[1].iov_base);
5056
5057	WG_DLOG("iov_len=%lu\n", iov[1].iov_len);
5058	WG_DUMP_BUF(iov[1].iov_base, iov[1].iov_len);
5059
5060	bound = curlwp_bind();
5061	wg_handle_packet(wg, m, src);
5062	curlwp_bindx(bound);
5063}
5064#endif /* WG_RUMPKERNEL */
5065
/*
 * Module infrastructure
 *
 * IF_MODULE() is provided by if_module.h.  It is invoked here for the
 * "wg" interface with module class MODULE_CLASS_DRIVER.
 * NOTE(review): the string argument presumably names the modules this
 * one requires (sodium and blake2s) -- confirm against if_module.h.
 */
#include "if_module.h"

IF_MODULE(MODULE_CLASS_DRIVER, wg, "sodium,blake2s")
5072