1/*
2 * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <string.h>
30#include <sys/types.h>
31#include <sys/syslog.h>
32#include <sys/queue.h>
33#include <sys/malloc.h>
34#include <sys/socket.h>
35#include <sys/kpi_mbuf.h>
36#include <sys/mbuf.h>
37#include <sys/domain.h>
38#include <sys/protosw.h>
39#include <sys/socketvar.h>
40#include <sys/kernel.h>
41#include <sys/systm.h>
42#include <sys/kern_control.h>
43#include <sys/ubc.h>
44#include <sys/codesign.h>
45#include <libkern/tree.h>
46#include <kern/locks.h>
47#include <kern/debug.h>
48#include <net/if_var.h>
49#include <net/route.h>
50#include <net/flowhash.h>
51#include <net/ntstat.h>
52#include <netinet/in.h>
53#include <netinet/in_var.h>
54#include <netinet/tcp.h>
55#include <netinet/tcp_var.h>
56#include <netinet/tcp_fsm.h>
57#include <netinet/flow_divert.h>
58#include <netinet/flow_divert_proto.h>
59#if INET6
60#include <netinet6/ip6protosw.h>
61#endif	/* INET6 */
62#include <dev/random/randomdev.h>
63#include <libkern/crypto/sha1.h>
64#include <libkern/crypto/crypto_internal.h>
65
66#define FLOW_DIVERT_CONNECT_STARTED		0x00000001
67#define FLOW_DIVERT_READ_CLOSED			0x00000002
68#define FLOW_DIVERT_WRITE_CLOSED		0x00000004
69#define FLOW_DIVERT_TUNNEL_RD_CLOSED	0x00000008
70#define FLOW_DIVERT_TUNNEL_WR_CLOSED	0x00000010
71#define FLOW_DIVERT_TRANSFERRED			0x00000020
72
73#define FDLOG(level, pcb, format, ...) do {											\
74	if (level <= (pcb)->log_level) {												\
75		log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s (%u): " format "\n", __FUNCTION__, (pcb)->hash, __VA_ARGS__); 	\
76	}																				\
77} while (0)
78
79#define FDLOG0(level, pcb, msg) do {												\
80	if (level <= (pcb)->log_level) {												\
81		log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s (%u): %s\n", __FUNCTION__, (pcb)->hash, msg);				\
82	}																				\
83} while (0)
84
85#define FDRETAIN(pcb)			if ((pcb) != NULL) OSIncrementAtomic(&(pcb)->ref_count)
86#define FDRELEASE(pcb)														\
87	do {																	\
88		if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) {	\
89			flow_divert_pcb_destroy(pcb);									\
90		}																	\
91	} while (0)
92
93#define FDLOCK(pcb)						lck_mtx_lock(&(pcb)->mtx)
94#define FDUNLOCK(pcb)					lck_mtx_unlock(&(pcb)->mtx)
95
96#define FD_CTL_SENDBUFF_SIZE			(2 * FLOW_DIVERT_CHUNK_SIZE)
97#define FD_CTL_RCVBUFF_SIZE				(128 * 1024)
98
99#define GROUP_BIT_CTL_ENQUEUE_BLOCKED	0
100
101#define GROUP_COUNT_MAX					32
102#define FLOW_DIVERT_MAX_NAME_SIZE		4096
103#define FLOW_DIVERT_MAX_KEY_SIZE		1024
104
105#define DNS_SERVICE_GROUP_UNIT			(GROUP_COUNT_MAX + 1)
106
107struct flow_divert_trie_node
108{
109	uint16_t start;
110	uint16_t length;
111	uint16_t child_map;
112	uint32_t group_unit;
113};
114
115struct flow_divert_trie
116{
117	struct flow_divert_trie_node *nodes;
118	uint16_t *child_maps;
119	uint8_t *bytes;
120	void *memory;
121	size_t nodes_count;
122	size_t child_maps_count;
123	size_t bytes_count;
124	size_t nodes_free_next;
125	size_t child_maps_free_next;
126	size_t bytes_free_next;
127	uint16_t root;
128};
129
130#define CHILD_MAP_SIZE			256
131#define NULL_TRIE_IDX			0xffff
132#define TRIE_NODE(t, i)			((t)->nodes[(i)])
133#define TRIE_CHILD(t, i, b)		(((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
134#define TRIE_BYTE(t, i)			((t)->bytes[(i)])
135
136static struct flow_divert_pcb		nil_pcb;
137
138decl_lck_rw_data(static, g_flow_divert_group_lck);
139static struct flow_divert_group		**g_flow_divert_groups			= NULL;
140static uint32_t						g_active_group_count			= 0;
141static struct flow_divert_trie		g_signing_id_trie;
142
143static	lck_grp_attr_t				*flow_divert_grp_attr			= NULL;
144static	lck_attr_t					*flow_divert_mtx_attr			= NULL;
145static	lck_grp_t					*flow_divert_mtx_grp			= NULL;
146static	errno_t						g_init_result					= 0;
147
148static	kern_ctl_ref				g_flow_divert_kctl_ref			= NULL;
149
150static struct protosw				g_flow_divert_in_protosw;
151static struct pr_usrreqs			g_flow_divert_in_usrreqs;
152#if INET6
153static struct ip6protosw			g_flow_divert_in6_protosw;
154static struct pr_usrreqs			g_flow_divert_in6_usrreqs;
155#endif	/* INET6 */
156
157static struct protosw				*g_tcp_protosw					= NULL;
158static struct ip6protosw			*g_tcp6_protosw					= NULL;
159
160static inline int
161flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
162{
163	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
164}
165
166RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
167RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
168
169static const char *
170flow_divert_packet_type2str(uint8_t packet_type)
171{
172	switch (packet_type) {
173		case FLOW_DIVERT_PKT_CONNECT:
174			return "connect";
175		case FLOW_DIVERT_PKT_CONNECT_RESULT:
176			return "connect result";
177		case FLOW_DIVERT_PKT_DATA:
178			return "data";
179		case FLOW_DIVERT_PKT_CLOSE:
180			return "close";
181		case FLOW_DIVERT_PKT_READ_NOTIFY:
182			return "read notification";
183		case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
184			return "properties update";
185		case FLOW_DIVERT_PKT_APP_MAP_UPDATE:
186			return "app map update";
187		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
188			return "app map create";
189		default:
190			return "unknown";
191	}
192}
193
194static struct flow_divert_pcb *
195flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
196{
197	struct flow_divert_pcb	key_item;
198	struct flow_divert_pcb	*fd_cb		= NULL;
199
200	key_item.hash = hash;
201
202	lck_rw_lock_shared(&group->lck);
203	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
204	FDRETAIN(fd_cb);
205	lck_rw_done(&group->lck);
206
207	return fd_cb;
208}
209
210static errno_t
211flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
212{
213	int							error						= 0;
214	struct						flow_divert_pcb	*exist		= NULL;
215	struct flow_divert_group	*group;
216	static uint32_t				g_nextkey					= 1;
217	static uint32_t				g_hash_seed					= 0;
218	errno_t						result						= 0;
219	int							try_count					= 0;
220
221	if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) {
222		return EINVAL;
223	}
224
225	socket_unlock(fd_cb->so, 0);
226	lck_rw_lock_shared(&g_flow_divert_group_lck);
227
228	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
229		FDLOG0(LOG_ERR, &nil_pcb, "No active groups, flow divert cannot be used for this socket");
230		error = ENETUNREACH;
231		goto done;
232	}
233
234	group = g_flow_divert_groups[ctl_unit];
235	if (group == NULL) {
236		FDLOG(LOG_ERR, &nil_pcb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
237		error = ENETUNREACH;
238		goto done;
239	}
240
241	socket_lock(fd_cb->so, 0);
242
243	do {
244		uint32_t	key[2];
245		uint32_t	idx;
246
247		key[0] = g_nextkey++;
248		key[1] = RandomULong();
249
250		if (g_hash_seed == 0) {
251			g_hash_seed = RandomULong();
252		}
253
254		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);
255
256		for (idx = 1; idx < GROUP_COUNT_MAX; idx++) {
257			struct flow_divert_group *curr_group = g_flow_divert_groups[idx];
258			if (curr_group != NULL && curr_group != group) {
259				lck_rw_lock_shared(&curr_group->lck);
260				exist = RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb);
261				lck_rw_done(&curr_group->lck);
262				if (exist != NULL) {
263					break;
264				}
265			}
266		}
267
268		if (exist == NULL) {
269			lck_rw_lock_exclusive(&group->lck);
270			exist = RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb);
271			lck_rw_done(&group->lck);
272		}
273	} while (exist != NULL && try_count++ < 3);
274
275	if (exist == NULL) {
276		fd_cb->group = group;
277		FDRETAIN(fd_cb);		/* The group now has a reference */
278	} else {
279		fd_cb->hash = 0;
280		result = EEXIST;
281	}
282
283	socket_unlock(fd_cb->so, 0);
284
285done:
286	lck_rw_done(&g_flow_divert_group_lck);
287	socket_lock(fd_cb->so, 0);
288
289	return result;
290}
291
292static struct flow_divert_pcb *
293flow_divert_pcb_create(socket_t so)
294{
295	struct flow_divert_pcb	*new_pcb	= NULL;
296
297	MALLOC_ZONE(new_pcb, struct flow_divert_pcb *, sizeof(*new_pcb), M_FLOW_DIVERT_PCB, M_WAITOK);
298	if (new_pcb == NULL) {
299		FDLOG0(LOG_ERR, &nil_pcb, "failed to allocate a pcb");
300		return NULL;
301	}
302
303	memset(new_pcb, 0, sizeof(*new_pcb));
304
305	lck_mtx_init(&new_pcb->mtx, flow_divert_mtx_grp, flow_divert_mtx_attr);
306	new_pcb->so = so;
307	new_pcb->log_level = nil_pcb.log_level;
308
309	FDRETAIN(new_pcb);	/* Represents the socket's reference */
310
311	return new_pcb;
312}
313
314static void
315flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
316{
317	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %u, app rx %u, tunnel tx %u, tunnel rx %u",
318			fd_cb->bytes_written_by_app, fd_cb->bytes_read_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);
319
320	if (fd_cb->local_address != NULL) {
321		FREE(fd_cb->local_address, M_SONAME);
322	}
323	if (fd_cb->remote_address != NULL) {
324		FREE(fd_cb->remote_address, M_SONAME);
325	}
326	if (fd_cb->connect_token != NULL) {
327		mbuf_freem(fd_cb->connect_token);
328	}
329	FREE_ZONE(fd_cb, sizeof(*fd_cb), M_FLOW_DIVERT_PCB);
330}
331
332static void
333flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
334{
335	if (fd_cb->group != NULL) {
336		struct flow_divert_group *group = fd_cb->group;
337		lck_rw_lock_exclusive(&group->lck);
338		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
339		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
340		fd_cb->group = NULL;
341		FDRELEASE(fd_cb);				/* Release the group's reference */
342		lck_rw_done(&group->lck);
343	}
344}
345
346static int
347flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_t *packet)
348{
349	struct flow_divert_packet_header	hdr;
350	int					error		= 0;
351
352	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
353	if (error) {
354		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
355		return error;
356	}
357
358	hdr.packet_type = packet_type;
359	hdr.conn_id = htonl(fd_cb->hash);
360
361	/* Lay down the header */
362	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
363	if (error) {
364		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
365		mbuf_freem(*packet);
366		*packet = NULL;
367		return error;
368	}
369
370	return 0;
371}
372
373static int
374flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, size_t length, const void *value)
375{
376	size_t	net_length	= htonl(length);
377	int		error		= 0;
378
379	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
380	if (error) {
381		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
382		return error;
383	}
384
385	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
386	if (error) {
387		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%lu)", length);
388		return error;
389	}
390
391	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
392	if (error) {
393		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
394		return error;
395	}
396
397	return error;
398}
399
400static int
401flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next)
402{
403	size_t	cursor			= offset;
404	int		error			= 0;
405	size_t	curr_length;
406	uint8_t	curr_type;
407
408	*err = 0;
409
410	do {
411		if (!next) {
412			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
413			if (error) {
414				*err = ENOENT;
415				return -1;
416			}
417		} else {
418			next = 0;
419			curr_type = FLOW_DIVERT_TLV_NIL;
420		}
421
422		if (curr_type != type) {
423			cursor += sizeof(curr_type);
424			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
425			if (error) {
426				*err = error;
427				return -1;
428			}
429
430			cursor += (sizeof(curr_length) + ntohl(curr_length));
431		}
432	} while (curr_type != type);
433
434	return cursor;
435}
436
437static int
438flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, size_t *val_size)
439{
440	int		error		= 0;
441	size_t	length;
442	int		tlv_offset;
443
444	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
445	if (tlv_offset < 0) {
446		return error;
447	}
448
449	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
450	if (error) {
451		return error;
452	}
453
454	length = ntohl(length);
455
456	if (val_size != NULL) {
457		*val_size = length;
458	}
459
460	if (buff != NULL && buff_len > 0) {
461		size_t to_copy = (length < buff_len) ? length : buff_len;
462		error = mbuf_copydata(packet, tlv_offset + sizeof(type) + sizeof(length), to_copy, buff);
463		if (error) {
464			return error;
465		}
466	}
467
468	return 0;
469}
470
471static int
472flow_divert_packet_compute_hmac(mbuf_t packet, struct flow_divert_group *group, uint8_t *hmac)
473{
474	mbuf_t	curr_mbuf	= packet;
475
476	if (g_crypto_funcs == NULL || group->token_key == NULL) {
477		return ENOPROTOOPT;
478	}
479
480	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
481	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);
482
483	while (curr_mbuf != NULL) {
484		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
485		curr_mbuf = mbuf_next(curr_mbuf);
486	}
487
488	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);
489
490	return 0;
491}
492
493static int
494flow_divert_packet_verify_hmac(mbuf_t packet, uint32_t ctl_unit)
495{
496	int							error = 0;
497	struct flow_divert_group	*group = NULL;
498	int							hmac_offset;
499	uint8_t						packet_hmac[SHA_DIGEST_LENGTH];
500	uint8_t						computed_hmac[SHA_DIGEST_LENGTH];
501	mbuf_t						tail;
502
503	lck_rw_lock_shared(&g_flow_divert_group_lck);
504
505	if (g_flow_divert_groups != NULL && g_active_group_count > 0) {
506		group = g_flow_divert_groups[ctl_unit];
507	}
508
509	if (group == NULL) {
510		lck_rw_done(&g_flow_divert_group_lck);
511		return ENOPROTOOPT;
512	}
513
514	lck_rw_lock_shared(&group->lck);
515
516	if (group->token_key == NULL) {
517		error = ENOPROTOOPT;
518		goto done;
519	}
520
521	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
522	if (hmac_offset < 0) {
523		goto done;
524	}
525
526	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
527	if (error) {
528		goto done;
529	}
530
531	/* Chop off the HMAC TLV */
532	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
533	if (error) {
534		goto done;
535	}
536
537	mbuf_free(tail);
538
539	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
540	if (error) {
541		goto done;
542	}
543
544	if (memcmp(packet_hmac, computed_hmac, sizeof(packet_hmac))) {
545		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
546		error = EINVAL;
547		goto done;
548	}
549
550done:
551	lck_rw_done(&group->lck);
552	lck_rw_done(&g_flow_divert_group_lck);
553	return error;
554}
555
556static void
557flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, int data_len, Boolean send)
558{
559	struct inpcb *inp = NULL;
560	struct ifnet *ifp = NULL;
561	Boolean cell = FALSE;
562	Boolean wifi = FALSE;
563
564	inp = sotoinpcb(fd_cb->so);
565	if (inp == NULL) {
566		return;
567	}
568
569	ifp = inp->inp_last_outifp;
570	if (ifp != NULL) {
571		cell = IFNET_IS_CELLULAR(ifp);
572		wifi = (!cell && IFNET_IS_WIFI(ifp));
573	}
574
575	if (send) {
576		INP_ADD_STAT(inp, cell, wifi, txpackets, 1);
577		INP_ADD_STAT(inp, cell, wifi, txbytes, data_len);
578	} else {
579		INP_ADD_STAT(inp, cell, wifi, rxpackets, 1);
580		INP_ADD_STAT(inp, cell, wifi, rxbytes, data_len);
581	}
582}
583
584static errno_t
585flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
586{
587	struct inpcb *inp = NULL;
588	struct ifnet *ifp = NULL;
589
590	inp = sotoinpcb(fd_cb->so);
591	if ((inp != NULL) && (inp->inp_flags & INP_NO_IFT_CELLULAR)) {
592		ifp = inp->inp_last_outifp;
593		if (ifp != NULL) {
594			if (IFNET_IS_CELLULAR(ifp)) {
595				return EHOSTUNREACH;
596			}
597		}
598	}
599
600	return 0;
601}
602
603static void
604flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, Boolean tunnel)
605{
606	if (how != SHUT_RD) {
607		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
608		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
609			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
610			/* If the tunnel is not accepting writes any more, then flush the send buffer */
611			sbflush(&fd_cb->so->so_snd);
612		}
613	}
614	if (how != SHUT_WR) {
615		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
616		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
617			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
618		}
619	}
620}
621
622static uint16_t
623trie_node_alloc(struct flow_divert_trie *trie)
624{
625	if (trie->nodes_free_next < trie->nodes_count) {
626		uint16_t node_idx = trie->nodes_free_next++;
627		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
628		return node_idx;
629	} else {
630		return NULL_TRIE_IDX;
631	}
632}
633
634static uint16_t
635trie_child_map_alloc(struct flow_divert_trie *trie)
636{
637	if (trie->child_maps_free_next < trie->child_maps_count) {
638		return trie->child_maps_free_next++;
639	} else {
640		return NULL_TRIE_IDX;
641	}
642}
643
644static uint16_t
645trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
646{
647	uint16_t start = trie->bytes_free_next;
648	if (start + bytes_size <= trie->bytes_count) {
649		if (start != bytes_idx) {
650			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
651		}
652		trie->bytes_free_next += bytes_size;
653		return start;
654	} else {
655		return NULL_TRIE_IDX;
656	}
657}
658
659static uint16_t
660flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
661{
662	uint16_t current = trie->root;
663	uint16_t child = trie->root;
664	uint16_t string_end = string_start + string_len;
665	uint16_t string_idx = string_start;
666	uint16_t string_remainder = string_len;
667
668	while (child != NULL_TRIE_IDX) {
669		uint16_t parent = current;
670		uint16_t node_idx;
671		uint16_t current_end;
672
673		current = child;
674		child = NULL_TRIE_IDX;
675
676		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
677
678		for (node_idx = TRIE_NODE(trie, current).start;
679		     node_idx < current_end &&
680		     string_idx < string_end &&
681		     TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
682		     node_idx++, string_idx++);
683
684		string_remainder = string_end - string_idx;
685
686		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
687			/*
688			 * We did not reach the end of the current node's string.
689			 * We need to split the current node into two:
690			 *   1. A new node that contains the prefix of the node that matches
691			 *      the prefix of the string being inserted.
692			 *   2. The current node modified to point to the remainder
693			 *      of the current node's string.
694			 */
695			uint16_t prefix = trie_node_alloc(trie);
696			if (prefix == NULL_TRIE_IDX) {
697				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
698				return NULL_TRIE_IDX;
699			}
700
701			/*
702			 * Prefix points to the portion of the current nodes's string that has matched
703			 * the input string thus far.
704			 */
705			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
706			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);
707
708			/*
709			 * Prefix has the current node as the child corresponding to the first byte
710			 * after the split.
711			 */
712			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
713			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
714				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
715				return NULL_TRIE_IDX;
716			}
717			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;
718
719			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
720			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;
721
722			/* Current node is adjusted to point to the remainder */
723			TRIE_NODE(trie, current).start = node_idx;
724			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;
725
726			/* We want to insert the new leaf (if any) as a child of the prefix */
727			current = prefix;
728		}
729
730		if (string_remainder > 0) {
731			/*
732			 * We still have bytes in the string that have not been matched yet.
733			 * If the current node has children, iterate to the child corresponding
734			 * to the next byte in the string.
735			 */
736			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
737				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
738			}
739		}
740	} /* while (child != NULL_TRIE_IDX) */
741
742	if (string_remainder > 0) {
743		/* Add a new leaf containing the remainder of the string */
744		uint16_t leaf = trie_node_alloc(trie);
745		if (leaf == NULL_TRIE_IDX) {
746			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
747			return NULL_TRIE_IDX;
748		}
749
750		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
751		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
752			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
753			return NULL_TRIE_IDX;
754		}
755		TRIE_NODE(trie, leaf).length = string_remainder;
756
757		/* Set the new leaf as the child of the current node */
758		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
759			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
760			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
761				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
762				return NULL_TRIE_IDX;
763			}
764		}
765		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
766		current = leaf;
767	} /* else duplicate or this string is a prefix of one of the existing strings */
768
769	return current;
770}
771
772static uint16_t
773flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
774{
775	uint16_t current = trie->root;
776	uint16_t string_idx = 0;
777
778	while (current != NULL_TRIE_IDX) {
779		uint16_t next = NULL_TRIE_IDX;
780		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
781		uint16_t node_idx;
782
783		for (node_idx = TRIE_NODE(trie, current).start;
784		     node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
785		     node_idx++, string_idx++);
786
787		if (node_idx == node_end) {
788			if (string_bytes[string_idx] == '\0') {
789				return current; /* Got an exact match */
790			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
791				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
792			}
793		}
794		current = next;
795	}
796
797	return NULL_TRIE_IDX;
798}
799
800static int
801flow_divert_get_src_proc(struct socket *so, proc_t *proc, boolean_t match_delegate)
802{
803	int release = 0;
804
805	if (!match_delegate &&
806	    (so->so_flags & SOF_DELEGATED) &&
807	    (*proc == PROC_NULL || (*proc)->p_pid != so->e_pid))
808	{
809		*proc = proc_find(so->e_pid);
810		release = 1;
811	} else if (*proc == PROC_NULL) {
812		*proc = current_proc();
813	}
814
815	if (*proc != PROC_NULL) {
816		if ((*proc)->p_pid == 0) {
817			if (release) {
818				proc_rele(*proc);
819			}
820			release = 0;
821			*proc = PROC_NULL;
822		}
823	}
824
825	return release;
826}
827
828static int
829flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet, Boolean enqueue)
830{
831	int		error;
832
833	if (fd_cb->group == NULL) {
834		fd_cb->so->so_error = ECONNABORTED;
835		soisdisconnected(fd_cb->so);
836		return ECONNABORTED;
837	}
838
839	lck_rw_lock_shared(&fd_cb->group->lck);
840
841	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
842		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
843	} else {
844		error = ENOBUFS;
845	}
846
847	if (error == ENOBUFS) {
848		if (enqueue) {
849			if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
850				lck_rw_lock_exclusive(&fd_cb->group->lck);
851			}
852			MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
853			error = 0;
854		}
855		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
856	}
857
858	lck_rw_done(&fd_cb->group->lck);
859
860	return error;
861}
862
863static int
864flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, proc_t p)
865{
866	mbuf_t			connect_packet	= NULL;
867	int				error			= 0;
868
869	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
870	if (error) {
871		goto done;
872	}
873
874	error = flow_divert_packet_append_tlv(connect_packet,
875	                                      FLOW_DIVERT_TLV_TRAFFIC_CLASS,
876	                                      sizeof(fd_cb->so->so_traffic_class),
877	                                      &fd_cb->so->so_traffic_class);
878	if (error) {
879		goto done;
880	}
881
882	if (fd_cb->so->so_flags & SOF_DELEGATED) {
883		error = flow_divert_packet_append_tlv(connect_packet,
884		                                      FLOW_DIVERT_TLV_PID,
885		                                      sizeof(fd_cb->so->e_pid),
886		                                      &fd_cb->so->e_pid);
887		if (error) {
888			goto done;
889		}
890
891		error = flow_divert_packet_append_tlv(connect_packet,
892		                                      FLOW_DIVERT_TLV_UUID,
893		                                      sizeof(fd_cb->so->e_uuid),
894		                                      &fd_cb->so->e_uuid);
895		if (error) {
896			goto done;
897		}
898	} else {
899		error = flow_divert_packet_append_tlv(connect_packet,
900		                                      FLOW_DIVERT_TLV_PID,
901		                                      sizeof(fd_cb->so->e_pid),
902		                                      &fd_cb->so->last_pid);
903		if (error) {
904			goto done;
905		}
906
907		error = flow_divert_packet_append_tlv(connect_packet,
908		                                      FLOW_DIVERT_TLV_UUID,
909		                                      sizeof(fd_cb->so->e_uuid),
910		                                      &fd_cb->so->last_uuid);
911		if (error) {
912			goto done;
913		}
914	}
915
916	if (fd_cb->connect_token != NULL) {
917		unsigned int token_len = m_length(fd_cb->connect_token);
918		mbuf_concatenate(connect_packet, fd_cb->connect_token);
919		mbuf_pkthdr_adjustlen(connect_packet, token_len);
920		fd_cb->connect_token = NULL;
921	} else {
922		uint32_t ctl_unit = htonl(fd_cb->control_group_unit);
923		int port;
924		int release_proc;
925
926		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
927		if (error) {
928			goto done;
929		}
930
931		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, to->sa_len, to);
932		if (error) {
933			goto done;
934		}
935
936		if (to->sa_family == AF_INET) {
937			port = ntohs((satosin(to))->sin_port);
938		}
939#if INET6
940   		else {
941			port = ntohs((satosin6(to))->sin6_port);
942		}
943#endif
944
945		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
946		if (error) {
947			goto done;
948		}
949
950		release_proc = flow_divert_get_src_proc(fd_cb->so, &p, FALSE);
951		if (p != PROC_NULL) {
952			proc_lock(p);
953			if (p->p_csflags & CS_VALID) {
954				const char *signing_id = cs_identity_get(p);
955				if (signing_id != NULL) {
956					error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_SIGNING_ID, strlen(signing_id), signing_id);
957				}
958
959				if (error == 0) {
960					unsigned char cdhash[SHA1_RESULTLEN];
961					error = proc_getcdhash(p, cdhash);
962					if (error == 0) {
963						error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CDHASH, sizeof(cdhash), cdhash);
964					}
965				}
966			}
967			proc_unlock(p);
968
969			if (release_proc) {
970				proc_rele(p);
971			}
972		}
973	}
974
975	error = flow_divert_send_packet(fd_cb, connect_packet, TRUE);
976	if (error) {
977		goto done;
978	}
979
980done:
981	if (error && connect_packet != NULL) {
982		mbuf_free(connect_packet);
983	}
984
985	return error;
986}
987
988static int
989flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
990{
991	int		error	 		= 0;
992	mbuf_t	packet			= NULL;
993	int		rbuff_space		= 0;
994
995	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
996	if (error) {
997		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
998		goto done;
999	}
1000
1001	rbuff_space = sbspace(&fd_cb->so->so_rcv);
1002	if (rbuff_space < 0) {
1003		rbuff_space = 0;
1004	}
1005	rbuff_space = htonl(rbuff_space);
1006	error = flow_divert_packet_append_tlv(packet,
1007	                                      FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1008	                                      sizeof(rbuff_space),
1009	                                      &rbuff_space);
1010	if (error) {
1011		goto done;
1012	}
1013
1014	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1015	if (error) {
1016		goto done;
1017	}
1018
1019done:
1020	if (error && packet != NULL) {
1021		mbuf_free(packet);
1022	}
1023
1024	return error;
1025}
1026
1027static int
1028flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1029{
1030	int		error	= 0;
1031	mbuf_t	packet	= NULL;
1032	uint32_t	zero	= 0;
1033
1034	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1035	if (error) {
1036		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1037		goto done;
1038	}
1039
1040	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1041	if (error) {
1042		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1043		goto done;
1044	}
1045
1046	how = htonl(how);
1047	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1048	if (error) {
1049		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1050		goto done;
1051	}
1052
1053	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1054	if (error) {
1055		goto done;
1056	}
1057
1058done:
1059	if (error && packet != NULL) {
1060		mbuf_free(packet);
1061	}
1062
1063	return error;
1064}
1065
1066static int
1067flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1068{
1069	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED|FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1070			(FLOW_DIVERT_TUNNEL_RD_CLOSED|FLOW_DIVERT_TUNNEL_WR_CLOSED))
1071	{
1072		return SHUT_RDWR;
1073	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1074		return SHUT_RD;
1075	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1076		return SHUT_WR;
1077	}
1078
1079	return -1;
1080}
1081
1082/*
1083 * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1084 * writes. Returns FALSE otherwise.
1085 */
1086static void
1087flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1088{
1089	int		how		= -1;
1090
1091	/* Do not send any close messages if there is still data in the send buffer */
1092	if (fd_cb->so->so_snd.sb_cc == 0) {
1093		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED|FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1094			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1095			how = SHUT_RD;
1096		}
1097		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED|FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1098			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1099			if (how == SHUT_RD) {
1100				how = SHUT_RDWR;
1101			} else {
1102				how = SHUT_WR;
1103			}
1104		}
1105	}
1106
1107	if (how != -1) {
1108		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1109		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1110			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1111			if (how != SHUT_RD) {
1112				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1113			}
1114			if (how != SHUT_WR) {
1115				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1116			}
1117		}
1118	}
1119
1120	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
1121		soisdisconnected(fd_cb->so);
1122	}
1123}
1124
1125static errno_t
1126flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, Boolean force)
1127{
1128	mbuf_t	packet;
1129	mbuf_t	last;
1130	int		error	= 0;
1131
1132	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1133	if (error) {
1134		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1135		return error;
1136	}
1137
1138	last = m_last(packet);
1139	mbuf_setnext(last, data);
1140	mbuf_pkthdr_adjustlen(packet, data_len);
1141
1142	error = flow_divert_send_packet(fd_cb, packet, force);
1143
1144	if (error) {
1145		mbuf_setnext(last, NULL);
1146		mbuf_free(packet);
1147	} else {
1148		fd_cb->bytes_sent += data_len;
1149		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1150	}
1151
1152	return error;
1153}
1154
1155static void
1156flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
1157{
1158	size_t	to_send;
1159	size_t	sent	= 0;
1160	int		error	= 0;
1161	mbuf_t	buffer;
1162
1163	to_send = fd_cb->so->so_snd.sb_cc;
1164	buffer = fd_cb->so->so_snd.sb_mb;
1165
1166	if (buffer == NULL && to_send > 0) {
1167		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
1168		return;
1169	}
1170
1171	/* Ignore the send window if force is enabled */
1172	if (!force && (to_send > fd_cb->send_window)) {
1173		to_send = fd_cb->send_window;
1174	}
1175
1176	while (sent < to_send) {
1177		mbuf_t	data;
1178		size_t	data_len;
1179
1180		data_len = to_send - sent;
1181		if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
1182			data_len = FLOW_DIVERT_CHUNK_SIZE;
1183		}
1184
1185		error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
1186		if (error) {
1187			FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1188			break;
1189		}
1190
1191		error = flow_divert_send_data_packet(fd_cb, data, data_len, force);
1192		if (error) {
1193			mbuf_free(data);
1194			break;
1195		}
1196
1197		sent += data_len;
1198	}
1199
1200	if (sent > 0) {
1201		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
1202		if (fd_cb->send_window >= sent) {
1203			fd_cb->send_window -= sent;
1204		} else {
1205			fd_cb->send_window = 0;
1206		}
1207		sbdrop(&fd_cb->so->so_snd, sent);
1208		sowwakeup(fd_cb->so);
1209	}
1210}
1211
1212static int
1213flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data)
1214{
1215	size_t	to_send		= mbuf_pkthdr_len(data);
1216	size_t	sent		= 0;
1217	int		error		= 0;
1218	mbuf_t	remaining_data	= data;
1219	mbuf_t	pkt_data	= NULL;
1220
1221	if (to_send > fd_cb->send_window) {
1222		to_send = fd_cb->send_window;
1223	}
1224
1225	if (fd_cb->so->so_snd.sb_cc > 0) {
1226		to_send = 0;	/* If the send buffer is non-empty, then we can't send anything */
1227	}
1228
1229	while (sent < to_send) {
1230		size_t	pkt_data_len;
1231
1232		pkt_data = remaining_data;
1233
1234		if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
1235			pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
1236			error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
1237			if (error) {
1238				FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
1239				pkt_data = NULL;
1240				break;
1241			}
1242		} else {
1243			pkt_data_len = to_send - sent;
1244			remaining_data = NULL;
1245		}
1246
1247		error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len, FALSE);
1248
1249		if (error) {
1250			break;
1251		}
1252
1253		pkt_data = NULL;
1254		sent += pkt_data_len;
1255	}
1256
1257	fd_cb->send_window -= sent;
1258
1259	error = 0;
1260
1261	if (pkt_data != NULL) {
1262		if (sbspace(&fd_cb->so->so_snd) > 0) {
1263			if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
1264				FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
1265						fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1266			}
1267		} else {
1268			error = ENOBUFS;
1269		}
1270	}
1271
1272	if (remaining_data != NULL) {
1273		if (sbspace(&fd_cb->so->so_snd) > 0) {
1274			if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
1275				FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
1276						fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1277			}
1278		} else {
1279			error = ENOBUFS;
1280		}
1281	}
1282
1283	return error;
1284}
1285
1286static int
1287flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb, uint32_t read_count)
1288{
1289	int		error		= 0;
1290	mbuf_t	packet		= NULL;
1291	uint32_t	net_read_count	= htonl(read_count);
1292
1293	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1294	if (error) {
1295		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1296		goto done;
1297	}
1298
1299	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_READ_COUNT, sizeof(net_read_count), &net_read_count);
1300	if (error) {
1301		FDLOG(LOG_ERR, fd_cb, "failed to add the read count: %d", error);
1302		goto done;
1303	}
1304
1305	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1306	if (error) {
1307		goto done;
1308	}
1309
1310done:
1311	if (error && packet != NULL) {
1312		mbuf_free(packet);
1313	}
1314
1315	return error;
1316}
1317
1318static int
1319flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
1320{
1321	int		error		= 0;
1322	mbuf_t	packet		= NULL;
1323
1324	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
1325	if (error) {
1326		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
1327		goto done;
1328	}
1329
1330	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
1331	if (error) {
1332		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
1333		goto done;
1334	}
1335
1336	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1337	if (error) {
1338		goto done;
1339	}
1340
1341done:
1342	if (error && packet != NULL) {
1343		mbuf_free(packet);
1344	}
1345
1346	return error;
1347}
1348
1349static void
1350flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
1351{
1352	uint32_t					connect_error;
1353	uint32_t					ctl_unit			= 0;
1354	int							error				= 0;
1355	struct flow_divert_group 	*grp				= NULL;
1356	struct sockaddr_storage		local_address;
1357	int							out_if_index		= 0;
1358	struct sockaddr_storage		remote_address;
1359	uint32_t					send_window;
1360
1361	memset(&local_address, 0, sizeof(local_address));
1362	memset(&remote_address, 0, sizeof(remote_address));
1363
1364	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
1365	if (error) {
1366		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
1367		return;
1368	}
1369
1370	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);
1371
1372	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
1373	if (error) {
1374		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
1375		return;
1376	}
1377
1378	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
1379	if (error) {
1380		FDLOG(LOG_ERR, fd_cb, "failed to get the control unit: %d", error);
1381		return;
1382	}
1383
1384	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_address), &local_address, NULL);
1385	if (error) {
1386		FDLOG0(LOG_NOTICE, fd_cb, "No local address provided");
1387	}
1388
1389	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, NULL);
1390	if (error) {
1391		FDLOG0(LOG_NOTICE, fd_cb, "No remote address provided");
1392	}
1393
1394	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
1395	if (error) {
1396		FDLOG0(LOG_NOTICE, fd_cb, "No output if index provided");
1397	}
1398
1399	connect_error	= ntohl(connect_error);
1400	ctl_unit		= ntohl(ctl_unit);
1401
1402	lck_rw_lock_shared(&g_flow_divert_group_lck);
1403
1404	if (connect_error == 0) {
1405		if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) {
1406			FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
1407			error = EINVAL;
1408		} else if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
1409			FDLOG0(LOG_ERR, fd_cb, "No active groups, dropping connection");
1410			error = EINVAL;
1411		} else {
1412			grp = g_flow_divert_groups[ctl_unit];
1413			if (grp == NULL) {
1414				error = ECONNRESET;
1415			}
1416		}
1417	}
1418
1419	FDLOCK(fd_cb);
1420	if (fd_cb->so != NULL) {
1421		struct inpcb				*inp = NULL;
1422		struct ifnet				*ifp = NULL;
1423		struct flow_divert_group	*old_group;
1424
1425		socket_lock(fd_cb->so, 0);
1426
1427		if (!(fd_cb->so->so_state & SS_ISCONNECTING)) {
1428			goto done;
1429		}
1430
1431		inp = sotoinpcb(fd_cb->so);
1432
1433		if (connect_error || error) {
1434			goto set_socket_state;
1435		}
1436
1437		if (local_address.ss_family != 0) {
1438			if (local_address.ss_len > sizeof(local_address)) {
1439				local_address.ss_len = sizeof(local_address);
1440			}
1441			fd_cb->local_address = dup_sockaddr((struct sockaddr *)&local_address, 1);
1442		} else {
1443			error = EINVAL;
1444			goto set_socket_state;
1445		}
1446
1447		if (remote_address.ss_family != 0) {
1448			if (remote_address.ss_len > sizeof(remote_address)) {
1449				remote_address.ss_len = sizeof(remote_address);
1450			}
1451			fd_cb->remote_address = dup_sockaddr((struct sockaddr *)&remote_address, 1);
1452		} else {
1453			error = EINVAL;
1454			goto set_socket_state;
1455		}
1456
1457		ifnet_head_lock_shared();
1458		if (out_if_index > 0 && out_if_index <= if_index) {
1459			ifp = ifindex2ifnet[out_if_index];
1460		}
1461
1462		if (ifp != NULL) {
1463			inp->inp_last_outifp = ifp;
1464		} else {
1465			error = EINVAL;
1466		}
1467		ifnet_head_done();
1468
1469		if (error) {
1470			goto set_socket_state;
1471		}
1472
1473		if (fd_cb->group == NULL) {
1474			error = EINVAL;
1475			goto set_socket_state;
1476		}
1477
1478		old_group = fd_cb->group;
1479
1480		lck_rw_lock_exclusive(&old_group->lck);
1481		lck_rw_lock_exclusive(&grp->lck);
1482
1483		RB_REMOVE(fd_pcb_tree, &old_group->pcb_tree, fd_cb);
1484		if (RB_INSERT(fd_pcb_tree, &grp->pcb_tree, fd_cb) != NULL) {
1485			panic("group with unit %u already contains a connection with hash %u", grp->ctl_unit, fd_cb->hash);
1486		}
1487
1488		fd_cb->group = grp;
1489
1490		lck_rw_done(&grp->lck);
1491		lck_rw_done(&old_group->lck);
1492
1493		fd_cb->send_window = ntohl(send_window);
1494		flow_divert_send_buffered_data(fd_cb, FALSE);
1495
1496set_socket_state:
1497		if (!connect_error && !error) {
1498			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
1499			error = flow_divert_send_connect_result(fd_cb);
1500		}
1501
1502		if (connect_error || error) {
1503			if (!connect_error) {
1504				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
1505				fd_cb->so->so_error = error;
1506				flow_divert_send_close_if_needed(fd_cb);
1507			} else {
1508				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
1509				fd_cb->so->so_error = connect_error;
1510			}
1511			soisdisconnected(fd_cb->so);
1512		} else {
1513			soisconnected(fd_cb->so);
1514		}
1515
1516done:
1517		socket_unlock(fd_cb->so, 0);
1518	}
1519	FDUNLOCK(fd_cb);
1520
1521	lck_rw_done(&g_flow_divert_group_lck);
1522}
1523
1524static void
1525flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
1526{
1527	uint32_t	close_error;
1528	int			error			= 0;
1529	int			how;
1530
1531	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
1532	if (error) {
1533		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
1534		return;
1535	}
1536
1537	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
1538	if (error) {
1539		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
1540		return;
1541	}
1542
1543	how = ntohl(how);
1544
1545	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
1546
1547	FDLOCK(fd_cb);
1548	if (fd_cb->so != NULL) {
1549		socket_lock(fd_cb->so, 0);
1550
1551		fd_cb->so->so_error = ntohl(close_error);
1552
1553		flow_divert_update_closed_state(fd_cb, how, TRUE);
1554
1555		how = flow_divert_tunnel_how_closed(fd_cb);
1556		if (how == SHUT_RDWR) {
1557			soisdisconnected(fd_cb->so);
1558		} else if (how == SHUT_RD) {
1559			socantrcvmore(fd_cb->so);
1560		} else if (how == SHUT_WR) {
1561			socantsendmore(fd_cb->so);
1562		}
1563
1564		socket_unlock(fd_cb->so, 0);
1565	}
1566	FDUNLOCK(fd_cb);
1567}
1568
1569static void
1570flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset)
1571{
1572	int		error		= 0;
1573	mbuf_t	data		= NULL;
1574	size_t	data_size;
1575
1576	data_size = (mbuf_pkthdr_len(packet) - offset);
1577
1578	FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);
1579
1580	error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
1581	if (error || data == NULL) {
1582		FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
1583		return;
1584	}
1585
1586	FDLOCK(fd_cb);
1587	if (fd_cb->so != NULL) {
1588		socket_lock(fd_cb->so, 0);
1589		if (flow_divert_check_no_cellular(fd_cb)) {
1590			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
1591			flow_divert_send_close(fd_cb, SHUT_RDWR);
1592			soisdisconnected(fd_cb->so);
1593		} else if (!(fd_cb->so->so_state & SS_CANTRCVMORE)) {
1594			if (sbappendstream(&fd_cb->so->so_rcv, data)) {
1595				fd_cb->bytes_received += data_size;
1596				flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
1597				fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc;
1598				sorwakeup(fd_cb->so);
1599				data = NULL;
1600			} else {
1601				FDLOG0(LOG_ERR, fd_cb, "received data, but appendstream failed");
1602			}
1603		}
1604		socket_unlock(fd_cb->so, 0);
1605	}
1606	FDUNLOCK(fd_cb);
1607
1608	if (data != NULL) {
1609		mbuf_free(data);
1610	}
1611}
1612
1613static void
1614flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
1615{
1616	uint32_t	read_count;
1617	int		error			= 0;
1618
1619	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
1620	if (error) {
1621		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
1622		return;
1623	}
1624
1625	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", read_count);
1626
1627	FDLOCK(fd_cb);
1628	if (fd_cb->so != NULL) {
1629		socket_lock(fd_cb->so, 0);
1630		fd_cb->send_window += ntohl(read_count);
1631		flow_divert_send_buffered_data(fd_cb, FALSE);
1632		socket_unlock(fd_cb->so, 0);
1633	}
1634	FDUNLOCK(fd_cb);
1635}
1636
1637static void
1638flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset)
1639{
1640	int error = 0;
1641	size_t key_size = 0;
1642	int log_level;
1643
1644	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
1645	if (error) {
1646		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
1647		return;
1648	}
1649
1650	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
1651		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %lu", key_size);
1652		return;
1653	}
1654
1655	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
1656	if (!error) {
1657		nil_pcb.log_level = log_level;
1658	}
1659
1660	lck_rw_lock_exclusive(&group->lck);
1661
1662	MALLOC(group->token_key, uint8_t *, key_size, M_TEMP, M_WAITOK);
1663	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
1664	if (error) {
1665		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
1666		FREE(group->token_key, M_TEMP);
1667		group->token_key = NULL;
1668		lck_rw_done(&group->lck);
1669		return;
1670	}
1671
1672	group->token_key_size = key_size;
1673
1674	lck_rw_done(&group->lck);
1675}
1676
1677static void
1678flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
1679{
1680	int							error				= 0;
1681	struct sockaddr_storage		local_address;
1682	int							out_if_index		= 0;
1683	struct sockaddr_storage		remote_address;
1684
1685	FDLOG0(LOG_INFO, fd_cb, "received a properties update");
1686
1687	memset(&local_address, 0, sizeof(local_address));
1688	memset(&remote_address, 0, sizeof(remote_address));
1689
1690	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_address), &local_address, NULL);
1691	if (error) {
1692		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
1693	}
1694
1695	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, NULL);
1696	if (error) {
1697		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
1698	}
1699
1700	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
1701	if (error) {
1702		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
1703	}
1704
1705	FDLOCK(fd_cb);
1706	if (fd_cb->so != NULL) {
1707		struct inpcb				*inp = NULL;
1708		struct ifnet				*ifp = NULL;
1709
1710		socket_lock(fd_cb->so, 0);
1711
1712		inp = sotoinpcb(fd_cb->so);
1713
1714		if (local_address.ss_family != 0) {
1715			if (local_address.ss_len > sizeof(local_address)) {
1716				local_address.ss_len = sizeof(local_address);
1717			}
1718			fd_cb->local_address = dup_sockaddr((struct sockaddr *)&local_address, 1);
1719		}
1720
1721		if (remote_address.ss_family != 0) {
1722			if (remote_address.ss_len > sizeof(remote_address)) {
1723				remote_address.ss_len = sizeof(remote_address);
1724			}
1725			fd_cb->remote_address = dup_sockaddr((struct sockaddr *)&remote_address, 1);
1726		}
1727
1728		ifnet_head_lock_shared();
1729		if (out_if_index > 0 && out_if_index <= if_index) {
1730			ifp = ifindex2ifnet[out_if_index];
1731		}
1732
1733		if (ifp != NULL) {
1734			inp->inp_last_outifp = ifp;
1735		}
1736		ifnet_head_done();
1737
1738		socket_unlock(fd_cb->so, 0);
1739	}
1740	FDUNLOCK(fd_cb);
1741}
1742
1743static void
1744flow_divert_handle_app_map_create(mbuf_t packet, int offset)
1745{
1746	size_t bytes_mem_size;
1747	size_t child_maps_mem_size;
1748	int cursor;
1749	int error = 0;
1750	struct flow_divert_trie new_trie;
1751	int insert_error = 0;
1752	size_t nodes_mem_size;
1753	int prefix_count = 0;
1754	int signing_id_count = 0;
1755
1756	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
1757
1758	/* Re-set the current trie */
1759	if (g_signing_id_trie.memory != NULL) {
1760		FREE(g_signing_id_trie.memory, M_TEMP);
1761	}
1762	memset(&g_signing_id_trie, 0, sizeof(g_signing_id_trie));
1763	g_signing_id_trie.root = NULL_TRIE_IDX;
1764
1765	memset(&new_trie, 0, sizeof(new_trie));
1766
1767	/* Get the number of shared prefixes in the new set of signing ID strings */
1768	flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);
1769
1770	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
1771	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
1772	     cursor >= 0;
1773	     cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1))
1774	{
1775		size_t sid_size = 0;
1776		flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
1777		new_trie.bytes_count += sid_size;
1778		signing_id_count++;
1779	}
1780
1781	if (signing_id_count == 0) {
1782		lck_rw_done(&g_flow_divert_group_lck);
1783		return;
1784	}
1785
1786	new_trie.nodes_count = (prefix_count + signing_id_count + 1); /* + 1 for the root node */
1787	new_trie.child_maps_count = (prefix_count + 1); /* + 1 for the root node */
1788
1789	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
1790			new_trie.nodes_count, new_trie.child_maps_count, new_trie.bytes_count);
1791
1792	nodes_mem_size = (sizeof(*new_trie.nodes) * new_trie.nodes_count);
1793	child_maps_mem_size = (sizeof(*new_trie.child_maps) * CHILD_MAP_SIZE * new_trie.child_maps_count);
1794	bytes_mem_size = (sizeof(*new_trie.bytes) * new_trie.bytes_count);
1795
1796	MALLOC(new_trie.memory, void *, nodes_mem_size + child_maps_mem_size + bytes_mem_size, M_TEMP, M_WAITOK);
1797	if (new_trie.memory == NULL) {
1798		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
1799		      nodes_mem_size + child_maps_mem_size + bytes_mem_size);
1800		return;
1801	}
1802
1803	/* Initialize the free lists */
1804	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
1805	new_trie.nodes_free_next = 0;
1806	memset(new_trie.nodes, 0, nodes_mem_size);
1807
1808	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
1809	new_trie.child_maps_free_next = 0;
1810	memset(new_trie.child_maps, 0xff, child_maps_mem_size);
1811
1812	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
1813	new_trie.bytes_free_next = 0;
1814
1815	/* The root is an empty node */
1816	new_trie.root = trie_node_alloc(&new_trie);
1817
1818	/* Add each signing ID to the trie */
1819	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
1820	     cursor >= 0;
1821	     cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1))
1822	{
1823		size_t sid_size = 0;
1824		flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
1825		if (new_trie.bytes_free_next + sid_size <= new_trie.bytes_count) {
1826			boolean_t is_dns;
1827			uint16_t new_node_idx;
1828			flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
1829			is_dns = (sid_size == sizeof(FLOW_DIVERT_DNS_SERVICE_SIGNING_ID) - 1 &&
1830			          !memcmp(&TRIE_BYTE(&new_trie, new_trie.bytes_free_next),
1831			                  FLOW_DIVERT_DNS_SERVICE_SIGNING_ID,
1832			                  sid_size));
1833			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
1834			if (new_node_idx != NULL_TRIE_IDX) {
1835				if (is_dns) {
1836					FDLOG(LOG_NOTICE, &nil_pcb, "Setting group unit for %s to %d", FLOW_DIVERT_DNS_SERVICE_SIGNING_ID, DNS_SERVICE_GROUP_UNIT);
1837					TRIE_NODE(&new_trie, new_node_idx).group_unit = DNS_SERVICE_GROUP_UNIT;
1838				}
1839			} else {
1840				insert_error = EINVAL;
1841				break;
1842			}
1843		} else {
			FDLOG0(LOG_ERR, &nil_pcb, "No room left in the trie byte pool for the signing ID");
1845			insert_error = ENOBUFS;
1846			break;
1847		}
1848	}
1849
1850	if (!insert_error) {
1851		g_signing_id_trie = new_trie;
1852	} else {
1853		FREE(new_trie.memory, M_TEMP);
1854	}
1855
1856	lck_rw_done(&g_flow_divert_group_lck);
1857}
1858
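/*
 * Handle an application map update message.  Each FLOW_DIVERT_TLV_SIGNING_ID
 * in the packet is looked up in the global signing ID trie and, if found (and
 * not reserved for the DNS service), its node is re-tagged with this group's
 * control unit.  Signing IDs that are not already in the trie are only
 * logged; the trie itself is rebuilt by the app map create message above.
 */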
1859static void
1860flow_divert_handle_app_map_update(struct flow_divert_group *group, mbuf_t packet, int offset)
1861{
1862	int error = 0;
1863	int cursor;
1864	size_t max_size = 0;
1865	uint8_t *signing_id;
1866	uint32_t ctl_unit;
1867
1868	lck_rw_lock_shared(&group->lck);
1869	ctl_unit = group->ctl_unit;
1870	lck_rw_done(&group->lck);
1871
1872	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
1873	     cursor >= 0;
1874	     cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1))
1875	{
1876		size_t sid_size = 0;
1877		flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
1878		if (sid_size > max_size) {
1879			max_size = sid_size;
1880		}
1881	}
1882
1883	MALLOC(signing_id, uint8_t *, max_size + 1, M_TEMP, M_WAITOK);
1884	if (signing_id == NULL) {
1885		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate a string to hold the signing ID (size %lu)", max_size);
1886		return;
1887	}
1888
1889	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
1890	     cursor >= 0;
1891	     cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1))
1892	{
1893		size_t signing_id_len = 0;
1894		uint16_t node;
1895
1896		flow_divert_packet_get_tlv(packet,
1897				cursor, FLOW_DIVERT_TLV_SIGNING_ID, max_size, signing_id, &signing_id_len);
1898
1899		signing_id[signing_id_len] = '\0';
1900
1901		lck_rw_lock_exclusive(&g_flow_divert_group_lck);
1902
1903		node = flow_divert_trie_search(&g_signing_id_trie, signing_id);
1904		if (node != NULL_TRIE_IDX) {
1905			if (TRIE_NODE(&g_signing_id_trie, node).group_unit != DNS_SERVICE_GROUP_UNIT) {
				FDLOG(LOG_INFO, &nil_pcb, "Setting %s to ctl unit %u", signing_id, ctl_unit);
1907				TRIE_NODE(&g_signing_id_trie, node).group_unit = ctl_unit;
1908			}
1909		} else {
1910			FDLOG(LOG_ERR, &nil_pcb, "Failed to find signing ID %s", signing_id);
1911		}
1912
1913		lck_rw_done(&g_flow_divert_group_lck);
1914	}
1915
1916	FREE(signing_id, M_TEMP);
1917}
1918
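/*
 * Entry point for packets arriving from user space over the kernel control
 * socket.  Packets with a connection ID of zero are group-level control
 * messages (group init, app map create/update); anything else is dispatched
 * to the flow divert PCB matching the connection ID.  The PCB lookup takes a
 * reference that is dropped after the packet is handled, and the mbuf is
 * always freed here, even on error.
 */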
1919static int
1920flow_divert_input(mbuf_t packet, struct flow_divert_group *group)
1921{
1922	struct flow_divert_packet_header	hdr;
1923	int									error		= 0;
1924	struct flow_divert_pcb				*fd_cb;
1925
1926	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
1927		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
1928		error = EINVAL;
1929		goto done;
1930	}
1931
1932	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
1933	if (error) {
1934		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
1935		error = ENOBUFS;
1936		goto done;
1937	}
1938
1939	hdr.conn_id = ntohl(hdr.conn_id);
1940
1941	if (hdr.conn_id == 0) {
1942		switch (hdr.packet_type) {
1943			case FLOW_DIVERT_PKT_GROUP_INIT:
1944				flow_divert_handle_group_init(group, packet, sizeof(hdr));
1945				break;
1946			case FLOW_DIVERT_PKT_APP_MAP_CREATE:
1947				flow_divert_handle_app_map_create(packet, sizeof(hdr));
1948				break;
1949			case FLOW_DIVERT_PKT_APP_MAP_UPDATE:
1950				flow_divert_handle_app_map_update(group, packet, sizeof(hdr));
1951				break;
1952			default:
1953				FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
1954				break;
1955		}
1956		goto done;
1957	}
1958
1959	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);		/* This retains the PCB */
1960	if (fd_cb == NULL) {
1961		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
1962			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
1963		}
1964		goto done;
1965	}
1966
1967	switch (hdr.packet_type) {
1968		case FLOW_DIVERT_PKT_CONNECT_RESULT:
1969			flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
1970			break;
1971		case FLOW_DIVERT_PKT_CLOSE:
1972			flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
1973			break;
1974		case FLOW_DIVERT_PKT_DATA:
1975			flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
1976			break;
1977		case FLOW_DIVERT_PKT_READ_NOTIFY:
1978			flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
1979			break;
1980		case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
1981			flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
1982			break;
1983		default:
1984			FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
1985			break;
1986	}
1987
1988	FDRELEASE(fd_cb);
1989
1990done:
1991	mbuf_free(packet);
1992	return error;
1993}
1994
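/*
 * Abort every flow belonging to a group (used when the group's control
 * socket goes away).  PCBs are first collected and retained on a temporary
 * list under the group lock, then torn down one at a time with only the PCB
 * and socket locks held, presumably to avoid holding the group lock across
 * socket operations.
 */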
1995static void
1996flow_divert_close_all(struct flow_divert_group *group)
1997{
1998	struct flow_divert_pcb			*fd_cb;
1999	SLIST_HEAD(, flow_divert_pcb)	tmp_list;
2000
2001	SLIST_INIT(&tmp_list);
2002
2003	lck_rw_lock_exclusive(&group->lck);
2004
2005	MBUFQ_DRAIN(&group->send_queue);
2006
2007	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
2008		FDRETAIN(fd_cb);
2009		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
2010	}
2011
2012	lck_rw_done(&group->lck);
2013
2014	while (!SLIST_EMPTY(&tmp_list)) {
2015		fd_cb = SLIST_FIRST(&tmp_list);
2016		FDLOCK(fd_cb);
2017		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
2018		if (fd_cb->so != NULL) {
2019			socket_lock(fd_cb->so, 0);
2020			flow_divert_pcb_remove(fd_cb);
2021			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
2022			fd_cb->so->so_error = ECONNABORTED;
2023			socket_unlock(fd_cb->so, 0);
2024		}
2025		FDUNLOCK(fd_cb);
2026		FDRELEASE(fd_cb);
2027	}
2028}
2029
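/*
 * Detach the flow divert PCB from a socket that is being torn down: flush
 * any remaining buffered data, remove the PCB from its group, and drop the
 * socket's reference on the PCB.  The socket lock is released before taking
 * the PCB mutex, which matches the PCB-mutex-before-socket-lock ordering
 * used elsewhere in this file.
 */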
2030void
2031flow_divert_detach(struct socket *so)
2032{
2033	struct flow_divert_pcb	*fd_cb		= so->so_fd_pcb;
2034
2035	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2036
2037	so->so_flags &= ~SOF_FLOW_DIVERT;
2038	so->so_fd_pcb = NULL;
2039
2040	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);
2041
2042	if (fd_cb->group != NULL) {
2043		/* Last-ditch effort to send any buffered data */
2044		flow_divert_send_buffered_data(fd_cb, TRUE);
2045
2046		/* Remove from the group */
2047		flow_divert_pcb_remove(fd_cb);
2048	}
2049
2050	socket_unlock(so, 0);
2051	FDLOCK(fd_cb);
2052	fd_cb->so = NULL;
2053	FDUNLOCK(fd_cb);
2054	socket_lock(so, 0);
2055
2056	FDRELEASE(fd_cb);	/* Release the socket's reference */
2057}
2058
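/*
 * pru_disconnect handler: flush any buffered data to the provider, mark both
 * directions closed, send a close message if one is still needed, and remove
 * the PCB from its group.
 */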
2059static int
2060flow_divert_close(struct socket *so)
2061{
2062	struct flow_divert_pcb	*fd_cb		= so->so_fd_pcb;
2063
2064	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2065
2066	FDLOG0(LOG_INFO, fd_cb, "Closing");
2067
2068	soisdisconnecting(so);
2069	sbflush(&so->so_rcv);
2070
2071	flow_divert_send_buffered_data(fd_cb, TRUE);
2072	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
2073	flow_divert_send_close_if_needed(fd_cb);
2074
2075	/* Remove from the group */
2076	flow_divert_pcb_remove(fd_cb);
2077
2078	return 0;
2079}
2080
2081static int
2082flow_divert_disconnectx(struct socket *so, associd_t aid, connid_t cid __unused)
2083{
2084	if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) {
2085		return (EINVAL);
2086	}
2087
2088	return (flow_divert_close(so));
2089}
2090
2091static int
2092flow_divert_shutdown(struct socket *so)
2093{
2094	struct flow_divert_pcb	*fd_cb		= so->so_fd_pcb;
2095
2096	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2097
2098	FDLOG0(LOG_INFO, fd_cb, "Can't send more");
2099
2100	socantsendmore(so);
2101
2102	flow_divert_update_closed_state(fd_cb, SHUT_WR, FALSE);
2103	flow_divert_send_close_if_needed(fd_cb);
2104
2105	return 0;
2106}
2107
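/*
 * pru_rcvd handler, invoked after the application reads from the socket.
 * The difference between the saved receive buffer size and the amount of
 * data still buffered tells how much the application consumed; that count is
 * reported to the provider in a read notification, presumably so it can open
 * its send window by the same amount.
 */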
2108static int
2109flow_divert_rcvd(struct socket *so, int flags __unused)
2110{
2111	struct flow_divert_pcb	*fd_cb			= so->so_fd_pcb;
2112	uint32_t				latest_sb_size;
2113	uint32_t				read_count;
2114
2115	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2116
2117	latest_sb_size = fd_cb->so->so_rcv.sb_cc;
2118
2119	if (fd_cb->sb_size < latest_sb_size) {
2120		panic("flow divert rcvd event handler (%u): saved rcv buffer size (%u) is less than latest rcv buffer size (%u)",
2121				fd_cb->hash, fd_cb->sb_size, latest_sb_size);
2122	}
2123
2124	read_count = fd_cb->sb_size - latest_sb_size;
2125
2126	FDLOG(LOG_DEBUG, fd_cb, "app read %u bytes", read_count);
2127
2128	if (read_count > 0 && flow_divert_send_read_notification(fd_cb, read_count) == 0) {
2129		fd_cb->bytes_read_by_app += read_count;
2130		fd_cb->sb_size = latest_sb_size;
2131	}
2132
2133	return 0;
2134}
2135
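/*
 * Duplicate a socket address for the sockaddr/peeraddr requests below.  If
 * no address has been recorded yet, a zeroed sockaddr of the appropriate
 * family is returned instead, so callers always get a well-formed (if
 * unspecified) address.
 */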
2136static errno_t
2137flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
2138                     struct sockaddr **dup)
2139{
2140	int						error		= 0;
2141	struct sockaddr			*result;
2142	struct sockaddr_storage	ss;
2143
2144	if (addr != NULL) {
2145		result = addr;
2146	} else {
2147		memset(&ss, 0, sizeof(ss));
2148		ss.ss_family = family;
2149		if (ss.ss_family == AF_INET) {
2150			ss.ss_len = sizeof(struct sockaddr_in);
2151		}
2152#if INET6
2153		else if (ss.ss_family == AF_INET6) {
2154			ss.ss_len = sizeof(struct sockaddr_in6);
2155		}
2156#endif	/* INET6 */
2157		else {
2158			error = EINVAL;
2159		}
2160		result = (struct sockaddr *)&ss;
2161	}
2162
2163	if (!error) {
2164		*dup = dup_sockaddr(result, 1);
2165		if (*dup == NULL) {
2166			error = ENOBUFS;
2167		}
2168	}
2169
2170	return error;
2171}
2172
2173static errno_t
2174flow_divert_getpeername(struct socket *so, struct sockaddr **sa)
2175{
2176	struct flow_divert_pcb	*fd_cb	= so->so_fd_pcb;
2177
2178	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2179
2180	return flow_divert_dup_addr(so->so_proto->pr_domain->dom_family,
2181	                            fd_cb->remote_address,
2182	                            sa);
2183}
2184
2185static errno_t
2186flow_divert_getsockaddr(struct socket *so, struct sockaddr **sa)
2187{
2188	struct flow_divert_pcb	*fd_cb	= so->so_fd_pcb;
2189
2190	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2191
2192	return flow_divert_dup_addr(so->so_proto->pr_domain->dom_family,
2193	                            fd_cb->local_address,
2194	                            sa);
2195}
2196
2197static errno_t
2198flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
2199{
2200	struct flow_divert_pcb	*fd_cb	= so->so_fd_pcb;
2201
2202	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2203
2204	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
		if (sopt->sopt_dir == SOPT_SET && (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
2206			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
2207		}
2208	}
2209
2210	if (SOCK_DOM(so) == PF_INET) {
2211		return g_tcp_protosw->pr_ctloutput(so, sopt);
2212	}
2213#if INET6
2214	else if (SOCK_DOM(so) == PF_INET6) {
2215		return g_tcp6_protosw->pr_ctloutput(so, sopt);
2216	}
2217#endif
2218	return 0;
2219}
2220
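/*
 * Connect handler.  Instead of opening a TCP connection directly, the
 * destination is handed to the user-space provider in a connect message
 * (flow_divert_send_connect(), which presumably also carries any stored
 * connect token), and the socket is left connecting until a connect result
 * message arrives.  A socket that was handed a fully transferred flow is
 * marked connected immediately when a remote address is already known.
 */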
2221errno_t
2222flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
2223{
2224	struct flow_divert_pcb	*fd_cb	= so->so_fd_pcb;
2225	int						error	= 0;
2226	struct inpcb			*inp	= sotoinpcb(so);
2227	struct sockaddr_in		*sinp;
2228
2229	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2230
2231	if (fd_cb->group == NULL) {
2232		error = ENETUNREACH;
2233		goto done;
2234	}
2235
2236	if (inp == NULL) {
2237		error = EINVAL;
2238		goto done;
2239	} else if (inp->inp_state == INPCB_STATE_DEAD) {
2240		if (so->so_error) {
2241			error = so->so_error;
2242			so->so_error = 0;
2243		} else {
2244			error = EINVAL;
2245		}
2246		goto done;
2247	}
2248
2249	sinp = (struct sockaddr_in *)(void *)to;
2250	if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
2251		error = EAFNOSUPPORT;
2252		goto done;
2253	}
2254
2255	if ((fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) && !(fd_cb->flags & FLOW_DIVERT_TRANSFERRED)) {
2256		error = EALREADY;
2257		goto done;
2258	}
2259
2260	if (fd_cb->flags & FLOW_DIVERT_TRANSFERRED) {
2261		FDLOG0(LOG_INFO, fd_cb, "fully transferred");
2262		fd_cb->flags &= ~FLOW_DIVERT_TRANSFERRED;
2263		if (fd_cb->remote_address != NULL) {
2264			soisconnected(fd_cb->so);
2265			goto done;
2266		}
2267	}
2268
2269	FDLOG0(LOG_INFO, fd_cb, "Connecting");
2270
2271	error = flow_divert_send_connect(fd_cb, to, p);
2272	if (error) {
2273		goto done;
2274	}
2275
2276	fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
2277
2278	soisconnecting(so);
2279
2280done:
2281	return error;
2282}
2283
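/*
 * Common connectx handler for IPv4 and IPv6: resolve the source/destination
 * address lists with in_selectaddrs() and pass the chosen destination to
 * flow_divert_connect_out().  As with plain TCP, a successful connectx
 * reports connection ID 1, since a TCP socket carries a single connection.
 */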
2284static int
2285flow_divert_connectx_out_common(struct socket *so, int af,
2286    struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl,
2287    struct proc *p, uint32_t ifscope __unused, associd_t aid __unused,
2288    connid_t *pcid, uint32_t flags __unused, void *arg __unused,
2289    uint32_t arglen __unused)
2290{
2291	struct sockaddr_entry *src_se = NULL, *dst_se = NULL;
2292	struct inpcb *inp = sotoinpcb(so);
2293	int error;
2294
2295	if (inp == NULL) {
2296		return (EINVAL);
2297	}
2298
2299	VERIFY(dst_sl != NULL);
2300
2301	/* select source (if specified) and destination addresses */
2302	error = in_selectaddrs(af, src_sl, &src_se, dst_sl, &dst_se);
2303	if (error != 0) {
2304		return (error);
2305	}
2306
2307	VERIFY(*dst_sl != NULL && dst_se != NULL);
2308	VERIFY(src_se == NULL || *src_sl != NULL);
2309	VERIFY(dst_se->se_addr->sa_family == af);
2310	VERIFY(src_se == NULL || src_se->se_addr->sa_family == af);
2311
2312	error = flow_divert_connect_out(so, dst_se->se_addr, p);
2313
2314	if (error == 0 && pcid != NULL) {
		*pcid = 1;	/* there is only one connection per TCP socket */
2316	}
2317
2318	return (error);
2319}
2320
2321static int
2322flow_divert_connectx_out(struct socket *so, struct sockaddr_list **src_sl,
2323    struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
2324    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
2325    uint32_t arglen)
2326{
2327	return (flow_divert_connectx_out_common(so, AF_INET, src_sl, dst_sl,
2328	    p, ifscope, aid, pcid, flags, arg, arglen));
2329}
2330
2331#if INET6
2332static int
2333flow_divert_connectx6_out(struct socket *so, struct sockaddr_list **src_sl,
2334    struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
2335    associd_t aid, connid_t *pcid, uint32_t flags, void *arg,
2336    uint32_t arglen)
2337{
2338	return (flow_divert_connectx_out_common(so, AF_INET6, src_sl, dst_sl,
2339	    p, ifscope, aid, pcid, flags, arg, arglen));
2340}
2341#endif /* INET6 */
2342
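/*
 * Backend for the SIOCGCONNINFO ioctls below: report the interface index,
 * socket error and connected state, and copy out the local and remote
 * addresses recorded on the PCB (or zeroed AF_INET placeholders when they
 * are not yet known).  No auxiliary data is returned.
 */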
2343static int
2344flow_divert_getconninfo(struct socket *so, connid_t cid, uint32_t *flags,
2345                        uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
2346                        user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
2347                        user_addr_t aux_data __unused, uint32_t *aux_len)
2348{
2349	int						error	= 0;
2350	struct flow_divert_pcb	*fd_cb	= so->so_fd_pcb;
2351	struct ifnet			*ifp	= NULL;
2352	struct inpcb			*inp	= sotoinpcb(so);
2353
2354	VERIFY((so->so_flags & SOF_FLOW_DIVERT));
2355
2356	if (so->so_fd_pcb == NULL || inp == NULL) {
2357		error = EINVAL;
2358		goto out;
2359	}
2360
2361	if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) {
2362		error = EINVAL;
2363		goto out;
2364	}
2365
2366	ifp = inp->inp_last_outifp;
2367	*ifindex = ((ifp != NULL) ? ifp->if_index : 0);
2368	*soerror = so->so_error;
2369	*flags = 0;
2370
2371	if (so->so_state & SS_ISCONNECTED) {
2372		*flags |= (CIF_CONNECTED | CIF_PREFERRED);
2373	}
2374
2375	if (fd_cb->local_address == NULL) {
2376		struct sockaddr_in sin;
2377		bzero(&sin, sizeof(sin));
2378		sin.sin_len = sizeof(sin);
2379		sin.sin_family = AF_INET;
2380		*src_len = sin.sin_len;
2381		if (src != USER_ADDR_NULL) {
2382			error = copyout(&sin, src, sin.sin_len);
2383			if (error != 0) {
2384				goto out;
2385			}
2386		}
2387	} else {
2388		*src_len = fd_cb->local_address->sa_len;
2389		if (src != USER_ADDR_NULL) {
2390			error = copyout(fd_cb->local_address, src, fd_cb->local_address->sa_len);
2391			if (error != 0) {
2392				goto out;
2393			}
2394		}
2395	}
2396
2397	if (fd_cb->remote_address == NULL) {
2398		struct sockaddr_in sin;
2399		bzero(&sin, sizeof(sin));
2400		sin.sin_len = sizeof(sin);
2401		sin.sin_family = AF_INET;
2402		*dst_len = sin.sin_len;
2403		if (dst != USER_ADDR_NULL) {
2404			error = copyout(&sin, dst, sin.sin_len);
2405			if (error != 0) {
2406				goto out;
2407			}
2408		}
2409	} else {
2410		*dst_len = fd_cb->remote_address->sa_len;
2411		if (dst != USER_ADDR_NULL) {
2412			error = copyout(fd_cb->remote_address, dst, fd_cb->remote_address->sa_len);
2413			if (error != 0) {
2414				goto out;
2415			}
2416		}
2417	}
2418
2419	*aux_type = 0;
2420	*aux_len = 0;
2421
2422out:
2423	return error;
2424}
2425
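/*
 * ioctl handler.  Only the connection info requests are handled here; any
 * other command returns EOPNOTSUPP so that the wrappers below can fall
 * through to the regular in_control()/in6_control() handlers.
 */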
2426static int
2427flow_divert_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp __unused, struct proc *p __unused)
2428{
2429	int error = 0;
2430
2431	switch (cmd) {
2432		case SIOCGCONNINFO32: {
2433			struct so_cinforeq32 cifr;
2434			bcopy(data, &cifr, sizeof (cifr));
2435			error = flow_divert_getconninfo(so, cifr.scir_cid, &cifr.scir_flags,
2436			                                &cifr.scir_ifindex, &cifr.scir_error, cifr.scir_src,
2437			                                &cifr.scir_src_len, cifr.scir_dst, &cifr.scir_dst_len,
2438			                                &cifr.scir_aux_type, cifr.scir_aux_data,
2439			                                &cifr.scir_aux_len);
2440			if (error == 0) {
2441				bcopy(&cifr, data, sizeof (cifr));
2442			}
2443			break;
2444		}
2445
2446		case SIOCGCONNINFO64: {
2447			struct so_cinforeq64 cifr;
2448			bcopy(data, &cifr, sizeof (cifr));
2449			error = flow_divert_getconninfo(so, cifr.scir_cid, &cifr.scir_flags,
2450			                                &cifr.scir_ifindex, &cifr.scir_error, cifr.scir_src,
2451			                                &cifr.scir_src_len, cifr.scir_dst, &cifr.scir_dst_len,
2452			                                &cifr.scir_aux_type, cifr.scir_aux_data,
2453			                                &cifr.scir_aux_len);
2454			if (error == 0) {
2455				bcopy(&cifr, data, sizeof (cifr));
2456			}
2457			break;
2458		}
2459
2460		default:
2461			error = EOPNOTSUPP;
2462	}
2463
2464	return error;
2465}
2466
2467static int
2468flow_divert_in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p)
2469{
2470	int error = flow_divert_control(so, cmd, data, ifp, p);
2471
2472	if (error == EOPNOTSUPP) {
2473		error = in_control(so, cmd, data, ifp, p);
2474	}
2475
2476	return error;
2477}
2478
2479static int
2480flow_divert_in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p)
2481{
2482	int error = flow_divert_control(so, cmd, data, ifp, p);
2483
2484	if (error == EOPNOTSUPP) {
2485		error = in6_control(so, cmd, data, ifp, p);
2486	}
2487
2488	return error;
2489}
2490
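/*
 * pru_send handler.  Control mbufs and out-of-band data are rejected and the
 * cellular-restriction check is applied; if the flow has not been started
 * yet, an implicit connect is issued first.  flow_divert_send_app_data()
 * then takes ownership of the data chain on success, so it is only freed
 * here on failure.
 */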
2491static errno_t
2492flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p __unused)
2493{
2494	struct flow_divert_pcb	*fd_cb	= so->so_fd_pcb;
2495	int						error	= 0;
2496	struct inpcb *inp;
2497
2498	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2499
2500	inp = sotoinpcb(so);
2501	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
2502		error = ECONNRESET;
2503		goto done;
2504	}
2505
2506	if (control && mbuf_len(control) > 0) {
2507		error = EINVAL;
2508		goto done;
2509	}
2510
2511	if (flags & MSG_OOB) {
2512		error = EINVAL;
2513		goto done; /* We don't support OOB data */
2514	}
2515
2516	error = flow_divert_check_no_cellular(fd_cb);
2517	if (error) {
2518		goto done;
2519	}
2520
2521	/* Implicit connect */
2522	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
2523		FDLOG0(LOG_INFO, fd_cb, "implicit connect");
2524		error = flow_divert_connect_out(so, to, NULL);
2525		if (error) {
2526			goto done;
2527		}
2528	}
2529
2530	FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", mbuf_pkthdr_len(data));
2531
2532	fd_cb->bytes_written_by_app += mbuf_pkthdr_len(data);
2533	error = flow_divert_send_app_data(fd_cb, data);
2534	if (error) {
2535		goto done;
2536	}
2537
2538	data = NULL;
2539
2540	if (flags & PRUS_EOF) {
2541		flow_divert_shutdown(so);
2542	}
2543
2544done:
2545	if (data) {
2546		mbuf_free(data);
2547	}
2548	if (control) {
2549		mbuf_free(control);
2550	}
2551	return error;
2552}
2553
2554boolean_t
2555flow_divert_is_dns_service(struct socket *so)
2556{
2557	uint32_t ctl_unit = 0;
2558	flow_divert_check_policy(so, NULL, TRUE, &ctl_unit);
2559	FDLOG(LOG_INFO, &nil_pcb, "Check for DNS resulted in %u", ctl_unit);
2560	return (ctl_unit == DNS_SERVICE_GROUP_UNIT);
2561}
2562
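/*
 * Decide whether a socket should be diverted by looking up the owning (or
 * delegated) process's code signing identity in the global signing ID trie.
 * On a match, the control unit recorded on the trie node (populated by the
 * app map messages) is returned through ctl_unit; otherwise EPROTOTYPE is
 * returned.
 */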
2563errno_t
2564flow_divert_check_policy(struct socket *so, proc_t p, boolean_t match_delegate, uint32_t *ctl_unit)
2565{
2566	int error = EPROTOTYPE;
2567
2568	if (ctl_unit != NULL) {
2569		*ctl_unit = 0;
2570	}
2571
2572	if (SOCK_DOM(so) != PF_INET
2573#if INET6
2574	    && SOCK_DOM(so) != PF_INET6
2575#endif
2576	    )
2577	{
2578		return error;
2579	}
2580
2581	if (g_signing_id_trie.root != NULL_TRIE_IDX) {
2582		int release_proc = flow_divert_get_src_proc(so, &p, match_delegate);
2583		if (p != PROC_NULL) {
2584			proc_lock(p);
2585			if (p->p_csflags & CS_VALID) {
2586				const char *signing_id = cs_identity_get(p);
2587				if (signing_id != NULL) {
2588					uint16_t result = NULL_TRIE_IDX;
2589					lck_rw_lock_shared(&g_flow_divert_group_lck);
2590					result = flow_divert_trie_search(&g_signing_id_trie, (const uint8_t *)signing_id);
2591					if (result != NULL_TRIE_IDX) {
2592						uint32_t unit = TRIE_NODE(&g_signing_id_trie, result).group_unit;
2593
2594						error = 0;
2595
2596						FDLOG(LOG_INFO, &nil_pcb, "%s matched, ctl_unit = %u", signing_id, unit);
2597
2598						if (ctl_unit != NULL) {
2599							*ctl_unit = unit;
2600						}
2601					}
2602					lck_rw_done(&g_flow_divert_group_lck);
2603				}
2604			}
2605			proc_unlock(p);
2606
2607			if (release_proc) {
2608				proc_rele(p);
2609			}
2610		}
2611	}
2612
2613	return error;
2614}
2615
2616static void
2617flow_divert_set_protosw(struct socket *so)
2618{
2619	so->so_flags |= SOF_FLOW_DIVERT;
2620	if (SOCK_DOM(so) == PF_INET) {
2621		so->so_proto = &g_flow_divert_in_protosw;
2622	}
2623#if INET6
2624	else {
2625		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
2626	}
2627#endif	/* INET6 */
2628}
2629
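/*
 * Attach an existing flow, identified by the flow ID from a token, to a new
 * socket.  The old socket is disconnected and switched back to the plain TCP
 * protosw, any data pending in its receive buffer is duplicated onto the new
 * socket, and the last output interface is carried over, presumably so that
 * interface-scoped state survives the transfer.
 */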
2630static errno_t
2631flow_divert_attach(struct socket *so, uint32_t flow_id, uint32_t ctl_unit)
2632{
2633	int									error		= 0;
2634	struct flow_divert_pcb				*fd_cb		= NULL;
2635	struct ifnet						*ifp		= NULL;
2636	struct inpcb						*inp		= NULL;
2637	struct socket						*old_so;
2638	mbuf_t								recv_data	= NULL;
2639
2640	socket_unlock(so, 0);
2641
2642	FDLOG(LOG_INFO, &nil_pcb, "Attaching socket to flow %u", flow_id);
2643
2644	/* Find the flow divert control block */
2645	lck_rw_lock_shared(&g_flow_divert_group_lck);
2646	if (g_flow_divert_groups != NULL && g_active_group_count > 0) {
2647		struct flow_divert_group *group = g_flow_divert_groups[ctl_unit];
2648		if (group != NULL) {
2649			fd_cb = flow_divert_pcb_lookup(flow_id, group);
2650		}
2651	}
2652	lck_rw_done(&g_flow_divert_group_lck);
2653
2654	if (fd_cb == NULL) {
2655		error = ENOENT;
2656		goto done;
2657	}
2658
2659	FDLOCK(fd_cb);
2660
2661	/* Dis-associate the flow divert control block from its current socket */
2662	old_so = fd_cb->so;
2663
2664	inp = sotoinpcb(old_so);
2665
2666	VERIFY(inp != NULL);
2667
2668	socket_lock(old_so, 0);
2669	soisdisconnected(old_so);
2670	old_so->so_flags &= ~SOF_FLOW_DIVERT;
2671	old_so->so_fd_pcb = NULL;
2672	old_so->so_proto = pffindproto(SOCK_DOM(old_so), IPPROTO_TCP, SOCK_STREAM);
2673	fd_cb->so = NULL;
2674	/* Save the output interface */
2675	ifp = inp->inp_last_outifp;
2676	if (old_so->so_rcv.sb_cc > 0) {
2677		error = mbuf_dup(old_so->so_rcv.sb_mb, MBUF_DONTWAIT, &recv_data);
2678		sbflush(&old_so->so_rcv);
2679	}
2680	socket_unlock(old_so, 0);
2681
2682	/* Associate the new socket with the flow divert control block */
2683	socket_lock(so, 0);
2684	so->so_fd_pcb = fd_cb;
2685	inp = sotoinpcb(so);
2686	inp->inp_last_outifp = ifp;
2687	if (recv_data != NULL) {
2688		if (sbappendstream(&so->so_rcv, recv_data)) {
2689			sorwakeup(so);
2690		}
2691	}
2692	flow_divert_set_protosw(so);
2693	socket_unlock(so, 0);
2694
2695	fd_cb->so = so;
2696	fd_cb->flags |= FLOW_DIVERT_TRANSFERRED;
2697
2698	FDUNLOCK(fd_cb);
2699
2700done:
2701	socket_lock(so, 0);
2702
2703	if (fd_cb != NULL) {
2704		FDRELEASE(fd_cb);	/* Release the reference obtained via flow_divert_pcb_lookup */
2705	}
2706
2707	return error;
2708}
2709
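/*
 * Create a flow divert PCB for a socket, insert it into the group identified
 * by ctl_unit, and switch the socket over to the flow divert protosw.
 */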
2710errno_t
2711flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit)
2712{
2713	errno_t error = 0;
2714	struct flow_divert_pcb *fd_cb;
2715
2716	if (so->so_flags & SOF_FLOW_DIVERT) {
2717		return EALREADY;
2718	}
2719
2720	fd_cb = flow_divert_pcb_create(so);
2721	if (fd_cb != NULL) {
2722		error = flow_divert_pcb_insert(fd_cb, ctl_unit);
2723		if (error) {
2724			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
2725			FDRELEASE(fd_cb);
2726		} else {
2727			fd_cb->log_level = LOG_NOTICE;
2728			fd_cb->control_group_unit = ctl_unit;
2729			so->so_fd_pcb = fd_cb;
2730
2731			flow_divert_set_protosw(so);
2732
2733			FDLOG0(LOG_INFO, fd_cb, "Created");
2734		}
2735	} else {
2736		error = ENOMEM;
2737	}
2738
2739	return error;
2740}
2741
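/*
 * setsockopt handler for the flow divert token.  The token is a TLV blob
 * that must carry a valid control unit and is authenticated with an HMAC
 * keyed by the corresponding group.  A token without a flow ID creates a new
 * flow divert PCB on the socket and saves the token for the eventual
 * connect; a token carrying a flow ID instead attaches the socket to that
 * existing flow via flow_divert_attach().
 */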
2742errno_t
2743flow_divert_token_set(struct socket *so, struct sockopt *sopt)
2744{
2745	uint32_t					ctl_unit		= 0;
2746	uint32_t					key_unit		= 0;
2747	uint32_t					flow_id			= 0;
2748	int							error			= 0;
2749	mbuf_t						token			= NULL;
2750
2751	if (so->so_flags & SOF_FLOW_DIVERT) {
2752		error = EALREADY;
2753		goto done;
2754	}
2755
2756	if (g_init_result) {
2757		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
2758		error = ENOPROTOOPT;
2759		goto done;
2760	}
2761
2762	if (SOCK_TYPE(so) != SOCK_STREAM ||
2763	    SOCK_PROTO(so) != IPPROTO_TCP ||
2764	    (SOCK_DOM(so) != PF_INET
2765#if INET6
2766	     && SOCK_DOM(so) != PF_INET6
2767#endif
2768		))
2769	{
2770		error = EINVAL;
2771		goto done;
2772	} else {
2773		struct tcpcb *tp = sototcpcb(so);
2774		if (tp == NULL || tp->t_state != TCPS_CLOSED) {
2775			error = EINVAL;
2776			goto done;
2777		}
2778	}
2779
2780	error = soopt_getm(sopt, &token);
2781	if (error) {
2782		goto done;
2783	}
2784
2785	error = soopt_mcopyin(sopt, token);
2786	if (error) {
2787		goto done;
2788	}
2789
2790	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
2791	if (!error) {
2792		key_unit = ntohl(key_unit);
2793	} else if (error != ENOENT) {
2794		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
2795		goto done;
2796	} else {
2797		key_unit = 0;
2798	}
2799
2800	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
2801	if (error) {
2802		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
2803		goto done;
2804	}
2805
2806	/* A valid kernel control unit is required */
2807	ctl_unit = ntohl(ctl_unit);
2808	if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) {
2809		FDLOG(LOG_ERR, &nil_pcb, "Got an invalid control socket unit: %u", ctl_unit);
2810		error = EINVAL;
2811		goto done;
2812	}
2813
2814	socket_unlock(so, 0);
2815	error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
2816	socket_lock(so, 0);
2817
2818	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "HMAC verification failed: %d", error);
2820		goto done;
2821	}
2822
2823	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_FLOW_ID, sizeof(flow_id), (void *)&flow_id, NULL);
2824	if (error && error != ENOENT) {
2825		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the flow ID from the token: %d", error);
2826		goto done;
2827	}
2828
2829	if (flow_id == 0) {
2830		error = flow_divert_pcb_init(so, ctl_unit);
2831		if (error == 0) {
2832			struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
2833			int log_level = LOG_NOTICE;
2834
2835			error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL,
2836				                               sizeof(log_level), &log_level, NULL);
2837			if (error == 0) {
2838				fd_cb->log_level = log_level;
2839			}
2840			error = 0;
2841
2842			fd_cb->connect_token = token;
2843			token = NULL;
2844		}
2845	} else {
2846		error = flow_divert_attach(so, flow_id, ctl_unit);
2847	}
2848
2849done:
2850	if (token != NULL) {
2851		mbuf_freem(token);
2852	}
2853
2854	return error;
2855}
2856
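/*
 * getsockopt counterpart: build a token describing this flow (control unit,
 * flow ID, key unit) and sign it with the control group's key, presumably so
 * that another socket can later take over the flow using the setsockopt
 * handler above.
 */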
2857errno_t
2858flow_divert_token_get(struct socket *so, struct sockopt *sopt)
2859{
2860	uint32_t					ctl_unit;
2861	int							error						= 0;
2862	uint8_t						hmac[SHA_DIGEST_LENGTH];
2863	struct flow_divert_pcb		*fd_cb						= so->so_fd_pcb;
2864	mbuf_t						token						= NULL;
2865	struct flow_divert_group	*control_group				= NULL;
2866
2867	if (!(so->so_flags & SOF_FLOW_DIVERT)) {
2868		error = EINVAL;
2869		goto done;
2870	}
2871
2872	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
2873
2874	if (fd_cb->group == NULL) {
2875		error = EINVAL;
2876		goto done;
2877	}
2878
2879	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
2880	if (error) {
2881		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
2882		goto done;
2883	}
2884
2885	ctl_unit = htonl(fd_cb->group->ctl_unit);
2886
2887	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
2888	if (error) {
2889		goto done;
2890	}
2891
2892	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
2893	if (error) {
2894		goto done;
2895	}
2896
2897	socket_unlock(so, 0);
2898	lck_rw_lock_shared(&g_flow_divert_group_lck);
2899
2900	if (g_flow_divert_groups != NULL && g_active_group_count > 0 &&
2901	    fd_cb->control_group_unit > 0 && fd_cb->control_group_unit < GROUP_COUNT_MAX)
2902	{
2903		control_group = g_flow_divert_groups[fd_cb->control_group_unit];
2904	}
2905
2906	if (control_group != NULL) {
2907		lck_rw_lock_shared(&control_group->lck);
2908		ctl_unit = htonl(control_group->ctl_unit);
2909		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
2910		if (!error) {
2911			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
2912		}
2913		lck_rw_done(&control_group->lck);
2914	} else {
2915		error = ENOPROTOOPT;
2916	}
2917
2918	lck_rw_done(&g_flow_divert_group_lck);
2919	socket_lock(so, 0);
2920
2921	if (error) {
2922		goto done;
2923	}
2924
2925	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
2926	if (error) {
2927		goto done;
2928	}
2929
2930	error = soopt_mcopyout(sopt, token);
2931	if (error) {
		token = NULL;	/* soopt_mcopyout() frees the mbuf on failure, so don't free it again below */
2933		goto done;
2934	}
2935
2936done:
2937	if (token != NULL) {
2938		mbuf_freem(token);
2939	}
2940
2941	return error;
2942}
2943
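/*
 * Kernel control callbacks.  Each connected control socket unit gets its own
 * flow_divert_group; the group array is allocated lazily on the first
 * connect and freed again once the last group disconnects.
 */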
2944static errno_t
2945flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
2946{
	struct flow_divert_group	*new_group	= NULL;	/* must start NULL for the error path below */
2948	int				error		= 0;
2949
2950	if (sac->sc_unit >= GROUP_COUNT_MAX) {
2951		error = EINVAL;
2952		goto done;
2953	}
2954
2955	*unitinfo = NULL;
2956
2957	MALLOC_ZONE(new_group, struct flow_divert_group *, sizeof(*new_group), M_FLOW_DIVERT_GROUP, M_WAITOK);
2958	if (new_group == NULL) {
2959		error = ENOBUFS;
2960		goto done;
2961	}
2962
2963	memset(new_group, 0, sizeof(*new_group));
2964
2965	lck_rw_init(&new_group->lck, flow_divert_mtx_grp, flow_divert_mtx_attr);
2966	RB_INIT(&new_group->pcb_tree);
2967	new_group->ctl_unit = sac->sc_unit;
2968	MBUFQ_INIT(&new_group->send_queue);
2969
2970	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
2971
2972	if (g_flow_divert_groups == NULL) {
2973		MALLOC(g_flow_divert_groups,
2974		       struct flow_divert_group **,
2975		       GROUP_COUNT_MAX * sizeof(struct flow_divert_group *),
2976		       M_TEMP,
2977		       M_WAITOK | M_ZERO);
2978	}
2979
2980	if (g_flow_divert_groups == NULL) {
2981		error = ENOBUFS;
2982	} else if (g_flow_divert_groups[sac->sc_unit] != NULL) {
2983		error = EALREADY;
2984	} else {
2985		g_flow_divert_groups[sac->sc_unit] = new_group;
2986		g_active_group_count++;
2987	}
2988
2989	lck_rw_done(&g_flow_divert_group_lck);
2990
	/* Only hand back the group on success; the error path below frees it */
	if (error == 0) {
		*unitinfo = new_group;
	}
2992
2993done:
2994	if (error != 0 && new_group != NULL) {
2995		FREE_ZONE(new_group, sizeof(*new_group), M_FLOW_DIVERT_GROUP);
2996	}
2997	return error;
2998}
2999
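/*
 * When a control socket disconnects, abort every flow bound to its group,
 * scrub and free the group's token key, and clear any signing ID trie nodes
 * that still point at this unit.
 */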
3000static errno_t
3001flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
3002{
3003	struct flow_divert_group	*group	= NULL;
3004	errno_t						error	= 0;
3005	uint16_t					node	= 0;
3006
3007	if (unit >= GROUP_COUNT_MAX) {
3008		return EINVAL;
3009	}
3010
3011	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
3012
3013	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
3014
3015	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
		panic("flow divert group %u is disconnecting, but no groups are active (groups = %p, active count = %u)", unit,
		      g_flow_divert_groups, g_active_group_count);
3018	}
3019
3020	group = g_flow_divert_groups[unit];
3021
3022	if (group != (struct flow_divert_group *)unitinfo) {
3023		panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
3024	}
3025
3026	if (group != NULL) {
3027		flow_divert_close_all(group);
3028		if (group->token_key != NULL) {
3029			memset(group->token_key, 0, group->token_key_size);
3030			FREE(group->token_key, M_TEMP);
3031			group->token_key = NULL;
3032			group->token_key_size = 0;
3033		}
3034		FREE_ZONE(group, sizeof(*group), M_FLOW_DIVERT_GROUP);
3035		g_flow_divert_groups[unit] = NULL;
3036		g_active_group_count--;
3037	} else {
3038		error = EINVAL;
3039	}
3040
3041	if (g_active_group_count == 0) {
3042		FREE(g_flow_divert_groups, M_TEMP);
3043		g_flow_divert_groups = NULL;
3044	}
3045
3046	/* Remove all signing IDs that point to this unit */
3047	for (node = 0; node < g_signing_id_trie.nodes_count; node++) {
3048		if (TRIE_NODE(&g_signing_id_trie, node).group_unit == unit) {
3049			TRIE_NODE(&g_signing_id_trie, node).group_unit = 0;
3050		}
3051	}
3052
3053	lck_rw_done(&g_flow_divert_group_lck);
3054
3055	return error;
3056}
3057
3058static errno_t
3059flow_divert_kctl_send(kern_ctl_ref kctlref __unused, uint32_t unit __unused, void *unitinfo, mbuf_t m, int flags __unused)
3060{
3061	return flow_divert_input(m, (struct flow_divert_group *)unitinfo);
3062}
3063
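/*
 * Called when user space has drained the control socket.  If enqueueing had
 * previously been blocked, the blocked flag is cleared, the group's pending
 * send queue is flushed, and every PCB in the group gets a chance to push
 * its buffered application data.
 */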
3064static void
3065flow_divert_kctl_rcvd(kern_ctl_ref kctlref __unused, uint32_t unit __unused, void *unitinfo, int flags __unused)
3066{
3067	struct flow_divert_group	*group	= (struct flow_divert_group *)unitinfo;
3068
3069	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
3070		struct flow_divert_pcb			*fd_cb;
3071		SLIST_HEAD(, flow_divert_pcb) 	tmp_list;
3072
3073		lck_rw_lock_shared(&g_flow_divert_group_lck);
3074		lck_rw_lock_exclusive(&group->lck);
3075
3076		while (!MBUFQ_EMPTY(&group->send_queue)) {
3077			mbuf_t next_packet;
3078			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
3079			next_packet = MBUFQ_FIRST(&group->send_queue);
3080			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
3081			if (error) {
3082				FDLOG(LOG_DEBUG, &nil_pcb, "ctl_enqueuembuf returned an error: %d", error);
3083				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
3084				lck_rw_done(&group->lck);
3085				lck_rw_done(&g_flow_divert_group_lck);
3086				return;
3087			}
3088			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
3089		}
3090
3091		SLIST_INIT(&tmp_list);
3092
3093		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
3094			FDRETAIN(fd_cb);
3095			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
3096		}
3097
3098		lck_rw_done(&group->lck);
3099
3100		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
3101			FDLOCK(fd_cb);
3102			if (fd_cb->so != NULL) {
3103				socket_lock(fd_cb->so, 0);
3104				if (fd_cb->group != NULL) {
3105					flow_divert_send_buffered_data(fd_cb, FALSE);
3106				}
3107				socket_unlock(fd_cb->so, 0);
3108			}
3109			FDUNLOCK(fd_cb);
3110			FDRELEASE(fd_cb);
3111		}
3112
3113		lck_rw_done(&g_flow_divert_group_lck);
3114	}
3115}
3116
3117static int
3118flow_divert_kctl_init(void)
3119{
3120	struct kern_ctl_reg	ctl_reg;
3121	int			result;
3122
3123	memset(&ctl_reg, 0, sizeof(ctl_reg));
3124
3125	strncpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
3126	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name)-1] = '\0';
3127	ctl_reg.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
3128	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
3129	ctl_reg.ctl_recvsize = FD_CTL_RCVBUFF_SIZE;
3130
3131	ctl_reg.ctl_connect = flow_divert_kctl_connect;
3132	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
3133	ctl_reg.ctl_send = flow_divert_kctl_send;
3134	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
3135
3136	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
3137
3138	if (result) {
		FDLOG(LOG_ERR, &nil_pcb, "ctl_register failed: %d", result);
3140		return result;
3141	}
3142
3143	return 0;
3144}
3145
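/*
 * One-time initialization: clone the TCP (and, with INET6, TCP6) protosw and
 * usrreqs and override the entry points that must go through flow divert,
 * set up the lock group and the global group lock, register the kernel
 * control, and reset the signing ID trie.  Any failure is recorded in
 * g_init_result, which flow_divert_token_set() checks before allowing the
 * socket option to be used.
 */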
3146void
3147flow_divert_init(void)
3148{
3149	memset(&nil_pcb, 0, sizeof(nil_pcb));
3150	nil_pcb.log_level = LOG_INFO;
3151
3152	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
3153
3154	VERIFY(g_tcp_protosw != NULL);
3155
3156	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
3157	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
3158
3159	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
3160	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
3161	g_flow_divert_in_usrreqs.pru_control = flow_divert_in_control;
3162	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
3163	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
3164	g_flow_divert_in_usrreqs.pru_peeraddr = flow_divert_getpeername;
3165	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
3166	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
3167	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
3168	g_flow_divert_in_usrreqs.pru_sockaddr = flow_divert_getsockaddr;
3169
3170	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
3171	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
3172
3173	/*
3174	 * Socket filters shouldn't attach/detach to/from this protosw
3175	 * since pr_protosw is to be used instead, which points to the
3176	 * real protocol; if they do, it is a bug and we should panic.
3177	 */
3178	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
3179	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
3180	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
3181	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
3182
3183#if INET6
3184	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
3185
3186	VERIFY(g_tcp6_protosw != NULL);
3187
3188	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
3189	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
3190
3191	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
3192	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
3193	g_flow_divert_in6_usrreqs.pru_control = flow_divert_in6_control;
3194	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
3195	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
3196	g_flow_divert_in6_usrreqs.pru_peeraddr = flow_divert_getpeername;
3197	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
3198	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
3199	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
3200	g_flow_divert_in6_usrreqs.pru_sockaddr = flow_divert_getsockaddr;
3201
3202	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
3203	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
3204	/*
3205	 * Socket filters shouldn't attach/detach to/from this protosw
3206	 * since pr_protosw is to be used instead, which points to the
3207	 * real protocol; if they do, it is a bug and we should panic.
3208	 */
3209	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
3210	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
3211	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
3212	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
3213#endif	/* INET6 */
3214
3215	flow_divert_grp_attr = lck_grp_attr_alloc_init();
3216	if (flow_divert_grp_attr == NULL) {
3217		FDLOG0(LOG_ERR, &nil_pcb, "lck_grp_attr_alloc_init failed");
3218		g_init_result = ENOMEM;
3219		goto done;
3220	}
3221
3222	flow_divert_mtx_grp = lck_grp_alloc_init(FLOW_DIVERT_CONTROL_NAME, flow_divert_grp_attr);
3223	if (flow_divert_mtx_grp == NULL) {
3224		FDLOG0(LOG_ERR, &nil_pcb, "lck_grp_alloc_init failed");
3225		g_init_result = ENOMEM;
3226		goto done;
3227	}
3228
3229	flow_divert_mtx_attr = lck_attr_alloc_init();
3230	if (flow_divert_mtx_attr == NULL) {
3231		FDLOG0(LOG_ERR, &nil_pcb, "lck_attr_alloc_init failed");
3232		g_init_result = ENOMEM;
3233		goto done;
3234	}
3235
3236	g_init_result = flow_divert_kctl_init();
3237	if (g_init_result) {
3238		goto done;
3239	}
3240
3241	lck_rw_init(&g_flow_divert_group_lck, flow_divert_mtx_grp, flow_divert_mtx_attr);
3242
3243	memset(&g_signing_id_trie, 0, sizeof(g_signing_id_trie));
3244	g_signing_id_trie.root = NULL_TRIE_IDX;
3245
3246done:
3247	if (g_init_result != 0) {
3248		if (flow_divert_mtx_attr != NULL) {
3249			lck_attr_free(flow_divert_mtx_attr);
3250			flow_divert_mtx_attr = NULL;
3251		}
3252		if (flow_divert_mtx_grp != NULL) {
3253			lck_grp_free(flow_divert_mtx_grp);
3254			flow_divert_mtx_grp = NULL;
3255		}
3256		if (flow_divert_grp_attr != NULL) {
3257			lck_grp_attr_free(flow_divert_grp_attr);
3258			flow_divert_grp_attr = NULL;
3259		}
3260
3261		if (g_flow_divert_kctl_ref != NULL) {
3262			ctl_deregister(g_flow_divert_kctl_ref);
3263			g_flow_divert_kctl_ref = NULL;
3264		}
3265	}
3266}
3267