1/*
2 * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <kern/locks.h>
30
31#include <sys/types.h>
32#include <sys/kernel_types.h>
33#include <sys/kauth.h>
34#include <sys/socket.h>
35#include <sys/socketvar.h>
36#include <sys/sockio.h>
37#include <sys/sysctl.h>
38#include <sys/proc.h>
39
40#include <net/if.h>
41#include <net/if_var.h>
42#include <net/if_types.h>
43#include <net/bpf.h>
44#include <net/net_osdep.h>
45#include <net/pktap.h>
46
47#include <netinet/in_pcb.h>
48#include <netinet/tcp.h>
49#include <netinet/tcp_var.h>
50#define	_IP_VHL
51#include <netinet/ip.h>
52#include <netinet/ip_var.h>
53#include <netinet/udp.h>
54#include <netinet/udp_var.h>
55
56#include <netinet/ip6.h>
57#include <netinet6/in6_pcb.h>
58
59#include <libkern/OSAtomic.h>
60
61#include <kern/debug.h>
62
63#include <sys/mcache.h>
64
65#include <string.h>
66
67extern struct inpcbinfo ripcbinfo;
68
69struct pktap_softc {
70	LIST_ENTRY(pktap_softc)		pktp_link;
71	uint32_t					pktp_unit;
72	uint32_t					pktp_dlt_raw_count;
73	uint32_t					pktp_dlt_pkttap_count;
74	struct ifnet				*pktp_ifp;
75	struct pktap_filter			pktp_filters[PKTAP_MAX_FILTERS];
76};
77
78#ifndef PKTAP_DEBUG
79#define	PKTAP_DEBUG 1
80#endif /* PKTAP_DEBUG */
81
82#define	PKTAP_FILTER_OK	0		/* Packet passes filter checks */
83#define	PKTAP_FILTER_SKIP 1		/* Do not tap this packet */
84
85static int pktap_inited = 0;
86
87SYSCTL_DECL(_net_link);
88SYSCTL_NODE(_net_link, IFT_PKTAP, pktap,
89    CTLFLAG_RW  |CTLFLAG_LOCKED, 0, "pktap virtual interface");
90
91static int pktap_total_tap_count = 0;
92SYSCTL_INT(_net_link_pktap, OID_AUTO, total_tap_count,
93    CTLFLAG_RD | CTLFLAG_LOCKED, &pktap_total_tap_count, 0, "");
94
95static u_int64_t pktap_count_unknown_if_type = 0;
96SYSCTL_QUAD(_net_link_pktap, OID_AUTO, count_unknown_if_type,
97    CTLFLAG_RD | CTLFLAG_LOCKED, &pktap_count_unknown_if_type, "");
98
99static int pktap_log = 0;
100SYSCTL_INT(_net_link_pktap, OID_AUTO, log,
101    CTLFLAG_RW | CTLFLAG_LOCKED, &pktap_log, 0, "");
102
/*
 * Log categories: each bit enables one class of messages when set in the
 * pktap_log variable.
 */
#define	PKTP_LOG_FUNC 0x01
#define	PKTP_LOG_FILTER 0x02
#define	PKTP_LOG_INPUT 0x04
#define	PKTP_LOG_OUTPUT 0x08
#define	PKTP_LOG_ERROR 0x10
#define	PKTP_LOG_NOPCB 0x20

/*
 * Print a message, prefixed with the calling function and line, when any of
 * the category bits in `mask' is enabled in pktap_log.
 *
 * `mask' is parenthesized so that an expression such as
 * (PKTP_LOG_FUNC | PKTP_LOG_ERROR) is tested as a whole; without the
 * parentheses '&' binds tighter than '|' and the test is always true.
 * The format arguments are evaluated only when the message is printed.
 */
#define	PKTAP_LOG(mask, fmt, ...) \
do { \
	if ((pktap_log & (mask)) != 0) \
		printf("%s:%d " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
115
116/*
117 * pktap_lck_rw protects the global list of pktap interfaces
118 */
119decl_lck_rw_data(static, pktap_lck_rw_data);
120static lck_rw_t *pktap_lck_rw = &pktap_lck_rw_data;
121static lck_grp_t *pktap_lck_grp = NULL;
122static lck_attr_t *pktap_lck_attr = NULL;
123
124static LIST_HEAD(pktap_list, pktap_softc) pktap_list =
125    LIST_HEAD_INITIALIZER(pktap_list);
126
127int pktap_clone_create(struct if_clone *, u_int32_t, void *);
128int pktap_clone_destroy(struct ifnet *);
129
130static struct if_clone pktap_cloner =
131	IF_CLONE_INITIALIZER(PKTAP_IFNAME,
132		pktap_clone_create,
133		pktap_clone_destroy,
134		0,
135		IF_MAXUNIT);
136
137errno_t pktap_if_output(ifnet_t, mbuf_t);
138errno_t pktap_demux(ifnet_t, mbuf_t, char *, protocol_family_t *);
139errno_t pktap_add_proto(ifnet_t, protocol_family_t,
140	const struct ifnet_demux_desc *, u_int32_t);
141errno_t pktap_del_proto(ifnet_t, protocol_family_t);
142errno_t pktap_getdrvspec(ifnet_t, struct ifdrv64 *);
143errno_t pktap_setdrvspec(ifnet_t, struct ifdrv64 *);
144errno_t pktap_ioctl(ifnet_t, unsigned long, void *);
145void pktap_detach(ifnet_t);
146int pktap_filter_evaluate(struct pktap_softc *, struct ifnet *);
147void pktap_bpf_tap(struct ifnet *, protocol_family_t, struct mbuf *,
148    u_int32_t, u_int32_t, int);
149errno_t pktap_tap_callback(ifnet_t, u_int32_t, bpf_tap_mode);
150
151static void
152pktap_hexdump(int mask, void *addr, size_t len)
153{
154	unsigned char *buf = addr;
155	size_t i;
156
157	if (!(pktap_log & mask))
158		return;
159
160	for (i = 0; i < len; i++) {
161		unsigned char  h = (buf[i] & 0xf0) >> 4;
162		unsigned char  l = buf[i] & 0x0f;
163
164		if (i != 0) {
165			if (i % 32 == 0)
166				printf("\n");
167			else if (i % 4 == 0)
168				printf(" ");
169		}
170		printf("%c%c",
171			h < 10 ? h + '0' : h - 10 + 'a',
172			l < 10 ? l + '0' : l - 10 + 'a');
173	}
174	if (i % 32 != 0)
175		printf("\n");
176}
177
178__private_extern__ void
179pktap_init(void)
180{
181	int error = 0;
182	lck_grp_attr_t *lck_grp_attr = NULL;
183
184	/* Make sure we're called only once */
185	VERIFY(pktap_inited == 0);
186
187	pktap_inited = 1;
188
189	lck_grp_attr = lck_grp_attr_alloc_init();
190	pktap_lck_grp = lck_grp_alloc_init("pktap", lck_grp_attr);
191	pktap_lck_attr = lck_attr_alloc_init();
192#if PKTAP_DEBUG
193	lck_attr_setdebug(pktap_lck_attr);
194#endif /* PKTAP_DEBUG */
195	lck_rw_init(pktap_lck_rw, pktap_lck_grp, pktap_lck_attr);
196	lck_grp_attr_free(lck_grp_attr);
197
198	LIST_INIT(&pktap_list);
199
200	error = if_clone_attach(&pktap_cloner);
201	if (error != 0)
202		panic("%s: if_clone_attach() failed, error %d\n",
203		    __func__, error);
204}
205
206__private_extern__ int
207pktap_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
208{
209	int error = 0;
210	struct pktap_softc *pktap = NULL;
211	struct ifnet_init_params if_init;
212
213	PKTAP_LOG(PKTP_LOG_FUNC, "unit %u\n", unit);
214
215	pktap = _MALLOC(sizeof(struct pktap_softc), M_DEVBUF,
216	    M_WAITOK | M_ZERO);
217	if (pktap == NULL) {
218		printf("%s: _MALLOC failed\n", __func__);
219		error = ENOMEM;
220		goto done;
221	}
222	pktap->pktp_unit = unit;
223
224	/*
225	 * By default accept packet from physical interfaces
226	 */
227	pktap->pktp_filters[0].filter_op = PKTAP_FILTER_OP_PASS;
228	pktap->pktp_filters[0].filter_param = PKTAP_FILTER_PARAM_IF_TYPE;
229	pktap->pktp_filters[0].filter_param_if_type = IFT_ETHER;
230
231	pktap->pktp_filters[1].filter_op = PKTAP_FILTER_OP_PASS;
232	pktap->pktp_filters[1].filter_param = PKTAP_FILTER_PARAM_IF_TYPE;
233	pktap->pktp_filters[1].filter_param_if_type = IFT_IEEE1394;
234	/*
235	 * We do not use a set_bpf_tap() function as we rather rely on the more
236	 * accurate callback passed to bpf_attach()
237	 */
238	bzero(&if_init, sizeof(struct ifnet_init_params));
239	if_init.name = ifc->ifc_name;
240	if_init.unit = unit;
241	if_init.type = IFT_PKTAP;
242	if_init.family = IFNET_FAMILY_LOOPBACK;
243	if_init.output = pktap_if_output;
244	if_init.demux = pktap_demux;
245	if_init.add_proto = pktap_add_proto;
246	if_init.del_proto = pktap_del_proto;
247	if_init.softc = pktap;
248	if_init.ioctl = pktap_ioctl;
249	if_init.detach = pktap_detach;
250
251	error = ifnet_allocate(&if_init, &pktap->pktp_ifp);
252	if (error != 0) {
253		printf("%s: ifnet_allocate failed, error %d\n",
254		    __func__, error);
255		goto done;
256	}
257
258	ifnet_set_flags(pktap->pktp_ifp, IFF_UP, IFF_UP);
259
260	error = ifnet_attach(pktap->pktp_ifp, NULL);
261	if (error != 0) {
262		printf("%s: ifnet_attach failed - error %d\n", __func__, error);
263		ifnet_release(pktap->pktp_ifp);
264		goto done;
265	}
266
267	/* Attach DLT_PKTAP as the default DLT */
268	bpf_attach(pktap->pktp_ifp, DLT_PKTAP, sizeof(struct pktap_header),
269	    NULL, pktap_tap_callback);
270	bpf_attach(pktap->pktp_ifp, DLT_RAW, 0, NULL, pktap_tap_callback);
271
272	/* Take a reference and add to the global list */
273	ifnet_reference(pktap->pktp_ifp);
274	lck_rw_lock_exclusive(pktap_lck_rw);
275	LIST_INSERT_HEAD(&pktap_list, pktap, pktp_link);
276	lck_rw_done(pktap_lck_rw);
277done:
278	if (error != 0) {
279		if (pktap != NULL)
280			_FREE(pktap, M_DEVBUF);
281	}
282	return (error);
283}
284
285__private_extern__ int
286pktap_clone_destroy(struct ifnet *ifp)
287{
288	int error = 0;
289
290	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
291
292	(void) ifnet_detach(ifp);
293
294	return (error);
295}
296
297/*
298 * This function is called whenever a DLT is set on the interface:
299 * - When interface is attached to a BPF device via BIOCSETIF for the
300 *   default DLT
301 * - Whenever a new DLT is selected via BIOCSDLT
302 * - When the interface is detached from a BPF device (direction is zero)
303 */
304__private_extern__ errno_t
305pktap_tap_callback(ifnet_t ifp, u_int32_t dlt, bpf_tap_mode direction)
306{
307	struct pktap_softc *pktap;
308
309	pktap = ifp->if_softc;
310	if (pktap == NULL) {
311		printf("%s: if_softc is NULL for ifp %s\n", __func__,
312		    ifp->if_xname);
313		goto done;
314	}
315	switch (dlt) {
316		case DLT_RAW:
317			if (direction == 0) {
318				if (pktap->pktp_dlt_raw_count > 0) {
319					pktap->pktp_dlt_raw_count--;
320					OSAddAtomic(-1, &pktap_total_tap_count);
321
322				}
323			} else {
324				pktap->pktp_dlt_raw_count++;
325				OSAddAtomic(1, &pktap_total_tap_count);
326			}
327			break;
328		case DLT_PKTAP:
329			if (direction == 0) {
330				if (pktap->pktp_dlt_pkttap_count > 0) {
331					pktap->pktp_dlt_pkttap_count--;
332					OSAddAtomic(-1, &pktap_total_tap_count);
333				}
334			} else {
335				pktap->pktp_dlt_pkttap_count++;
336				OSAddAtomic(1, &pktap_total_tap_count);
337			}
338			break;
339	}
340done:
341	/*
342	 * Attachements count must be positive and we're in trouble
343	 * if we have more that 2**31 attachements
344	 */
345	VERIFY(pktap_total_tap_count >= 0);
346
347	return (0);
348}
349
350__private_extern__ errno_t
351pktap_if_output(ifnet_t ifp, mbuf_t m)
352{
353	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
354	mbuf_freem(m);
355	return (ENOTSUP);
356}
357
358__private_extern__ errno_t
359pktap_demux(ifnet_t ifp, __unused mbuf_t m, __unused char *header,
360	__unused protocol_family_t *ppf)
361{
362	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
363	return (ENOTSUP);
364}
365
366__private_extern__ errno_t
367pktap_add_proto(__unused ifnet_t ifp, protocol_family_t pf,
368    __unused const struct ifnet_demux_desc *dmx, __unused u_int32_t cnt)
369{
370	PKTAP_LOG(PKTP_LOG_FUNC, "%s pf %u\n", ifp->if_xname, pf);
371	return (0);
372}
373
374__private_extern__ errno_t
375pktap_del_proto(__unused ifnet_t ifp, __unused protocol_family_t pf)
376{
377	PKTAP_LOG(PKTP_LOG_FUNC, "%s pf %u\n", ifp->if_xname, pf);
378	return (0);
379}
380
381__private_extern__ errno_t
382pktap_getdrvspec(ifnet_t ifp, struct ifdrv64 *ifd)
383{
384	errno_t error = 0;
385	struct pktap_softc *pktap;
386	int i;
387
388	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
389
390	pktap = ifp->if_softc;
391	if (pktap == NULL) {
392		error = ENOENT;
393		printf("%s: pktap NULL - error %d\n", __func__, error);
394		goto done;
395	}
396
397	switch (ifd->ifd_cmd) {
398	case PKTP_CMD_FILTER_GET: {
399		struct x_pktap_filter x_filters[PKTAP_MAX_FILTERS];
400
401		bzero(&x_filters, sizeof(x_filters));
402
403		if (ifd->ifd_len < PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter)) {
404			printf("%s: PKTP_CMD_FILTER_GET ifd_len %llu too small - error %d\n",
405				__func__, ifd->ifd_len, error);
406			error = EINVAL;
407			break;
408		}
409		for (i = 0; i < PKTAP_MAX_FILTERS; i++) {
410			struct pktap_filter *pktap_filter = pktap->pktp_filters + i;
411			struct x_pktap_filter *x_filter = x_filters + i;
412
413			x_filter->filter_op = pktap_filter->filter_op;
414			x_filter->filter_param = pktap_filter->filter_param;
415
416			if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE)
417				x_filter->filter_param_if_type = pktap_filter->filter_param_if_type;
418			else if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME)
419				strlcpy(x_filter->filter_param_if_name,
420						pktap_filter->filter_param_if_name,
421						sizeof(x_filter->filter_param_if_name));
422		}
423		error = copyout(x_filters, ifd->ifd_data,
424			PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter));
425		if (error) {
426			printf("%s: PKTP_CMD_FILTER_GET copyout - error %d\n", __func__, error);
427			goto done;
428		}
429		break;
430	}
431	case PKTP_CMD_TAP_COUNT: {
432		uint32_t tap_count = pktap->pktp_dlt_raw_count + pktap->pktp_dlt_pkttap_count;
433
434		if (ifd->ifd_len < sizeof(tap_count)) {
435			printf("%s: PKTP_CMD_TAP_COUNT ifd_len %llu too small - error %d\n",
436				__func__, ifd->ifd_len, error);
437			error = EINVAL;
438			break;
439		}
440		error = copyout(&tap_count, ifd->ifd_data, sizeof(tap_count));
441		if (error) {
442			printf("%s: PKTP_CMD_TAP_COUNT copyout - error %d\n", __func__, error);
443			goto done;
444		}
445		break;
446	}
447	default:
448		error = EINVAL;
449		break;
450	}
451
452done:
453	return (error);
454}
455
456__private_extern__ errno_t
457pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd)
458{
459	errno_t error = 0;
460	struct pktap_softc *pktap;
461
462	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
463
464	pktap = ifp->if_softc;
465	if (pktap == NULL) {
466		error = ENOENT;
467		printf("%s: pktap NULL - error %d\n", __func__, error);
468		goto done;
469	}
470
471	switch (ifd->ifd_cmd) {
472	case PKTP_CMD_FILTER_SET: {
473		struct x_pktap_filter user_filters[PKTAP_MAX_FILTERS];
474		int i;
475		int got_op_none = 0;
476
477		if (ifd->ifd_len != PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter)) {
478			printf("%s: PKTP_CMD_FILTER_SET bad ifd_len %llu - error %d\n",
479				__func__, ifd->ifd_len, error);
480			error = EINVAL;
481			break;
482		}
483		error = copyin(ifd->ifd_data, &user_filters, ifd->ifd_len);
484		if (error) {
485			printf("%s: copyin - error %d\n", __func__, error);
486			goto done;
487		}
488		/*
489		 * Validate user provided parameters
490		 */
491		for (i = 0; i < PKTAP_MAX_FILTERS; i++) {
492			struct x_pktap_filter *x_filter = user_filters + i;
493
494			switch (x_filter->filter_op) {
495				case PKTAP_FILTER_OP_NONE:
496					/* Following entries must be PKTAP_FILTER_OP_NONE */
497					got_op_none = 1;
498					break;
499				case PKTAP_FILTER_OP_PASS:
500				case PKTAP_FILTER_OP_SKIP:
501					/* Invalid after PKTAP_FILTER_OP_NONE */
502					if (got_op_none) {
503						error = EINVAL;
504						break;
505					}
506					break;
507				default:
508					error = EINVAL;
509					break;
510			}
511			if (error != 0)
512				break;
513
514			switch (x_filter->filter_param) {
515				case PKTAP_FILTER_OP_NONE:
516					if (x_filter->filter_op != PKTAP_FILTER_OP_NONE) {
517						error = EINVAL;
518						break;
519					}
520					break;
521
522				/*
523				 * Do not allow to tap a pktap from a pktap
524				 */
525				case PKTAP_FILTER_PARAM_IF_TYPE:
526					if (x_filter->filter_param_if_type == IFT_PKTAP ||
527						x_filter->filter_param_if_type > 0xff) {
528						error = EINVAL;
529						break;
530					}
531					break;
532
533				case PKTAP_FILTER_PARAM_IF_NAME:
534					if (x_filter->filter_param_if_name == 0 ||
535						strncmp(x_filter->filter_param_if_name, PKTAP_IFNAME,
536							strlen(PKTAP_IFNAME)) == 0) {
537						error = EINVAL;
538						break;
539					}
540					break;
541
542				default:
543					error = EINVAL;
544					break;
545			}
546			if (error != 0)
547				break;
548		}
549		if (error != 0)
550			break;
551		for (i = 0; i < PKTAP_MAX_FILTERS; i++) {
552			struct pktap_filter *pktap_filter = pktap->pktp_filters + i;
553			struct x_pktap_filter *x_filter = user_filters + i;
554
555			pktap_filter->filter_op = x_filter->filter_op;
556			pktap_filter->filter_param = x_filter->filter_param;
557
558			if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE)
559				pktap_filter->filter_param_if_type = x_filter->filter_param_if_type;
560			else if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) {
561				size_t len;
562
563				strlcpy(pktap_filter->filter_param_if_name,
564						x_filter->filter_param_if_name,
565						sizeof(pktap_filter->filter_param_if_name));
566				/*
567				 * If name does not end with a number then it's a "wildcard" match
568				 * where we compare the prefix of the interface name
569				 */
570				len = strlen(pktap_filter->filter_param_if_name);
571				if (pktap_filter->filter_param_if_name[len] < '0' ||
572					pktap_filter->filter_param_if_name[len] > '9')
573					pktap_filter->filter_ifname_prefix_len = len;
574			}
575		}
576		break;
577	}
578	default:
579		error = EINVAL;
580		break;
581	}
582
583done:
584	return (error);
585}
586
587__private_extern__ errno_t
588pktap_ioctl(ifnet_t ifp, unsigned long cmd, void *data)
589{
590	errno_t error = 0;
591
592	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
593
594	if ((cmd & IOC_IN)) {
595		error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER);
596		if (error) {
597			PKTAP_LOG(PKTP_LOG_ERROR,
598				"%s: kauth_authorize_generic(KAUTH_GENERIC_ISSUSER) - error %d\n",
599				__func__, error);
600			goto done;
601		}
602	}
603
604	switch (cmd) {
605	case SIOCGDRVSPEC32: {
606		struct ifdrv64 ifd;
607		struct ifdrv32 *ifd32 = (struct ifdrv32 *)data;
608
609		memcpy(ifd.ifd_name, ifd32->ifd_name, sizeof(ifd.ifd_name));
610		ifd.ifd_cmd = ifd32->ifd_cmd;
611		ifd.ifd_len = ifd32->ifd_len;
612		ifd.ifd_data = ifd32->ifd_data;
613
614		error = pktap_getdrvspec(ifp, &ifd);
615
616		break;
617	}
618	case SIOCGDRVSPEC64: {
619		struct ifdrv64 *ifd64 = (struct ifdrv64 *)data;
620
621		error = pktap_getdrvspec(ifp, ifd64);
622
623		break;
624	}
625	case SIOCSDRVSPEC32: {
626		struct ifdrv64 ifd;
627		struct ifdrv32 *ifd32 = (struct ifdrv32 *)data;
628
629		memcpy(ifd.ifd_name, ifd32->ifd_name, sizeof(ifd.ifd_name));
630		ifd.ifd_cmd = ifd32->ifd_cmd;
631		ifd.ifd_len = ifd32->ifd_len;
632		ifd.ifd_data = ifd32->ifd_data;
633
634		error = pktap_setdrvspec(ifp, &ifd);
635		break;
636	}
637	case SIOCSDRVSPEC64: {
638		struct ifdrv64 *ifd64 = (struct ifdrv64 *)data;
639
640		error = pktap_setdrvspec(ifp, ifd64);
641
642		break;
643	}
644	default:
645		error = ENOTSUP;
646		break;
647	}
648done:
649	return (error);
650}
651
652__private_extern__ void
653pktap_detach(ifnet_t ifp)
654{
655	struct pktap_softc *pktap;
656
657	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
658
659	lck_rw_lock_exclusive(pktap_lck_rw);
660
661	pktap = ifp->if_softc;
662	ifp->if_softc = NULL;
663	LIST_REMOVE(pktap, pktp_link);
664
665	lck_rw_done(pktap_lck_rw);
666
667	/* Drop reference as it's no more on the global list */
668	ifnet_release(ifp);
669
670	_FREE(pktap, M_DEVBUF);
671
672	/* This is for the reference taken by ifnet_attach() */
673	(void) ifnet_release(ifp);
674}
675
676__private_extern__ int
677pktap_filter_evaluate(struct pktap_softc *pktap, struct ifnet *ifp)
678{
679	int i;
680	int result = PKTAP_FILTER_SKIP; /* Need positive matching rule to pass */
681	int match = 0;
682
683	for (i = 0; i < PKTAP_MAX_FILTERS; i++) {
684		struct pktap_filter *pktap_filter = pktap->pktp_filters + i;
685		size_t len = pktap_filter->filter_ifname_prefix_len != 0 ?
686			pktap_filter->filter_ifname_prefix_len : PKTAP_IFXNAMESIZE;
687
688		switch (pktap_filter->filter_op) {
689			case PKTAP_FILTER_OP_NONE:
690				match = 1;
691				break;
692
693			case PKTAP_FILTER_OP_PASS:
694				if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE) {
695					if (pktap_filter->filter_param_if_type == 0 ||
696						ifp->if_type == pktap_filter->filter_param_if_type) {
697						result = PKTAP_FILTER_OK;
698						match = 1;
699						PKTAP_LOG(PKTP_LOG_FILTER, "pass %s match type %u\n",
700							ifp->if_xname, pktap_filter->filter_param_if_type);
701						break;
702					}
703				}
704				if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) {
705					if (strncmp(ifp->if_xname, pktap_filter->filter_param_if_name,
706							len) == 0) {
707						result = PKTAP_FILTER_OK;
708						match = 1;
709						PKTAP_LOG(PKTP_LOG_FILTER, "pass %s match name %s\n",
710							ifp->if_xname, pktap_filter->filter_param_if_name);
711						break;
712					}
713				}
714				break;
715
716			case PKTAP_FILTER_OP_SKIP:
717				if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE) {
718					if (pktap_filter->filter_param_if_type == 0 ||
719						ifp->if_type == pktap_filter->filter_param_if_type) {
720						result = PKTAP_FILTER_SKIP;
721						match = 1;
722						PKTAP_LOG(PKTP_LOG_FILTER, "skip %s match type %u\n",
723							ifp->if_xname, pktap_filter->filter_param_if_type);
724						break;
725					}
726				}
727				if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) {
728					if (strncmp(ifp->if_xname, pktap_filter->filter_param_if_name,
729							len) == 0) {
730						result = PKTAP_FILTER_SKIP;
731						match = 1;
732						PKTAP_LOG(PKTP_LOG_FILTER, "skip %s match name %s\n",
733							ifp->if_xname, pktap_filter->filter_param_if_name);
734						break;
735					}
736				}
737				break;
738		}
739		if (match)
740			break;
741	}
742
743	if (match == 0) {
744		PKTAP_LOG(PKTP_LOG_FILTER, "%s no match\n",
745			ifp->if_xname);
746	}
747	return (result);
748}
749
750static void
751pktap_set_procinfo(struct pktap_header *hdr, struct so_procinfo *soprocinfo)
752{
753	hdr->pth_pid = soprocinfo->spi_pid;
754	proc_name(soprocinfo->spi_pid, hdr->pth_comm, MAXCOMLEN);
755	if (soprocinfo->spi_pid != 0)
756		uuid_copy(hdr->pth_uuid, soprocinfo->spi_uuid);
757
758	/*
759	 * When not delegated, the effective pid is the same as the real pid
760	 */
761	if (soprocinfo->spi_epid != soprocinfo->spi_pid) {
762		hdr->pth_flags |= PTH_FLAG_PROC_DELEGATED;
763		hdr->pth_epid = soprocinfo->spi_epid;
764		proc_name(soprocinfo->spi_epid, hdr->pth_ecomm, MAXCOMLEN);
765		if (soprocinfo->spi_epid != 0)
766			uuid_copy(hdr->pth_uuid, soprocinfo->spi_euuid);
767	}
768}
769
770__private_extern__ void
771pktap_finalize_proc_info(struct pktap_header *hdr)
772{
773	int found;
774	struct so_procinfo soprocinfo;
775
776	if (!(hdr->pth_flags & PTH_FLAG_DELAY_PKTAP))
777		return;
778
779	/*
780	 * Clear the flag as it's internal
781	 */
782	hdr->pth_flags &= ~PTH_FLAG_DELAY_PKTAP;
783
784	if (hdr->pth_ipproto == IPPROTO_TCP)
785		found = inp_findinpcb_procinfo(&tcbinfo, hdr->pth_flowid,
786		    &soprocinfo);
787	else if (hdr->pth_ipproto == IPPROTO_UDP)
788		found = inp_findinpcb_procinfo(&udbinfo, hdr->pth_flowid,
789		    &soprocinfo);
790	else
791		found = inp_findinpcb_procinfo(&ripcbinfo, hdr->pth_flowid,
792		    &soprocinfo);
793
794	if (found == 1)
795		pktap_set_procinfo(hdr, &soprocinfo);
796}
797
798__private_extern__ void
799pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto,
800	struct mbuf *m, u_int32_t pre, int outgoing, struct ifnet *ifp)
801{
802	int found = 0;
803	struct so_procinfo soprocinfo;
804
805	/*
806	 * Getting the pid and procname is expensive
807	 * For outgoing, do the lookup only if there's an
808	 * associated socket as indicated by the flowhash
809	 */
810	if (outgoing != 0 && (m->m_pkthdr.pkt_flags &
811		(PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC)) == (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
812		m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
813		/*
814		 * To avoid lock ordering issues we delay the process lookup
815		 * to the BPF read as we cannot
816		 * assume the socket lock is unlocked on output
817		 */
818		if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) ||
819		    m->m_pkthdr.pkt_proto == IPPROTO_TCP ||
820		    m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
821			found = 0;
822			hdr->pth_flags |= PTH_FLAG_DELAY_PKTAP;
823			hdr->pth_flowid = m->m_pkthdr.pkt_flowid;
824			if (m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK)
825				hdr->pth_ipproto = IPPROTO_RAW;
826			else
827				hdr->pth_ipproto = m->m_pkthdr.pkt_proto;
828		}
829	} else if (outgoing == 0) {
830		struct inpcb *inp = NULL;
831
832		if (proto == PF_INET) {
833			struct ip ip;
834			errno_t error;
835			size_t hlen;
836			struct in_addr faddr, laddr;
837			u_short fport, lport;
838			struct inpcbinfo *pcbinfo = NULL;
839			int wildcard = 0;
840
841			error = mbuf_copydata(m, pre, sizeof(struct ip), &ip);
842			if (error != 0) {
843				PKTAP_LOG(PKTP_LOG_ERROR,
844				    "mbuf_copydata tcp v4 failed for %s\n",
845				    hdr->pth_ifname);
846				goto done;
847			}
848			hlen = IP_VHL_HL(ip.ip_vhl) << 2;
849
850			faddr = ip.ip_src;
851			laddr = ip.ip_dst;
852
853			if (ip.ip_p == IPPROTO_TCP) {
854				struct tcphdr th;
855
856				error = mbuf_copydata(m, pre + hlen,
857					sizeof(struct tcphdr), &th);
858				if (error != 0)
859					goto done;
860
861				fport = th.th_sport;
862				lport = th.th_dport;
863
864				pcbinfo = &tcbinfo;
865			} else if (ip.ip_p == IPPROTO_UDP) {
866				struct udphdr uh;
867
868				error = mbuf_copydata(m, pre + hlen,
869					sizeof(struct udphdr), &uh);
870				if (error != 0) {
871					PKTAP_LOG(PKTP_LOG_ERROR,
872					    "mbuf_copydata udp v4 failed for %s\n",
873					    hdr->pth_ifname);
874					goto done;
875				}
876				fport = uh.uh_sport;
877				lport = uh.uh_dport;
878
879				pcbinfo = &udbinfo;
880				wildcard = 1;
881			}
882			if (pcbinfo != NULL) {
883				inp = in_pcblookup_hash(pcbinfo, faddr, fport,
884					laddr, lport, wildcard, outgoing ? NULL : ifp);
885
886				if (inp == NULL && hdr->pth_iftype != IFT_LOOP)
887					PKTAP_LOG(PKTP_LOG_NOPCB,
888					    "in_pcblookup_hash no pcb %s\n",
889					    hdr->pth_ifname);
890			} else {
891				PKTAP_LOG(PKTP_LOG_NOPCB,
892				    "unknown ip_p %u on %s\n",
893				    ip.ip_p, hdr->pth_ifname);
894				pktap_hexdump(PKTP_LOG_NOPCB, &ip, sizeof(struct ip));
895			}
896		} else if (proto == PF_INET6) {
897			struct ip6_hdr ip6;
898			errno_t error;
899			struct in6_addr *faddr;
900			struct in6_addr *laddr;
901			u_short fport, lport;
902			struct inpcbinfo *pcbinfo = NULL;
903			int wildcard = 0;
904
905			error = mbuf_copydata(m, pre, sizeof(struct ip6_hdr), &ip6);
906			if (error != 0)
907				goto done;
908
909			faddr = &ip6.ip6_src;
910			laddr = &ip6.ip6_dst;
911
912			if (ip6.ip6_nxt == IPPROTO_TCP) {
913				struct tcphdr th;
914
915				error = mbuf_copydata(m, pre + sizeof(struct ip6_hdr),
916					sizeof(struct tcphdr), &th);
917				if (error != 0) {
918					PKTAP_LOG(PKTP_LOG_ERROR,
919					    "mbuf_copydata tcp v6 failed for %s\n",
920					    hdr->pth_ifname);
921					goto done;
922				}
923
924				fport = th.th_sport;
925				lport = th.th_dport;
926
927				pcbinfo = &tcbinfo;
928			} else if (ip6.ip6_nxt == IPPROTO_UDP) {
929				struct udphdr uh;
930
931				error = mbuf_copydata(m, pre + sizeof(struct ip6_hdr),
932					sizeof(struct udphdr), &uh);
933				if (error != 0) {
934					PKTAP_LOG(PKTP_LOG_ERROR,
935					    "mbuf_copydata udp v6 failed for %s\n",
936					    hdr->pth_ifname);
937					goto done;
938				}
939
940				fport = uh.uh_sport;
941				lport = uh.uh_dport;
942
943				pcbinfo = &udbinfo;
944				wildcard = 1;
945			}
946			if (pcbinfo != NULL) {
947				inp = in6_pcblookup_hash(pcbinfo, faddr, fport,
948					laddr, lport, wildcard, outgoing ? NULL : ifp);
949
950				if (inp == NULL && hdr->pth_iftype != IFT_LOOP)
951					PKTAP_LOG(PKTP_LOG_NOPCB,
952					    "in6_pcblookup_hash no pcb %s\n",
953					    hdr->pth_ifname);
954			} else {
955				PKTAP_LOG(PKTP_LOG_NOPCB,
956				    "unknown ip6.ip6_nxt %u on %s\n",
957				    ip6.ip6_nxt, hdr->pth_ifname);
958				pktap_hexdump(PKTP_LOG_NOPCB, &ip6, sizeof(struct ip6_hdr));
959			}
960		}
961		if (inp != NULL) {
962			if (inp->inp_state != INPCB_STATE_DEAD && inp->inp_socket != NULL) {
963				found = 1;
964				inp_get_soprocinfo(inp, &soprocinfo);
965			}
966			in_pcb_checkstate(inp, WNT_RELEASE, 0);
967		}
968	}
969done:
970	/*
971	 * -1 means PID not found
972	 */
973	hdr->pth_pid = -1;
974	hdr->pth_epid = -1;
975	if (found != 0)
976		pktap_set_procinfo(hdr, &soprocinfo);
977}
978
979__private_extern__ void
980pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m,
981    u_int32_t pre, u_int32_t post, int outgoing)
982{
983	struct pktap_softc *pktap;
984	void (*bpf_tap_func)(ifnet_t, u_int32_t, mbuf_t, void *, size_t) =
985		outgoing ? bpf_tap_out : bpf_tap_in;
986
987	lck_rw_lock_shared(pktap_lck_rw);
988
989	/*
990	 * No need to take the ifnet_lock as the struct ifnet field if_bpf is
991	 * protected by the BPF subsystem
992	 */
993	LIST_FOREACH(pktap, &pktap_list, pktp_link) {
994		int filter_result;
995
996		filter_result = pktap_filter_evaluate(pktap, ifp);
997		if (filter_result == PKTAP_FILTER_SKIP)
998			continue;
999
1000		if (pktap->pktp_dlt_raw_count > 0) {
1001			/* We accept only IPv4 and IPv6 packets for the raw DLT */
1002			if ((proto == AF_INET ||proto == AF_INET6) &&
1003				!(m->m_pkthdr.pkt_flags & PKTF_INET_RESOLVE)) {
1004				/*
1005				 * We can play just with the length of the first mbuf in the
1006				 * chain because bpf_tap_imp() disregard the packet length
1007				 * of the mbuf packet header.
1008				 */
1009				if (mbuf_setdata(m, m->m_data + pre,  m->m_len - pre) == 0) {
1010					bpf_tap_func(pktap->pktp_ifp, DLT_RAW, m, NULL, 0);
1011					mbuf_setdata(m, m->m_data - pre, m->m_len + pre);
1012				}
1013			}
1014		}
1015
1016		if (pktap->pktp_dlt_pkttap_count > 0) {
1017			struct {
1018				struct pktap_header hdr;
1019				u_int32_t proto;
1020			} hdr_buffer;
1021			struct pktap_header *hdr = &hdr_buffer.hdr;
1022			size_t hdr_size = sizeof(struct pktap_header);
1023			int unknown_if_type = 0;
1024			size_t data_adjust = 0;
1025			u_int32_t pre_adjust = 0;
1026
1027			/* Verify the structure is packed */
1028			_CASSERT(sizeof(hdr_buffer) == sizeof(struct pktap_header) + sizeof(u_int32_t));
1029
1030			bzero(&hdr_buffer, sizeof(hdr_buffer));
1031			hdr->pth_length = sizeof(struct pktap_header);
1032			hdr->pth_type_next = PTH_TYPE_PACKET;
1033
1034			/*
1035			 * Set DLT of packet based on interface type
1036			 */
1037			switch (ifp->if_type) {
1038				case IFT_LOOP:
1039				case IFT_GIF:
1040				case IFT_STF:
1041				case IFT_CELLULAR:
1042					/*
1043					 * Packets from pdp interfaces have no loopback
1044					 * header that contain the protocol number.
1045					 * As BPF just concatenate the header and the
1046					 * packet content in a single buffer,
1047					 * stash the protocol after the pktap header
1048					 * and adjust the size of the header accordingly
1049					 */
1050					hdr->pth_dlt = DLT_NULL;
1051					if (pre == 0) {
1052						hdr_buffer.proto = proto;
1053						hdr_size = sizeof(hdr_buffer);
1054						pre_adjust = sizeof(hdr_buffer.proto);
1055					}
1056					break;
1057				case IFT_ETHER:
1058				case IFT_BRIDGE:
1059				case IFT_L2VLAN:
1060				case IFT_IEEE8023ADLAG:
1061					hdr->pth_dlt = DLT_EN10MB;
1062					break;
1063				case IFT_PPP:
1064					hdr->pth_dlt = DLT_PPP;
1065					break;
1066				case IFT_IEEE1394:
1067					hdr->pth_dlt = DLT_APPLE_IP_OVER_IEEE1394;
1068					break;
1069				case IFT_OTHER:
1070					if (strncmp(ifp->if_name, "utun", strlen("utun")) == 0) {
1071						/*
1072						 * For utun:
1073						 * - incoming packets do not have the prefix set to four
1074						 * - some packets are as small as two bytes!
1075						 */
1076						if (m_pktlen(m) < 4)
1077							goto done;
1078						if (proto != AF_INET && proto != AF_INET6)
1079							goto done;
1080						if (proto == AF_INET && (size_t) m_pktlen(m) - 4 < sizeof(struct ip))
1081							goto done;
1082						if (proto == AF_INET6 && (size_t) m_pktlen(m) - 4 < sizeof(struct ip6_hdr))
1083							goto done;
1084						/*
1085						 * Skip the protocol in the mbuf as it's in network order
1086						 */
1087						pre = 4;
1088						data_adjust = 4;
1089						hdr->pth_dlt = DLT_NULL;
1090						hdr_buffer.proto = proto;
1091						hdr_size = sizeof(hdr_buffer);
1092						break;
1093					}
1094				default:
1095					if (pre == 0)
1096						hdr->pth_dlt = DLT_RAW;
1097					else
1098						unknown_if_type = 1;
1099					break;
1100			}
1101			if (unknown_if_type) {
1102				PKTAP_LOG(PKTP_LOG_FUNC,
1103				    "unknown if_type %u for %s\n",
1104				    ifp->if_type, ifp->if_xname);
1105				pktap_count_unknown_if_type += 1;
1106			} else {
1107				snprintf(hdr->pth_ifname, sizeof(hdr->pth_ifname), "%s",
1108					ifp->if_xname);
1109				hdr->pth_flags |= outgoing ? PTH_FLAG_DIR_OUT : PTH_FLAG_DIR_IN;
1110				hdr->pth_protocol_family = proto;
1111				hdr->pth_frame_pre_length = pre + pre_adjust;
1112				hdr->pth_frame_post_length = post;
1113				hdr->pth_iftype = ifp->if_type;
1114				hdr->pth_ifunit = ifp->if_unit;
1115
1116				pktap_fill_proc_info(hdr, proto, m, pre, outgoing, ifp);
1117
1118				hdr->pth_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
1119
1120				if (data_adjust == 0) {
1121					bpf_tap_func(pktap->pktp_ifp, DLT_PKTAP, m, hdr, hdr_size);
1122				} else {
1123					/*
1124					 * We can play just with the length of the first mbuf in the
1125					 * chain because bpf_tap_imp() disregard the packet length
1126					 * of the mbuf packet header.
1127					 */
1128					if (mbuf_setdata(m, m->m_data + data_adjust,  m->m_len - data_adjust) == 0) {
1129						bpf_tap_func(pktap->pktp_ifp, DLT_PKTAP, m, hdr, hdr_size);
1130						mbuf_setdata(m, m->m_data - data_adjust, m->m_len + data_adjust);
1131					}
1132				}
1133			}
1134		}
1135	}
1136done:
1137	lck_rw_done(pktap_lck_rw);
1138}
1139
1140__private_extern__ void
1141pktap_input(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m,
1142    char *frame_header)
1143{
1144	char *hdr = (char *)mbuf_data(m);
1145	char *start = (char *)mbuf_datastart(m);
1146
1147	/* Fast path */
1148	if (pktap_total_tap_count == 0)
1149		return;
1150
1151	/* Make sure the frame header is fully contained in the  mbuf */
1152	if (frame_header != NULL && frame_header >= start && frame_header <= hdr) {
1153		size_t o_len = m->m_len;
1154		u_int32_t pre = hdr - frame_header;
1155
1156		if (mbuf_setdata(m, frame_header, o_len + pre) == 0) {
1157			PKTAP_LOG(PKTP_LOG_INPUT, "ifp %s proto %u pre %u post %u\n",
1158				ifp->if_xname, proto, pre, 0);
1159
1160			pktap_bpf_tap(ifp, proto, m,  pre, 0, 0);
1161			mbuf_setdata(m, hdr, o_len);
1162		}
1163	} else {
1164		PKTAP_LOG(PKTP_LOG_INPUT, "ifp %s proto %u pre %u post %u\n",
1165			ifp->if_xname, proto, 0, 0);
1166
1167		pktap_bpf_tap(ifp, proto, m, 0, 0, 0);
1168	}
1169}
1170
1171__private_extern__ void
1172pktap_output(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m,
1173    u_int32_t pre, u_int32_t post)
1174{
1175	/* Fast path */
1176	if (pktap_total_tap_count == 0)
1177		return;
1178
1179	PKTAP_LOG(PKTP_LOG_OUTPUT, "ifp %s proto %u pre %u post %u\n",
1180		ifp->if_xname, proto, pre, post);
1181
1182	pktap_bpf_tap(ifp, proto, m, pre, post, 1);
1183}
1184