/*
 * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * THEORY OF OPERATION
 *
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data while the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies
 * back to the kernel.
 *
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * It should be noted that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * Notes:
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters
 *   for the sake of simplicity of the implementation.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT.
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
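 *
 * As an illustration of step 2) above, a user space filter agent might attach
 * to the content filter kernel control and set its filter control unit roughly
 * as follows. This is a hypothetical user space sketch, not code from this
 * subsystem; the unit value (42), the chosen kcunit (1) and the omitted error
 * handling are assumptions for the example:
 *
 *    struct ctl_info info;
 *    struct sockaddr_ctl addr;
 *    uint32_t unit = 42;     // must match the unit registered with NECP
 *    int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *    bzero(&info, sizeof(info));
 *    strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *        sizeof(info.ctl_name));
 *    ioctl(fd, CTLIOCGINFO, &info);      // resolve the control name to an id
 *
 *    bzero(&addr, sizeof(addr));
 *    addr.sc_len = sizeof(addr);
 *    addr.sc_family = AF_SYSTEM;
 *    addr.ss_sysaddr = AF_SYS_CONTROL;
 *    addr.sc_id = info.ctl_id;
 *    addr.sc_unit = 1;                   // kcunit, assumed available
 *    connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *
 *    setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *        &unit, sizeof(unit));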
 *
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (this is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells if
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
 *
 *
 * EVENT MESSAGES
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
 *
 * There are two kinds of content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow.
 *
 *
 * ACTION MESSAGES
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * then the data flow becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
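 *
 * For example, an agent that has decided to let all outgoing data through
 * while still inspecting incoming data could send the following
 * CFM_OP_DATA_UPDATE. This is a hypothetical user space sketch; "fd" and
 * "sock_id" (taken from a previous CFM_OP_SOCKET_ATTACHED event) and the
 * 1024-byte incoming peek window are assumptions for the example:
 *
 *    struct cfil_msg_action action;
 *
 *    bzero(&action, sizeof(action));
 *    action.cfa_msghdr.cfm_len = sizeof(action);
 *    action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *    action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *    action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *    action.cfa_msghdr.cfm_sock_id = sock_id;
 *    action.cfa_out_pass_offset = CFM_MAX_OFFSET;   // outgoing unrestricted
 *    action.cfa_out_peek_offset = CFM_MAX_OFFSET;
 *    action.cfa_in_pass_offset = 0;                 // nothing passed yet
 *    action.cfa_in_peek_offset = 1024;              // peek at the first 1 KB
 *    send(fd, &action, sizeof(action), 0);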
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a greater than 0 peek offset is sent by the user space filter agent.
 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * then the flow of update data events becomes unrestricted.
 *
 * Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Attempts
 * to set too small a peek value are silently ignored.
 *
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected;
 * - A content filter specific state in a set of "struct cfil_entry"
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
 * to the kernel control socket unit it is associated with and also has a
 * pointer to the corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The pass offset
 * - The peek offset
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and are pending for a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead will sit in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offsets of the data span of
 * the list of mbufs.
 *
 * The data moves into the three content filter queues according to this
 * sequence:
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: The sequence (a),(b) may be repeated several times if there is more
 * than one content filter attached to the TCP/IP socket.
 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and that is waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
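 *
 * For one direction, the queues and offsets of a "struct cfil_entry" obey the
 * following invariants; this is a restatement of the checks performed by
 * cfil_queue_verify() and cfil_entry_buf_verify() below:
 *
 *    cfe_pending_q.q_start <= cfe_pending_q.q_end <= cfe_ctl_q.q_start
 *    cfe_ctl_q.q_start <= cfe_ctl_q.q_end
 *    cfe_ctl_q.q_start <= cfe_peeked
 *    cfe_pass_offset <= cfe_peek_offset
 *
 * In other words, data awaiting a filtering decision (cfe_pending_q) always
 * precedes data not yet delivered to the filter (cfe_ctl_q), and everything
 * in front of cfe_ctl_q has already been peeked at by the filter.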
* * The "cfi_inject_q" queue holds data that has been fully allowed to pass * by the user space filter agent and that needs to be re-injected into the * TCP/IP socket. * * * IMPACT ON FLOW CONTROL * * An essential aspect of the content filer subsystem is to minimize the * impact on flow control of the TCP/IP sockets being filtered. * * The processing overhead of the content filtering may have an effect on * flow control by adding noticeable delays and cannot be eliminated -- * care must be taken by the user space filter agent to minimize the * processing delays. * * The amount of data being filtered is kept in buffers while waiting for * a decision by the user space filter agent. This amount of data pending * needs to be subtracted from the amount of data available in the * corresponding TCP/IP socket buffer. This is done by modifying * sbspace() and tcp_sbspace() to account for amount of data pending * in the content filter. * * * LOCKING STRATEGY * * The global state of content filter subsystem is protected by a single * read-write lock "cfil_lck_rw". The data flow can be done with the * cfil read-write lock held as shared so it can be re-entered from multiple * threads. * * The per TCP/IP socket content filterstate -- "struct cfil_info" -- is * protected by the socket lock. * * A TCP/IP socket lock cannot be taken while the cfil read-write lock * is held. That's why we have some sequences where we drop the cfil read-write * lock before taking the TCP/IP lock. * * It is also important to lock the TCP/IP socket buffer while the content * filter is modifying the amount of pending data. Otherwise the calculations * in sbspace() and tcp_sbspace() could be wrong. * * The "cfil_lck_rw" protects "struct content_filter" and also the fields * "cfe_link" and "cfe_filter" of "struct cfil_entry". * * Actually "cfe_link" and "cfe_filter" are protected by both by * "cfil_lck_rw" and the socket lock: they may be modified only when * "cfil_lck_rw" is exclusive and the socket is locked. * * To read the other fields of "struct content_filter" we have to take * "cfil_lck_rw" in shared mode. 
* * * LIMITATIONS * * - For TCP sockets only * * - Does not support TCP unordered messages */ /* * TO DO LIST * * SOONER: * * Deal with OOB * * LATER: * * If support datagram, enqueue control and address mbufs as well */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_CONTENT_FILTER 2 struct cfil_entry; /* * The structure content_filter represents a user space content filter * It's created and associated with a kernel control socket instance */ struct content_filter { kern_ctl_ref cf_kcref; u_int32_t cf_kcunit; u_int32_t cf_flags; uint32_t cf_necp_control_unit; uint32_t cf_sock_count; TAILQ_HEAD(, cfil_entry) cf_sock_entries; }; #define CFF_ACTIVE 0x01 #define CFF_DETACHING 0x02 #define CFF_FLOW_CONTROLLED 0x04 struct content_filter **content_filters = NULL; uint32_t cfil_active_count = 0; /* Number of active content filters */ uint32_t cfil_sock_attached_count = 0; /* Number of sockets attachements */ uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */ static kern_ctl_ref cfil_kctlref = NULL; static lck_grp_attr_t *cfil_lck_grp_attr = NULL; static lck_attr_t *cfil_lck_attr = NULL; static lck_grp_t *cfil_lck_grp = NULL; decl_lck_rw_data(static, cfil_lck_rw); #define CFIL_RW_LCK_MAX 8 int cfil_rw_nxt_lck = 0; void* cfil_rw_lock_history[CFIL_RW_LCK_MAX]; int cfil_rw_nxt_unlck = 0; void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX]; #define CONTENT_FILTER_ZONE_NAME "content_filter" #define CONTENT_FILTER_ZONE_MAX 10 static struct zone *content_filter_zone = NULL; /* zone for content_filter */ #define CFIL_INFO_ZONE_NAME "cfil_info" #define CFIL_INFO_ZONE_MAX 1024 static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */ MBUFQ_HEAD(cfil_mqhead); struct cfil_queue { uint64_t q_start; /* offset of first byte in queue */ uint64_t q_end; /* offset of last byte in queue */ struct cfil_mqhead q_mq; }; /* * struct cfil_entry * * The is one entry per content filter */ struct cfil_entry { TAILQ_ENTRY(cfil_entry) cfe_link; struct content_filter *cfe_filter; struct cfil_info *cfe_cfil_info; uint32_t cfe_flags; uint32_t cfe_necp_control_unit; struct timeval cfe_last_event; /* To user space */ struct timeval cfe_last_action; /* From user space */ struct cfe_buf { /* * cfe_pending_q holds data that has been delivered to * the filter and for which we are waiting for an action */ struct cfil_queue cfe_pending_q; /* * This queue is for data that has not be delivered to * the content filter (new data, pass peek or flow control) */ struct cfil_queue cfe_ctl_q; uint64_t cfe_pass_offset; uint64_t cfe_peek_offset; uint64_t cfe_peeked; } cfe_snd, cfe_rcv; }; #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */ #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */ #define CFEF_DATA_START 0x0004 /* can send data event */ #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */ #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */ #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */ #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */ #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */ /* * struct cfil_info * * There is a struct cfil_info per socket */ struct cfil_info { TAILQ_ENTRY(cfil_info) cfi_link; struct socket *cfi_so; uint64_t cfi_flags; uint64_t cfi_sock_id; struct cfi_buf { /* * cfi_pending_first and cfi_pending_last describe the total * amount of data outstanding for all the filters on * this socket and data in the flow 
queue * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used" */ uint64_t cfi_pending_first; uint64_t cfi_pending_last; int cfi_pending_mbcnt; /* * cfi_pass_offset is the minimum of all the filters */ uint64_t cfi_pass_offset; /* * cfi_inject_q holds data that needs to be re-injected * into the socket after filtering and that can * be queued because of flow control */ struct cfil_queue cfi_inject_q; } cfi_snd, cfi_rcv; struct cfil_entry cfi_entries[MAX_CONTENT_FILTER]; }; #define CFIF_DROP 0x0001 /* drop action applied */ #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */ #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */ #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */ #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */ #define CFIF_SHUT_WR 0x0040 /* shutdown write */ #define CFIF_SHUT_RD 0x0080 /* shutdown read */ #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */ #define CFI_SHIFT_GENCNT 32 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */ #define CFI_SHIFT_FLOWHASH 0 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head; #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x) #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x) /* * Statistics */ struct cfil_stats cfil_stats; /* * For troubleshooting */ int cfil_log_level = LOG_ERR; int cfil_debug = 1; /* * Sysctls for logs and statistics */ static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int, struct sysctl_req *); static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int, struct sysctl_req *); SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil"); SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED, &cfil_log_level, 0, ""); SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED, &cfil_debug, 0, ""); SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED, &cfil_sock_attached_count, 0, ""); SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED, &cfil_active_count, 0, ""); SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED, &cfil_close_wait_timeout, 0, ""); static int cfil_sbtrim = 1; SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED, &cfil_sbtrim, 0, ""); SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED, 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", ""); SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED, 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", ""); SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED, &cfil_stats, cfil_stats, ""); /* * Forward declaration to appease the compiler */ static int cfil_action_data_pass(struct socket *, uint32_t, int, uint64_t, uint64_t); static int cfil_action_drop(struct socket *, uint32_t); static int cfil_dispatch_closed_event(struct socket *, int); static int cfil_data_common(struct socket *, int, struct sockaddr *, struct mbuf *, struct mbuf *, uint32_t); static int cfil_data_filter(struct socket *, uint32_t, int, struct mbuf *, uint64_t); static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *, struct in_addr, u_int16_t); static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *, struct in6_addr *, u_int16_t); static int cfil_dispatch_attach_event(struct socket *, uint32_t); static void cfil_info_free(struct socket *, struct cfil_info *); static struct cfil_info * cfil_info_alloc(struct socket *); static int cfil_info_attach_unit(struct socket *, uint32_t); static struct socket * 
cfil_socket_from_sock_id(cfil_sock_id_t); static int cfil_service_pending_queue(struct socket *, uint32_t, int); static int cfil_data_service_ctl_q(struct socket *, uint32_t, int); static void cfil_info_verify(struct cfil_info *); static int cfil_update_data_offsets(struct socket *, uint32_t, int, uint64_t, uint64_t); static int cfil_acquire_sockbuf(struct socket *, int); static void cfil_release_sockbuf(struct socket *, int); static int cfil_filters_attached(struct socket *); static void cfil_rw_lock_exclusive(lck_rw_t *); static void cfil_rw_unlock_exclusive(lck_rw_t *); static void cfil_rw_lock_shared(lck_rw_t *); static void cfil_rw_unlock_shared(lck_rw_t *); static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *); static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *); static unsigned int cfil_data_length(struct mbuf *, int *); /* * Content filter global read write lock */ static void cfil_rw_lock_exclusive(lck_rw_t *lck) { void *lr_saved; lr_saved = __builtin_return_address(0); lck_rw_lock_exclusive(lck); cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved; cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX; } static void cfil_rw_unlock_exclusive(lck_rw_t *lck) { void *lr_saved; lr_saved = __builtin_return_address(0); lck_rw_unlock_exclusive(lck); cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved; cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX; } static void cfil_rw_lock_shared(lck_rw_t *lck) { void *lr_saved; lr_saved = __builtin_return_address(0); lck_rw_lock_shared(lck); cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved; cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX; } static void cfil_rw_unlock_shared(lck_rw_t *lck) { void *lr_saved; lr_saved = __builtin_return_address(0); lck_rw_unlock_shared(lck); cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved; cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX; } static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck) { void *lr_saved; boolean_t upgraded; lr_saved = __builtin_return_address(0); upgraded = lck_rw_lock_shared_to_exclusive(lck); if (upgraded) { cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved; cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX; } return (upgraded); } static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck) { void *lr_saved; lr_saved = __builtin_return_address(0); lck_rw_lock_exclusive_to_shared(lck); cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved; cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX; } static void cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive) { lck_rw_assert(lck, exclusive ? 
LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD); } static void socket_lock_assert_owned(struct socket *so) { lck_mtx_t *mutex_held; if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else mutex_held = so->so_proto->pr_domain->dom_mtx; lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); } /* * Return the number of bytes in the mbuf chain using the same * method as m_length() or sballoc() */ static unsigned int cfil_data_length(struct mbuf *m, int *retmbcnt) { struct mbuf *m0; unsigned int pktlen; int mbcnt; if (retmbcnt == NULL) return (m_length(m)); pktlen = 0; mbcnt = 0; for (m0 = m; m0 != NULL; m0 = m0->m_next) { pktlen += m0->m_len; mbcnt += MSIZE; if (m0->m_flags & M_EXT) mbcnt += m0->m_ext.ext_size; } *retmbcnt = mbcnt; return (pktlen); } /* * Common mbuf queue utilities */ static inline void cfil_queue_init(struct cfil_queue *cfq) { cfq->q_start = 0; cfq->q_end = 0; MBUFQ_INIT(&cfq->q_mq); } static inline uint64_t cfil_queue_drain(struct cfil_queue *cfq) { uint64_t drained = cfq->q_start - cfq->q_end; cfq->q_start = 0; cfq->q_end = 0; MBUFQ_DRAIN(&cfq->q_mq); return (drained); } /* Return 1 when empty, 0 otherwise */ static inline int cfil_queue_empty(struct cfil_queue *cfq) { return (MBUFQ_EMPTY(&cfq->q_mq)); } static inline uint64_t cfil_queue_offset_first(struct cfil_queue *cfq) { return (cfq->q_start); } static inline uint64_t cfil_queue_offset_last(struct cfil_queue *cfq) { return (cfq->q_end); } static inline uint64_t cfil_queue_len(struct cfil_queue *cfq) { return (cfq->q_end - cfq->q_start); } /* * Routines to verify some fundamental assumptions */ static void cfil_queue_verify(struct cfil_queue *cfq) { mbuf_t m; mbuf_t n; uint64_t queuesize = 0; /* Verify offset are ordered */ VERIFY(cfq->q_start <= cfq->q_end); /* * When queue is empty, the offsets are equal otherwise the offsets * are different */ VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) || (!MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start != cfq->q_end)); MBUFQ_FOREACH(m, &cfq->q_mq) { size_t chainsize = 0; unsigned int mlen = m_length(m); if (m == (void *)M_TAG_FREE_PATTERN || m->m_next == (void *)M_TAG_FREE_PATTERN || m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) panic("%s - mq %p is free at %p", __func__, &cfq->q_mq, m); for (n = m; n != NULL; n = n->m_next) { if (n->m_type != MT_DATA && n->m_type != MT_HEADER && n->m_type != MT_OOBDATA) panic("%s - %p unsupported type %u", __func__, n, n->m_type); chainsize += n->m_len; } if (mlen != chainsize) panic("%s - %p m_length() %u != chainsize %lu", __func__, m, mlen, chainsize); queuesize += chainsize; } if (queuesize != cfq->q_end - cfq->q_start) panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__, m, queuesize, cfq->q_end - cfq->q_start); } static void cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len) { CFIL_QUEUE_VERIFY(cfq); MBUFQ_ENQUEUE(&cfq->q_mq, m); cfq->q_end += len; CFIL_QUEUE_VERIFY(cfq); } static void cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len) { CFIL_QUEUE_VERIFY(cfq); VERIFY(m_length(m) == len); MBUFQ_REMOVE(&cfq->q_mq, m); MBUFQ_NEXT(m) = NULL; cfq->q_start += len; CFIL_QUEUE_VERIFY(cfq); } static mbuf_t cfil_queue_first(struct cfil_queue *cfq) { return (MBUFQ_FIRST(&cfq->q_mq)); } static mbuf_t cfil_queue_next(struct cfil_queue *cfq, mbuf_t m) { #pragma unused(cfq) return (MBUFQ_NEXT(m)); } static void cfil_entry_buf_verify(struct cfe_buf *cfe_buf) { CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q); CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q); /* Verify the queues are ordered so 
that pending is before ctl */ VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end); /* The peek offset cannot be less than the pass offset */ VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset); /* Make sure we've updated the offset we peeked at */ VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked); } static void cfil_entry_verify(struct cfil_entry *entry) { cfil_entry_buf_verify(&entry->cfe_snd); cfil_entry_buf_verify(&entry->cfe_rcv); } static void cfil_info_buf_verify(struct cfi_buf *cfi_buf) { CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q); VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last); VERIFY(cfi_buf->cfi_pending_mbcnt >= 0); } static void cfil_info_verify(struct cfil_info *cfil_info) { int i; if (cfil_info == NULL) return; cfil_info_buf_verify(&cfil_info->cfi_snd); cfil_info_buf_verify(&cfil_info->cfi_rcv); for (i = 0; i < MAX_CONTENT_FILTER; i++) cfil_entry_verify(&cfil_info->cfi_entries[i]); } static void verify_content_filter(struct content_filter *cfc) { struct cfil_entry *entry; uint32_t count = 0; VERIFY(cfc->cf_sock_count >= 0); TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) { count++; VERIFY(cfc == entry->cfe_filter); } VERIFY(count == cfc->cf_sock_count); } /* * Kernel control socket callbacks */ static errno_t cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo) { errno_t error = 0; struct content_filter *cfc = NULL; CFIL_LOG(LOG_NOTICE, ""); cfc = zalloc(content_filter_zone); if (cfc == NULL) { CFIL_LOG(LOG_ERR, "zalloc failed"); error = ENOMEM; goto done; } bzero(cfc, sizeof(struct content_filter)); cfil_rw_lock_exclusive(&cfil_lck_rw); if (content_filters == NULL) { struct content_filter **tmp; cfil_rw_unlock_exclusive(&cfil_lck_rw); MALLOC(tmp, struct content_filter **, MAX_CONTENT_FILTER * sizeof(struct content_filter *), M_TEMP, M_WAITOK | M_ZERO); cfil_rw_lock_exclusive(&cfil_lck_rw); if (tmp == NULL && content_filters == NULL) { error = ENOMEM; cfil_rw_unlock_exclusive(&cfil_lck_rw); goto done; } /* Another thread may have won the race */ if (content_filters != NULL) FREE(tmp, M_TEMP); else content_filters = tmp; } if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) { CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit); error = EINVAL; } else if (content_filters[sac->sc_unit - 1] != NULL) { CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit); error = EADDRINUSE; } else { /* * kernel control socket kcunit numbers start at 1 */ content_filters[sac->sc_unit - 1] = cfc; cfc->cf_kcref = kctlref; cfc->cf_kcunit = sac->sc_unit; TAILQ_INIT(&cfc->cf_sock_entries); *unitinfo = cfc; cfil_active_count++; } cfil_rw_unlock_exclusive(&cfil_lck_rw); done: if (error != 0 && cfc != NULL) zfree(content_filter_zone, cfc); if (error == 0) OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok); else OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail); CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u", error, cfil_active_count, sac->sc_unit); return (error); } static errno_t cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo) { #pragma unused(kctlref) errno_t error = 0; struct content_filter *cfc; struct cfil_entry *entry; CFIL_LOG(LOG_NOTICE, ""); if (content_filters == NULL) { CFIL_LOG(LOG_ERR, "no content filter"); error = EINVAL; goto done; } if (kcunit > MAX_CONTENT_FILTER) { CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", kcunit, MAX_CONTENT_FILTER); error = EINVAL; goto done; } cfc = (struct content_filter *)unitinfo; if (cfc == NULL) goto done; 
cfil_rw_lock_exclusive(&cfil_lck_rw); if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) { CFIL_LOG(LOG_ERR, "bad unit info %u)", kcunit); cfil_rw_unlock_exclusive(&cfil_lck_rw); goto done; } cfc->cf_flags |= CFF_DETACHING; /* * Remove all sockets from the filter */ while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) { cfil_rw_lock_assert_held(&cfil_lck_rw, 1); verify_content_filter(cfc); /* * Accept all outstanding data by pushing to next filter * or back to socket * * TBD: Actually we should make sure all data has been pushed * back to socket */ if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) { struct cfil_info *cfil_info = entry->cfe_cfil_info; struct socket *so = cfil_info->cfi_so; /* Need to let data flow immediately */ entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED | CFEF_DATA_START; /* * Respect locking hierarchy */ cfil_rw_unlock_exclusive(&cfil_lck_rw); socket_lock(so, 1); /* * When cfe_filter is NULL the filter is detached * and the entry has been removed from cf_sock_entries */ if (so->so_cfil == NULL || entry->cfe_filter == NULL) { cfil_rw_lock_exclusive(&cfil_lck_rw); goto release; } (void) cfil_action_data_pass(so, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET); (void) cfil_action_data_pass(so, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET); cfil_rw_lock_exclusive(&cfil_lck_rw); /* * Check again as the socket may have been unlocked * when when calling cfil_acquire_sockbuf() */ if (so->so_cfil == NULL || entry->cfe_filter == NULL) goto release; /* The filter is now detached */ entry->cfe_flags |= CFEF_CFIL_DETACHED; CFIL_LOG(LOG_NOTICE, "so %llx detached %u", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) && cfil_filters_attached(so) == 0) { CFIL_LOG(LOG_NOTICE, "so %llx waking", (uint64_t)VM_KERNEL_ADDRPERM(so)); wakeup((caddr_t)&so->so_cfil); } /* * Remove the filter entry from the content filter * but leave the rest of the state intact as the queues * may not be empty yet */ entry->cfe_filter = NULL; entry->cfe_necp_control_unit = 0; TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link); cfc->cf_sock_count--; release: socket_unlock(so, 1); } } verify_content_filter(cfc); VERIFY(cfc->cf_sock_count == 0); /* * Make filter inactive */ content_filters[kcunit - 1] = NULL; cfil_active_count--; cfil_rw_unlock_exclusive(&cfil_lck_rw); zfree(content_filter_zone, cfc); done: if (error == 0) OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok); else OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail); CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u", error, cfil_active_count, kcunit); return (error); } /* * cfil_acquire_sockbuf() * * Prevent any other thread from acquiring the sockbuf * We use sb_cfil_thread as a semaphore to prevent other threads from * messing with the sockbuf -- see sblock() * Note: We do not set SB_LOCK here because the thread may check or modify * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently * sblock(), sbunlock() or sodefunct() */ static int cfil_acquire_sockbuf(struct socket *so, int outgoing) { thread_t tp = current_thread(); struct sockbuf *sb = outgoing ? 
&so->so_snd : &so->so_rcv; lck_mtx_t *mutex_held; int error = 0; /* * Wait until no thread is holding the sockbuf and other content * filter threads have released the sockbuf */ while ((sb->sb_flags & SB_LOCK) || (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) { if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else mutex_held = so->so_proto->pr_domain->dom_mtx; lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); sb->sb_wantlock++; VERIFY(sb->sb_wantlock != 0); msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf", NULL); VERIFY(sb->sb_wantlock != 0); sb->sb_wantlock--; } /* * Use reference count for repetitive calls on same thread */ if (sb->sb_cfil_refs == 0) { VERIFY(sb->sb_cfil_thread == NULL); VERIFY((sb->sb_flags & SB_LOCK) == 0); sb->sb_cfil_thread = tp; sb->sb_flags |= SB_LOCK; } sb->sb_cfil_refs++; /* We acquire the socket buffer when we need to cleanup */ if (so->so_cfil == NULL) { CFIL_LOG(LOG_ERR, "so %llx cfil detached", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = 0; } else if (so->so_cfil->cfi_flags & CFIF_DROP) { CFIL_LOG(LOG_ERR, "so %llx drop set", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = EPIPE; } return (error); } static void cfil_release_sockbuf(struct socket *so, int outgoing) { struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv; thread_t tp = current_thread(); socket_lock_assert_owned(so); if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) panic("%s sb_cfil_thread %p not current %p", __func__, sb->sb_cfil_thread, tp); /* * Don't panic if we are defunct because SB_LOCK has * been cleared by sodefunct() */ if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) panic("%s SB_LOCK not set on %p", __func__, sb); /* * We can unlock when the thread unwinds to the last reference */ sb->sb_cfil_refs--; if (sb->sb_cfil_refs == 0) { sb->sb_cfil_thread = NULL; sb->sb_flags &= ~SB_LOCK; if (sb->sb_wantlock > 0) wakeup(&sb->sb_flags); } } cfil_sock_id_t cfil_sock_id_from_socket(struct socket *so) { if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) return (so->so_cfil->cfi_sock_id); else return (CFIL_SOCK_ID_NONE); } static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id) { struct socket *so = NULL; u_int64_t gencnt = cfil_sock_id >> 32; u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff); struct inpcb *inp = NULL; struct inpcbinfo *pcbinfo = &tcbinfo; lck_rw_lock_shared(pcbinfo->ipi_lock); LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { if (inp->inp_state != INPCB_STATE_DEAD && inp->inp_socket != NULL && inp->inp_flowhash == flowhash && (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt && inp->inp_socket->so_cfil != NULL) { so = inp->inp_socket; break; } } lck_rw_done(pcbinfo->ipi_lock); if (so == NULL) { OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found); CFIL_LOG(LOG_DEBUG, "no socket for sock_id %llx gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash); } return (so); } static errno_t cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m, int flags) { #pragma unused(kctlref, flags) errno_t error = 0; struct cfil_msg_hdr *msghdr; struct content_filter *cfc = (struct content_filter *)unitinfo; struct socket *so; struct cfil_msg_action *action_msg; struct cfil_entry *entry; CFIL_LOG(LOG_INFO, ""); if (content_filters == NULL) { CFIL_LOG(LOG_ERR, "no content filter"); error = EINVAL; goto done; } if (kcunit > MAX_CONTENT_FILTER) { CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", kcunit, MAX_CONTENT_FILTER); error = EINVAL; 
goto done; } if (m_length(m) < sizeof(struct cfil_msg_hdr)) { CFIL_LOG(LOG_ERR, "too short %u", m_length(m)); error = EINVAL; goto done; } msghdr = (struct cfil_msg_hdr *)mbuf_data(m); if (msghdr->cfm_version != CFM_VERSION_CURRENT) { CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version); error = EINVAL; goto done; } if (msghdr->cfm_type != CFM_TYPE_ACTION) { CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type); error = EINVAL; goto done; } /* Validate action operation */ switch (msghdr->cfm_op) { case CFM_OP_DATA_UPDATE: OSIncrementAtomic( &cfil_stats.cfs_ctl_action_data_update); break; case CFM_OP_DROP: OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop); break; default: OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op); CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op); error = EINVAL; goto done; } if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) { OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len); error = EINVAL; CFIL_LOG(LOG_ERR, "bad len: %u for op %u", msghdr->cfm_len, msghdr->cfm_op); goto done; } cfil_rw_lock_shared(&cfil_lck_rw); if (cfc != (void *)content_filters[kcunit - 1]) { CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", kcunit); error = EINVAL; cfil_rw_unlock_shared(&cfil_lck_rw); goto done; } so = cfil_socket_from_sock_id(msghdr->cfm_sock_id); if (so == NULL) { CFIL_LOG(LOG_NOTICE, "bad sock_id %llx", msghdr->cfm_sock_id); error = EINVAL; cfil_rw_unlock_shared(&cfil_lck_rw); goto done; } cfil_rw_unlock_shared(&cfil_lck_rw); socket_lock(so, 1); if (so->so_cfil == NULL) { CFIL_LOG(LOG_NOTICE, "so %llx not attached", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = EINVAL; goto unlock; } else if (so->so_cfil->cfi_flags & CFIF_DROP) { CFIL_LOG(LOG_NOTICE, "so %llx drop set", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = EINVAL; goto unlock; } entry = &so->so_cfil->cfi_entries[kcunit - 1]; if (entry->cfe_filter == NULL) { CFIL_LOG(LOG_NOTICE, "so %llx no filter", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = EINVAL; goto unlock; } if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) entry->cfe_flags |= CFEF_DATA_START; else { CFIL_LOG(LOG_ERR, "so %llx attached not sent for %u", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); error = EINVAL; goto unlock; } microuptime(&entry->cfe_last_action); action_msg = (struct cfil_msg_action *)msghdr; switch (msghdr->cfm_op) { case CFM_OP_DATA_UPDATE: if (action_msg->cfa_out_peek_offset != 0 || action_msg->cfa_out_pass_offset != 0) error = cfil_action_data_pass(so, kcunit, 1, action_msg->cfa_out_pass_offset, action_msg->cfa_out_peek_offset); if (error == EJUSTRETURN) error = 0; if (error != 0) break; if (action_msg->cfa_in_peek_offset != 0 || action_msg->cfa_in_pass_offset != 0) error = cfil_action_data_pass(so, kcunit, 0, action_msg->cfa_in_pass_offset, action_msg->cfa_in_peek_offset); if (error == EJUSTRETURN) error = 0; break; case CFM_OP_DROP: error = cfil_action_drop(so, kcunit); break; default: error = EINVAL; break; } unlock: socket_unlock(so, 1); done: mbuf_freem(m); if (error == 0) OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok); else OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad); return (error); } static errno_t cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int opt, void *data, size_t *len) { #pragma unused(kctlref, opt) errno_t error = 0; struct content_filter *cfc = (struct content_filter *)unitinfo; CFIL_LOG(LOG_NOTICE, ""); cfil_rw_lock_shared(&cfil_lck_rw); if (content_filters == NULL) { CFIL_LOG(LOG_ERR, "no content filter"); error = EINVAL; goto done; } if (kcunit > MAX_CONTENT_FILTER) { 
CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", kcunit, MAX_CONTENT_FILTER); error = EINVAL; goto done; } if (cfc != (void *)content_filters[kcunit - 1]) { CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", kcunit); error = EINVAL; goto done; } switch (opt) { case CFIL_OPT_NECP_CONTROL_UNIT: if (*len < sizeof(uint32_t)) { CFIL_LOG(LOG_ERR, "len too small %lu", *len); error = EINVAL; goto done; } if (data != NULL) *(uint32_t *)data = cfc->cf_necp_control_unit; break; default: error = ENOPROTOOPT; break; } done: cfil_rw_unlock_shared(&cfil_lck_rw); return (error); } static errno_t cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int opt, void *data, size_t len) { #pragma unused(kctlref, opt) errno_t error = 0; struct content_filter *cfc = (struct content_filter *)unitinfo; CFIL_LOG(LOG_NOTICE, ""); cfil_rw_lock_exclusive(&cfil_lck_rw); if (content_filters == NULL) { CFIL_LOG(LOG_ERR, "no content filter"); error = EINVAL; goto done; } if (kcunit > MAX_CONTENT_FILTER) { CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", kcunit, MAX_CONTENT_FILTER); error = EINVAL; goto done; } if (cfc != (void *)content_filters[kcunit - 1]) { CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", kcunit); error = EINVAL; goto done; } switch (opt) { case CFIL_OPT_NECP_CONTROL_UNIT: if (len < sizeof(uint32_t)) { CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT " "len too small %lu", len); error = EINVAL; goto done; } if (cfc->cf_necp_control_unit != 0) { CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT " "already set %u", cfc->cf_necp_control_unit); error = EINVAL; goto done; } cfc->cf_necp_control_unit = *(uint32_t *)data; break; default: error = ENOPROTOOPT; break; } done: cfil_rw_unlock_exclusive(&cfil_lck_rw); return (error); } static void cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags) { #pragma unused(kctlref, flags) struct content_filter *cfc = (struct content_filter *)unitinfo; struct socket *so = NULL; int error; struct cfil_entry *entry; CFIL_LOG(LOG_INFO, ""); if (content_filters == NULL) { CFIL_LOG(LOG_ERR, "no content filter"); OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad); return; } if (kcunit > MAX_CONTENT_FILTER) { CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)", kcunit, MAX_CONTENT_FILTER); OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad); return; } cfil_rw_lock_shared(&cfil_lck_rw); if (cfc != (void *)content_filters[kcunit - 1]) { CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u", kcunit); OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad); goto done; } /* Let's assume the flow control is lifted */ if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) cfil_rw_lock_exclusive(&cfil_lck_rw); cfc->cf_flags &= ~CFF_FLOW_CONTROLLED; cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw); lck_rw_assert(&cfil_lck_rw, LCK_RW_ASSERT_SHARED); } /* * Flow control will be raised again as soon as an entry cannot enqueue * to the kernel control socket */ while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) { verify_content_filter(cfc); cfil_rw_lock_assert_held(&cfil_lck_rw, 0); /* Find an entry that is flow controlled */ TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) { if (entry->cfe_cfil_info == NULL || entry->cfe_cfil_info->cfi_so == NULL) continue; if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) continue; } if (entry == NULL) break; OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift); so = entry->cfe_cfil_info->cfi_so; cfil_rw_unlock_shared(&cfil_lck_rw); socket_lock(so, 
1); do { error = cfil_acquire_sockbuf(so, 1); if (error == 0) error = cfil_data_service_ctl_q(so, kcunit, 1); cfil_release_sockbuf(so, 1); if (error != 0) break; error = cfil_acquire_sockbuf(so, 0); if (error == 0) error = cfil_data_service_ctl_q(so, kcunit, 0); cfil_release_sockbuf(so, 0); } while (0); socket_lock_assert_owned(so); socket_unlock(so, 1); cfil_rw_lock_shared(&cfil_lck_rw); } done: cfil_rw_unlock_shared(&cfil_lck_rw); } void cfil_init(void) { struct kern_ctl_reg kern_ctl; errno_t error = 0; vm_size_t content_filter_size = 0; /* size of content_filter */ vm_size_t cfil_info_size = 0; /* size of cfil_info */ CFIL_LOG(LOG_NOTICE, ""); /* * Compile time verifications */ _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER); _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0); _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0); _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0); /* * Runtime time verifications */ VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed, sizeof(uint32_t))); VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed, sizeof(uint32_t))); /* * Zone for content filters kernel control sockets */ content_filter_size = sizeof(struct content_filter); content_filter_zone = zinit(content_filter_size, CONTENT_FILTER_ZONE_MAX * content_filter_size, 0, CONTENT_FILTER_ZONE_NAME); if (content_filter_zone == NULL) { panic("%s: zinit(%s) failed", __func__, CONTENT_FILTER_ZONE_NAME); /* NOTREACHED */ } zone_change(content_filter_zone, Z_CALLERACCT, FALSE); zone_change(content_filter_zone, Z_EXPAND, TRUE); /* * Zone for per socket content filters */ cfil_info_size = sizeof(struct cfil_info); cfil_info_zone = zinit(cfil_info_size, CFIL_INFO_ZONE_MAX * cfil_info_size, 0, CFIL_INFO_ZONE_NAME); if (cfil_info_zone == NULL) { panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME); /* NOTREACHED */ } zone_change(cfil_info_zone, Z_CALLERACCT, FALSE); zone_change(cfil_info_zone, Z_EXPAND, TRUE); /* * Allocate locks */ cfil_lck_grp_attr = lck_grp_attr_alloc_init(); if (cfil_lck_grp_attr == NULL) { panic("%s: lck_grp_attr_alloc_init failed", __func__); /* NOTREACHED */ } cfil_lck_grp = lck_grp_alloc_init("content filter", cfil_lck_grp_attr); if (cfil_lck_grp == NULL) { panic("%s: lck_grp_alloc_init failed", __func__); /* NOTREACHED */ } cfil_lck_attr = lck_attr_alloc_init(); if (cfil_lck_attr == NULL) { panic("%s: lck_attr_alloc_init failed", __func__); /* NOTREACHED */ } lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr); TAILQ_INIT(&cfil_sock_head); /* * Register kernel control */ bzero(&kern_ctl, sizeof(kern_ctl)); strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED; kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */ kern_ctl.ctl_recvsize = 512 * 1024; /* enough? 
*/ kern_ctl.ctl_connect = cfil_ctl_connect; kern_ctl.ctl_disconnect = cfil_ctl_disconnect; kern_ctl.ctl_send = cfil_ctl_send; kern_ctl.ctl_getopt = cfil_ctl_getopt; kern_ctl.ctl_setopt = cfil_ctl_setopt; kern_ctl.ctl_rcvd = cfil_ctl_rcvd; error = ctl_register(&kern_ctl, &cfil_kctlref); if (error != 0) { CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error); return; } } struct cfil_info * cfil_info_alloc(struct socket *so) { int kcunit; struct cfil_info *cfil_info = NULL; struct inpcb *inp = sotoinpcb(so); CFIL_LOG(LOG_INFO, ""); socket_lock_assert_owned(so); cfil_info = zalloc(cfil_info_zone); if (cfil_info == NULL) goto done; bzero(cfil_info, sizeof(struct cfil_info)); cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q); cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q); for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { struct cfil_entry *entry; entry = &cfil_info->cfi_entries[kcunit - 1]; entry->cfe_cfil_info = cfil_info; /* Initialize the filter entry */ entry->cfe_filter = NULL; entry->cfe_flags = 0; entry->cfe_necp_control_unit = 0; entry->cfe_snd.cfe_pass_offset = 0; entry->cfe_snd.cfe_peek_offset = 0; entry->cfe_snd.cfe_peeked = 0; entry->cfe_rcv.cfe_pass_offset = 0; entry->cfe_rcv.cfe_peek_offset = 0; entry->cfe_rcv.cfe_peeked = 0; cfil_queue_init(&entry->cfe_snd.cfe_pending_q); cfil_queue_init(&entry->cfe_rcv.cfe_pending_q); cfil_queue_init(&entry->cfe_snd.cfe_ctl_q); cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q); } cfil_rw_lock_exclusive(&cfil_lck_rw); so->so_cfil = cfil_info; cfil_info->cfi_so = so; /* * Create a cfi_sock_id that's not the socket pointer! */ if (inp->inp_flowhash == 0) inp->inp_flowhash = inp_calc_flowhash(inp); cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | inp->inp_flowhash); TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link); cfil_sock_attached_count++; cfil_rw_unlock_exclusive(&cfil_lck_rw); done: if (cfil_info != NULL) OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok); else OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail); return (cfil_info); } int cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit) { int kcunit; struct cfil_info *cfil_info = so->so_cfil; int attached = 0; CFIL_LOG(LOG_INFO, ""); socket_lock_assert_owned(so); cfil_rw_lock_exclusive(&cfil_lck_rw); for (kcunit = 1; content_filters != NULL && kcunit <= MAX_CONTENT_FILTER; kcunit++) { struct content_filter *cfc = content_filters[kcunit - 1]; struct cfil_entry *entry; if (cfc == NULL) continue; if (cfc->cf_necp_control_unit != filter_control_unit) continue; entry = &cfil_info->cfi_entries[kcunit - 1]; entry->cfe_filter = cfc; entry->cfe_necp_control_unit = filter_control_unit; TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link); cfc->cf_sock_count++; verify_content_filter(cfc); attached = 1; entry->cfe_flags |= CFEF_CFIL_ATTACHED; break; } cfil_rw_unlock_exclusive(&cfil_lck_rw); return (attached); } static void cfil_info_free(struct socket *so, struct cfil_info *cfil_info) { int kcunit; uint64_t in_drain = 0; uint64_t out_drained = 0; so->so_cfil = NULL; if (so->so_flags & SOF_CONTENT_FILTER) { so->so_flags &= ~SOF_CONTENT_FILTER; so->so_usecount--; } if (cfil_info == NULL) return; CFIL_LOG(LOG_INFO, ""); cfil_rw_lock_exclusive(&cfil_lck_rw); for (kcunit = 1; content_filters != NULL && kcunit <= MAX_CONTENT_FILTER; kcunit++) { struct cfil_entry *entry; struct content_filter *cfc; entry = &cfil_info->cfi_entries[kcunit - 1]; /* Don't be silly and try to detach twice */ if (entry->cfe_filter == NULL) continue; cfc = content_filters[kcunit - 1]; VERIFY(cfc == 
entry->cfe_filter); entry->cfe_filter = NULL; entry->cfe_necp_control_unit = 0; TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link); cfc->cf_sock_count--; verify_content_filter(cfc); } cfil_sock_attached_count--; TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link); out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q); in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q); for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { struct cfil_entry *entry; entry = &cfil_info->cfi_entries[kcunit - 1]; out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q); in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q); out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q); in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q); } cfil_rw_unlock_exclusive(&cfil_lck_rw); if (out_drained) OSIncrementAtomic(&cfil_stats.cfs_flush_out_free); if (in_drain) OSIncrementAtomic(&cfil_stats.cfs_flush_in_free); zfree(cfil_info_zone, cfil_info); } /* * Entry point from Sockets layer * The socket is locked. */ errno_t cfil_sock_attach(struct socket *so) { errno_t error = 0; uint32_t filter_control_unit; socket_lock_assert_owned(so); /* Limit ourselves to TCP */ if ((so->so_proto->pr_domain->dom_family != PF_INET && so->so_proto->pr_domain->dom_family != PF_INET6) || so->so_proto->pr_type != SOCK_STREAM || so->so_proto->pr_protocol != IPPROTO_TCP) goto done; filter_control_unit = necp_socket_get_content_filter_control_unit(so); if (filter_control_unit == 0) goto done; if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) { OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only); goto done; } if (cfil_active_count == 0) { OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain); goto done; } if (so->so_cfil != NULL) { OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already); CFIL_LOG(LOG_ERR, "already attached"); } else { cfil_info_alloc(so); if (so->so_cfil == NULL) { error = ENOMEM; OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem); goto done; } } if (cfil_info_attach_unit(so, filter_control_unit) == 0) { CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed", filter_control_unit); OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed); goto done; } CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockid %llx", (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, so->so_cfil->cfi_sock_id); so->so_flags |= SOF_CONTENT_FILTER; OSIncrementAtomic(&cfil_stats.cfs_sock_attached); /* Hold a reference on the socket */ so->so_usecount++; error = cfil_dispatch_attach_event(so, filter_control_unit); /* We can recover from flow control or out of memory errors */ if (error == ENOBUFS || error == ENOMEM) error = 0; else if (error != 0) goto done; CFIL_INFO_VERIFY(so->so_cfil); done: return (error); } /* * Entry point from Sockets layer * The socket is locked. 
*/ errno_t cfil_sock_detach(struct socket *so) { if (so->so_cfil) { cfil_info_free(so, so->so_cfil); OSIncrementAtomic(&cfil_stats.cfs_sock_detached); } return (0); } static int cfil_dispatch_attach_event(struct socket *so, uint32_t filter_control_unit) { errno_t error = 0; struct cfil_entry *entry = NULL; struct cfil_msg_sock_attached msg_attached; uint32_t kcunit; struct content_filter *cfc; socket_lock_assert_owned(so); cfil_rw_lock_shared(&cfil_lck_rw); if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) { error = EINVAL; goto done; } /* * Find the matching filter unit */ for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { cfc = content_filters[kcunit - 1]; if (cfc == NULL) continue; if (cfc->cf_necp_control_unit != filter_control_unit) continue; entry = &so->so_cfil->cfi_entries[kcunit - 1]; if (entry->cfe_filter == NULL) continue; VERIFY(cfc == entry->cfe_filter); break; } if (entry == NULL || entry->cfe_filter == NULL) goto done; if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) goto done; CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u", (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit); /* Would be wasteful to try when flow controlled */ if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { error = ENOBUFS; goto done; } bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached)); msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached); msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT; msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT; msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED; msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id; msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family; msg_attached.cfs_sock_type = so->so_proto->pr_type; msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol; msg_attached.cfs_pid = so->last_pid; memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t)); if (so->so_flags & SOF_DELEGATED) { msg_attached.cfs_e_pid = so->e_pid; memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t)); } else { msg_attached.cfs_e_pid = so->last_pid; memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t)); } error = ctl_enqueuedata(entry->cfe_filter->cf_kcref, entry->cfe_filter->cf_kcunit, &msg_attached, sizeof(struct cfil_msg_sock_attached), CTL_DATA_EOR); if (error != 0) { CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error); goto done; } microuptime(&entry->cfe_last_event); entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED; OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok); done: /* We can recover from flow control */ if (error == ENOBUFS) { entry->cfe_flags |= CFEF_FLOW_CONTROLLED; OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control); if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) cfil_rw_lock_exclusive(&cfil_lck_rw); cfc->cf_flags |= CFF_FLOW_CONTROLLED; cfil_rw_unlock_exclusive(&cfil_lck_rw); } else { if (error != 0) OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail); cfil_rw_unlock_shared(&cfil_lck_rw); } return (error); } static int cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing) { errno_t error = 0; struct mbuf *msg = NULL; struct cfil_entry *entry; struct cfe_buf *entrybuf; struct cfil_msg_hdr msg_disconnected; struct content_filter *cfc; socket_lock_assert_owned(so); cfil_rw_lock_shared(&cfil_lck_rw); entry = &so->so_cfil->cfi_entries[kcunit - 1]; if (outgoing) entrybuf = &entry->cfe_snd; else entrybuf = &entry->cfe_rcv; cfc = entry->cfe_filter; if (cfc == NULL) goto done; CFIL_LOG(LOG_INFO, "so 
%llx kcunit %u outgoing %d", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); /* * Send the disconnection event once */ if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) || (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) { CFIL_LOG(LOG_INFO, "so %llx disconnect already sent", (uint64_t)VM_KERNEL_ADDRPERM(so)); goto done; } /* * We're not disconnected as long as some data is waiting * to be delivered to the filter */ if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) { CFIL_LOG(LOG_INFO, "so %llx control queue not empty", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = EBUSY; goto done; } /* Would be wasteful to try when flow controlled */ if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { error = ENOBUFS; goto done; } bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr)); msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr); msg_disconnected.cfm_version = CFM_VERSION_CURRENT; msg_disconnected.cfm_type = CFM_TYPE_EVENT; msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT : CFM_OP_DISCONNECT_IN; msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id; error = ctl_enqueuedata(entry->cfe_filter->cf_kcref, entry->cfe_filter->cf_kcunit, &msg_disconnected, sizeof(struct cfil_msg_hdr), CTL_DATA_EOR); if (error != 0) { CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error); mbuf_freem(msg); goto done; } microuptime(&entry->cfe_last_event); /* Remember we have sent the disconnection message */ if (outgoing) { entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT; OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok); } else { entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN; OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok); } done: if (error == ENOBUFS) { entry->cfe_flags |= CFEF_FLOW_CONTROLLED; OSIncrementAtomic( &cfil_stats.cfs_disconnect_event_flow_control); if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) cfil_rw_lock_exclusive(&cfil_lck_rw); cfc->cf_flags |= CFF_FLOW_CONTROLLED; cfil_rw_unlock_exclusive(&cfil_lck_rw); } else { if (error != 0) OSIncrementAtomic( &cfil_stats.cfs_disconnect_event_fail); cfil_rw_unlock_shared(&cfil_lck_rw); } return (error); } int cfil_dispatch_closed_event(struct socket *so, int kcunit) { struct cfil_entry *entry; struct cfil_msg_hdr msg_closed; errno_t error = 0; struct content_filter *cfc; socket_lock_assert_owned(so); cfil_rw_lock_shared(&cfil_lck_rw); entry = &so->so_cfil->cfi_entries[kcunit - 1]; cfc = entry->cfe_filter; if (cfc == NULL) goto done; CFIL_LOG(LOG_INFO, "so %llx kcunit %d", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); /* Would be wasteful to try when flow controlled */ if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { error = ENOBUFS; goto done; } /* * Send a single closed message per filter */ if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) goto done; if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) goto done; bzero(&msg_closed, sizeof(struct cfil_msg_hdr)); msg_closed.cfm_len = sizeof(struct cfil_msg_hdr); msg_closed.cfm_version = CFM_VERSION_CURRENT; msg_closed.cfm_type = CFM_TYPE_EVENT; msg_closed.cfm_op = CFM_OP_SOCKET_CLOSED; msg_closed.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id; error = ctl_enqueuedata(entry->cfe_filter->cf_kcref, entry->cfe_filter->cf_kcunit, &msg_closed, sizeof(struct cfil_msg_hdr), CTL_DATA_EOR); if (error != 0) { CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error); goto done; } microuptime(&entry->cfe_last_event); entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED; OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok); done: /* We can recover from flow control */ if 
(error == ENOBUFS) { entry->cfe_flags |= CFEF_FLOW_CONTROLLED; OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control); if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) cfil_rw_lock_exclusive(&cfil_lck_rw); cfc->cf_flags |= CFF_FLOW_CONTROLLED; cfil_rw_unlock_exclusive(&cfil_lck_rw); } else { if (error != 0) OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail); cfil_rw_unlock_shared(&cfil_lck_rw); } return (error); } static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46, struct in6_addr *ip6, u_int16_t port) { struct sockaddr_in6 *sin6 = &sin46->sin6; sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = port; sin6->sin6_addr = *ip6; if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) { sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); sin6->sin6_addr.s6_addr16[1] = 0; } } static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46, struct in_addr ip, u_int16_t port) { struct sockaddr_in *sin = &sin46->sin; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_port = port; sin->sin_addr.s_addr = ip.s_addr; } static int cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing, struct mbuf *data, unsigned int copyoffset, unsigned int copylen) { errno_t error = 0; struct mbuf *copy = NULL; struct mbuf *msg = NULL; unsigned int one = 1; struct cfil_msg_data_event *data_req; size_t hdrsize; struct inpcb *inp = (struct inpcb *)so->so_pcb; struct cfil_entry *entry; struct cfe_buf *entrybuf; struct content_filter *cfc; cfil_rw_lock_shared(&cfil_lck_rw); entry = &so->so_cfil->cfi_entries[kcunit - 1]; if (outgoing) entrybuf = &entry->cfe_snd; else entrybuf = &entry->cfe_rcv; cfc = entry->cfe_filter; if (cfc == NULL) goto done; CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); socket_lock_assert_owned(so); /* Would be wasteful to try */ if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { error = ENOBUFS; goto done; } /* Make a copy of the data to pass to kernel control socket */ copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT, M_COPYM_NOOP_HDR); if (copy == NULL) { CFIL_LOG(LOG_ERR, "m_copym_mode() failed"); error = ENOMEM; goto done; } /* We need an mbuf packet for the message header */ hdrsize = sizeof(struct cfil_msg_data_event); error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg); if (error != 0) { CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed"); m_freem(copy); /* * ENOBUFS is to indicate flow control */ error = ENOMEM; goto done; } mbuf_setlen(msg, hdrsize); mbuf_pkthdr_setlen(msg, hdrsize + copylen); msg->m_next = copy; data_req = (struct cfil_msg_data_event *)mbuf_data(msg); bzero(data_req, hdrsize); data_req->cfd_msghdr.cfm_len = hdrsize + copylen; data_req->cfd_msghdr.cfm_version = 1; data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT; data_req->cfd_msghdr.cfm_op = outgoing ? 
CFM_OP_DATA_OUT : CFM_OP_DATA_IN; data_req->cfd_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id; data_req->cfd_start_offset = entrybuf->cfe_peeked; data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen; /* * TBD: * For non connected sockets need to copy addresses from passed * parameters */ if (inp->inp_vflag & INP_IPV6) { if (outgoing) { fill_ip6_sockaddr_4_6(&data_req->cfc_src, &inp->in6p_laddr, inp->inp_lport); fill_ip6_sockaddr_4_6(&data_req->cfc_dst, &inp->in6p_faddr, inp->inp_fport); } else { fill_ip6_sockaddr_4_6(&data_req->cfc_src, &inp->in6p_faddr, inp->inp_fport); fill_ip6_sockaddr_4_6(&data_req->cfc_dst, &inp->in6p_laddr, inp->inp_lport); } } else if (inp->inp_vflag & INP_IPV4) { if (outgoing) { fill_ip_sockaddr_4_6(&data_req->cfc_src, inp->inp_laddr, inp->inp_lport); fill_ip_sockaddr_4_6(&data_req->cfc_dst, inp->inp_faddr, inp->inp_fport); } else { fill_ip_sockaddr_4_6(&data_req->cfc_src, inp->inp_faddr, inp->inp_fport); fill_ip_sockaddr_4_6(&data_req->cfc_dst, inp->inp_laddr, inp->inp_lport); } } /* Pass the message to the content filter */ error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref, entry->cfe_filter->cf_kcunit, msg, CTL_DATA_EOR); if (error != 0) { CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error); mbuf_freem(msg); goto done; } entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED; OSIncrementAtomic(&cfil_stats.cfs_data_event_ok); done: if (error == ENOBUFS) { entry->cfe_flags |= CFEF_FLOW_CONTROLLED; OSIncrementAtomic( &cfil_stats.cfs_data_event_flow_control); if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) cfil_rw_lock_exclusive(&cfil_lck_rw); cfc->cf_flags |= CFF_FLOW_CONTROLLED; cfil_rw_unlock_exclusive(&cfil_lck_rw); } else { if (error != 0) OSIncrementAtomic(&cfil_stats.cfs_data_event_fail); cfil_rw_unlock_shared(&cfil_lck_rw); } return (error); } /* * Process the queue of data waiting to be delivered to content filter */ static int cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing) { errno_t error = 0; struct mbuf *data, *tmp = NULL; unsigned int datalen = 0, copylen = 0, copyoffset = 0; struct cfil_entry *entry; struct cfe_buf *entrybuf; uint64_t currentoffset = 0; if (so->so_cfil == NULL) return (0); CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); socket_lock_assert_owned(so); entry = &so->so_cfil->cfi_entries[kcunit - 1]; if (outgoing) entrybuf = &entry->cfe_snd; else entrybuf = &entry->cfe_rcv; /* Send attached message if not yet done */ if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) { error = cfil_dispatch_attach_event(so, kcunit); if (error != 0) { /* We can recover from flow control */ if (error == ENOBUFS || error == ENOMEM) error = 0; goto done; } } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) { OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started); goto done; } CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu", entrybuf->cfe_pass_offset, entrybuf->cfe_peeked, entrybuf->cfe_peek_offset); /* Move all data that can pass */ while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL && entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) { datalen = cfil_data_length(data, NULL); tmp = data; if (entrybuf->cfe_ctl_q.q_start + datalen <= entrybuf->cfe_pass_offset) { /* * The first mbuf can fully pass */ copylen = datalen; } else { /* * The first mbuf can partially pass */ copylen = entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start; } VERIFY(copylen <= datalen); CFIL_LOG(LOG_DEBUG, "%llx first %llu peeked %llu pass %llu 
peek %llu" "datalen %u copylen %u", (uint64_t)VM_KERNEL_ADDRPERM(tmp), entrybuf->cfe_ctl_q.q_start, entrybuf->cfe_peeked, entrybuf->cfe_pass_offset, entrybuf->cfe_peek_offset, datalen, copylen); /* * Data that passes has been peeked at explicitly or * implicitly */ if (entrybuf->cfe_ctl_q.q_start + copylen > entrybuf->cfe_peeked) entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start + copylen; /* * Stop on partial pass */ if (copylen < datalen) break; /* All good, move full data from ctl queue to pending queue */ cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen); cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen); if (outgoing) OSAddAtomic64(datalen, &cfil_stats.cfs_pending_q_out_enqueued); else OSAddAtomic64(datalen, &cfil_stats.cfs_pending_q_in_enqueued); } CFIL_INFO_VERIFY(so->so_cfil); if (tmp != NULL) CFIL_LOG(LOG_DEBUG, "%llx first %llu peeked %llu pass %llu peek %llu" "datalen %u copylen %u", (uint64_t)VM_KERNEL_ADDRPERM(tmp), entrybuf->cfe_ctl_q.q_start, entrybuf->cfe_peeked, entrybuf->cfe_pass_offset, entrybuf->cfe_peek_offset, datalen, copylen); tmp = NULL; /* Now deal with remaining data the filter wants to peek at */ for (data = cfil_queue_first(&entrybuf->cfe_ctl_q), currentoffset = entrybuf->cfe_ctl_q.q_start; data != NULL && currentoffset < entrybuf->cfe_peek_offset; data = cfil_queue_next(&entrybuf->cfe_ctl_q, data), currentoffset += datalen) { datalen = cfil_data_length(data, NULL); tmp = data; /* We've already peeked at this mbuf */ if (currentoffset + datalen <= entrybuf->cfe_peeked) continue; /* * The data in the first mbuf may have been * partially peeked at */ copyoffset = entrybuf->cfe_peeked - currentoffset; VERIFY(copyoffset < datalen); copylen = datalen - copyoffset; VERIFY(copylen <= datalen); /* * Do not copy more than needed */ if (currentoffset + copyoffset + copylen > entrybuf->cfe_peek_offset) { copylen = entrybuf->cfe_peek_offset - (currentoffset + copyoffset); } CFIL_LOG(LOG_DEBUG, "%llx current %llu peeked %llu pass %llu peek %llu" "datalen %u copylen %u copyoffset %u", (uint64_t)VM_KERNEL_ADDRPERM(tmp), currentoffset, entrybuf->cfe_peeked, entrybuf->cfe_pass_offset, entrybuf->cfe_peek_offset, datalen, copylen, copyoffset); /* * Stop if there is nothing more to peek at */ if (copylen == 0) break; /* * Let the filter get a peek at this span of data */ error = cfil_dispatch_data_event(so, kcunit, outgoing, data, copyoffset, copylen); if (error != 0) { /* On error, leave data in ctl_q */ break; } entrybuf->cfe_peeked += copylen; if (outgoing) OSAddAtomic64(copylen, &cfil_stats.cfs_ctl_q_out_peeked); else OSAddAtomic64(copylen, &cfil_stats.cfs_ctl_q_in_peeked); /* Stop when data could not be fully peeked at */ if (copylen + copyoffset < datalen) break; } CFIL_INFO_VERIFY(so->so_cfil); if (tmp != NULL) CFIL_LOG(LOG_DEBUG, "%llx first %llu peeked %llu pass %llu peek %llu" "datalen %u copylen %u copyoffset %u", (uint64_t)VM_KERNEL_ADDRPERM(tmp), currentoffset, entrybuf->cfe_peeked, entrybuf->cfe_pass_offset, entrybuf->cfe_peek_offset, datalen, copylen, copyoffset); /* * Process data that has passed the filter */ error = cfil_service_pending_queue(so, kcunit, outgoing); if (error != 0) { CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d", error); goto done; } /* * Dispatch disconnect events that could not be sent */ if (so->so_cfil == NULL) goto done; else if (outgoing) { if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) && !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) cfil_dispatch_disconnect_event(so, kcunit, 1); } else { if 
((so->so_cfil->cfi_flags & CFIF_SHUT_RD) && !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) cfil_dispatch_disconnect_event(so, kcunit, 0); } done: CFIL_LOG(LOG_DEBUG, "first %llu peeked %llu pass %llu peek %llu", entrybuf->cfe_ctl_q.q_start, entrybuf->cfe_peeked, entrybuf->cfe_pass_offset, entrybuf->cfe_peek_offset); CFIL_INFO_VERIFY(so->so_cfil); return (error); } /* * cfil_data_filter() * * Process data for a content filter installed on a socket */ int cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing, struct mbuf *data, uint64_t datalen) { errno_t error = 0; struct cfil_entry *entry; struct cfe_buf *entrybuf; CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); socket_lock_assert_owned(so); entry = &so->so_cfil->cfi_entries[kcunit - 1]; if (outgoing) entrybuf = &entry->cfe_snd; else entrybuf = &entry->cfe_rcv; /* Are we attached to the filter? */ if (entry->cfe_filter == NULL) { error = 0; goto done; } /* Dispatch to filters */ cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen); if (outgoing) OSAddAtomic64(datalen, &cfil_stats.cfs_ctl_q_out_enqueued); else OSAddAtomic64(datalen, &cfil_stats.cfs_ctl_q_in_enqueued); error = cfil_data_service_ctl_q(so, kcunit, outgoing); if (error != 0) { CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d", error); } /* * We have to return EJUSTRETURN in all cases to avoid double free * by socket layer */ error = EJUSTRETURN; done: CFIL_INFO_VERIFY(so->so_cfil); CFIL_LOG(LOG_INFO, "return %d", error); return (error); } /* * cfil_service_inject_queue() re-injects data that passed the * content filters */ static int cfil_service_inject_queue(struct socket *so, int outgoing) { mbuf_t data; unsigned int datalen; int mbcnt; unsigned int copylen; errno_t error = 0; struct mbuf *copy = NULL; struct cfi_buf *cfi_buf; struct cfil_queue *inject_q; int need_rwakeup = 0; if (so->so_cfil == NULL) return (0); CFIL_LOG(LOG_INFO, "so %llx outgoing %d", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing); socket_lock_assert_owned(so); if (outgoing) { cfi_buf = &so->so_cfil->cfi_snd; so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT; } else { cfi_buf = &so->so_cfil->cfi_rcv; so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN; } inject_q = &cfi_buf->cfi_inject_q; while ((data = cfil_queue_first(inject_q)) != NULL) { datalen = cfil_data_length(data, &mbcnt); CFIL_LOG(LOG_INFO, "data %llx datalen %u", (uint64_t)VM_KERNEL_ADDRPERM(data), datalen); /* Make a copy in case of injection error */ copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR); if (copy == NULL) { CFIL_LOG(LOG_ERR, "m_copym_mode() failed"); error = ENOMEM; break; } if ((copylen = m_length(copy)) != datalen) panic("%s so %p copylen %d != datalen %d", __func__, so, copylen, datalen); if (outgoing) { socket_unlock(so, 0); /* * Set both DONTWAIT and NBIO flags as we really * do not want to block */ error = sosend(so, NULL, NULL, copy, NULL, MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO); socket_lock(so, 0); if (error != 0) { CFIL_LOG(LOG_ERR, "sosend() failed %d", error); } } else { copy->m_flags |= M_SKIPCFIL; /* * NOTE: * This works only because we support plain TCP * For UDP, RAWIP, MPTCP and message TCP we'll * need to call the appropriate sbappendxxx() * or fix sock_inject_data_in() */ if (sbappendstream(&so->so_rcv, copy)) need_rwakeup = 1; } /* Need to reassess if filter is still attached after unlock */ if (so->so_cfil == NULL) { CFIL_LOG(LOG_ERR, "so %llx cfil detached", (uint64_t)VM_KERNEL_ADDRPERM(so));
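/* The cfil state and its queues went away while the socket was unlocked; stop here without reporting an error since there is nothing left to re-inject */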
OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached); error = 0; break; } if (error != 0) break; /* Injection successful */ cfil_queue_remove(inject_q, data, datalen); mbuf_freem(data); cfi_buf->cfi_pending_first += datalen; cfi_buf->cfi_pending_mbcnt -= mbcnt; cfil_info_buf_verify(cfi_buf); if (outgoing) OSAddAtomic64(datalen, &cfil_stats.cfs_inject_q_out_passed); else OSAddAtomic64(datalen, &cfil_stats.cfs_inject_q_in_passed); } /* A single wakeup is for several packets is more efficient */ if (need_rwakeup) sorwakeup(so); if (error != 0 && so->so_cfil) { if (error == ENOBUFS) OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs); if (error == ENOMEM) OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem); if (outgoing) { so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT; OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail); } else { so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN; OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail); } } /* * Notify */ if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) { cfil_sock_notify_shutdown(so, SHUT_WR); if (cfil_sock_data_pending(&so->so_snd) == 0) soshutdownlock_final(so, SHUT_WR); } if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) { if (cfil_filters_attached(so) == 0) { CFIL_LOG(LOG_INFO, "so %llx waking", (uint64_t)VM_KERNEL_ADDRPERM(so)); wakeup((caddr_t)&so->so_cfil); } } CFIL_INFO_VERIFY(so->so_cfil); return (error); } static int cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing) { uint64_t passlen, curlen; mbuf_t data; unsigned int datalen; errno_t error = 0; struct cfil_entry *entry; struct cfe_buf *entrybuf; struct cfil_queue *pending_q; CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); socket_lock_assert_owned(so); entry = &so->so_cfil->cfi_entries[kcunit - 1]; if (outgoing) entrybuf = &entry->cfe_snd; else entrybuf = &entry->cfe_rcv; pending_q = &entrybuf->cfe_pending_q; passlen = entrybuf->cfe_pass_offset - pending_q->q_start; /* * Locate the chunks of data that we can pass to the next filter * A data chunk must be on mbuf boundaries */ curlen = 0; while ((data = cfil_queue_first(pending_q)) != NULL) { datalen = cfil_data_length(data, NULL); CFIL_LOG(LOG_INFO, "data %llx datalen %u passlen %llu curlen %llu", (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, passlen, curlen); if (curlen + datalen > passlen) break; cfil_queue_remove(pending_q, data, datalen); curlen += datalen; for (kcunit += 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { error = cfil_data_filter(so, kcunit, outgoing, data, datalen); /* 0 means passed so we can continue */ if (error != 0) break; } /* When data has passed all filters, re-inject */ if (error == 0) { if (outgoing) { cfil_queue_enqueue( &so->so_cfil->cfi_snd.cfi_inject_q, data, datalen); OSAddAtomic64(datalen, &cfil_stats.cfs_inject_q_out_enqueued); } else { cfil_queue_enqueue( &so->so_cfil->cfi_rcv.cfi_inject_q, data, datalen); OSAddAtomic64(datalen, &cfil_stats.cfs_inject_q_in_enqueued); } } } CFIL_INFO_VERIFY(so->so_cfil); return (error); } int cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing, uint64_t pass_offset, uint64_t peek_offset) { errno_t error = 0; struct cfil_entry *entry; struct cfe_buf *entrybuf; int updated = 0; CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset); socket_lock_assert_owned(so); if (so->so_cfil == NULL) { CFIL_LOG(LOG_ERR, "so %llx cfil detached", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = 0; goto done; } else if (so->so_cfil->cfi_flags & CFIF_DROP) { 
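/* A drop verdict has already been issued for this socket: reject the offset update with EPIPE */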
CFIL_LOG(LOG_ERR, "so %llx drop set", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = EPIPE; goto done; } entry = &so->so_cfil->cfi_entries[kcunit - 1]; if (outgoing) entrybuf = &entry->cfe_snd; else entrybuf = &entry->cfe_rcv; /* Record updated offsets for this content filter */ if (pass_offset > entrybuf->cfe_pass_offset) { entrybuf->cfe_pass_offset = pass_offset; if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset; updated = 1; } else { CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu", pass_offset, entrybuf->cfe_pass_offset); } /* Filter does not want or need to see data that's allowed to pass */ if (peek_offset > entrybuf->cfe_pass_offset && peek_offset > entrybuf->cfe_peek_offset) { entrybuf->cfe_peek_offset = peek_offset; updated = 1; } /* Nothing to do */ if (updated == 0) goto done; /* Move data held in control queue to pending queue if needed */ error = cfil_data_service_ctl_q(so, kcunit, outgoing); if (error != 0) { CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d", error); goto done; } error = EJUSTRETURN; done: /* * The filter is effectively detached when pass all from both sides * or when the socket is closed and no more data is waiting * to be delivered to the filter */ if (so->so_cfil != NULL && ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET && entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) || ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) && cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) && cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) { entry->cfe_flags |= CFEF_CFIL_DETACHED; CFIL_LOG(LOG_INFO, "so %llx detached %u", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) && cfil_filters_attached(so) == 0) { CFIL_LOG(LOG_INFO, "so %llx waking", (uint64_t)VM_KERNEL_ADDRPERM(so)); wakeup((caddr_t)&so->so_cfil); } } CFIL_INFO_VERIFY(so->so_cfil); CFIL_LOG(LOG_INFO, "return %d", error); return (error); } /* * Update pass offset for socket when no data is pending */ static int cfil_set_socket_pass_offset(struct socket *so, int outgoing) { struct cfi_buf *cfi_buf; struct cfil_entry *entry; struct cfe_buf *entrybuf; uint32_t kcunit; uint64_t pass_offset = 0; if (so->so_cfil == NULL) return (0); CFIL_LOG(LOG_INFO, "so %llx outgoing %d", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing); socket_lock_assert_owned(so); if (outgoing) cfi_buf = &so->so_cfil->cfi_snd; else cfi_buf = &so->so_cfil->cfi_rcv; if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) { for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { entry = &so->so_cfil->cfi_entries[kcunit - 1]; /* Are we attached to a filter? */ if (entry->cfe_filter == NULL) continue; if (outgoing) entrybuf = &entry->cfe_snd; else entrybuf = &entry->cfe_rcv; if (pass_offset == 0 || entrybuf->cfe_pass_offset < pass_offset) pass_offset = entrybuf->cfe_pass_offset; } cfi_buf->cfi_pass_offset = pass_offset; } return (0); } int cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing, uint64_t pass_offset, uint64_t peek_offset) { errno_t error = 0; CFIL_LOG(LOG_INFO, ""); socket_lock_assert_owned(so); error = cfil_acquire_sockbuf(so, outgoing); if (error != 0) { CFIL_LOG(LOG_INFO, "so %llx %s dropped", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing ? 
"out" : "in"); goto release; } error = cfil_update_data_offsets(so, kcunit, outgoing, pass_offset, peek_offset); cfil_service_inject_queue(so, outgoing); cfil_set_socket_pass_offset(so, outgoing); release: CFIL_INFO_VERIFY(so->so_cfil); cfil_release_sockbuf(so, outgoing); return (error); } static void cfil_flush_queues(struct socket *so) { struct cfil_entry *entry; int kcunit; uint64_t drained; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) goto done; socket_lock_assert_owned(so); /* * Flush the output queues and ignore errors as long as * we are attached */ (void) cfil_acquire_sockbuf(so, 1); if (so->so_cfil != NULL) { drained = 0; for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { entry = &so->so_cfil->cfi_entries[kcunit - 1]; drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q); drained += cfil_queue_drain( &entry->cfe_snd.cfe_pending_q); } drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q); if (drained) { if (so->so_cfil->cfi_flags & CFIF_DROP) OSIncrementAtomic( &cfil_stats.cfs_flush_out_drop); else OSIncrementAtomic( &cfil_stats.cfs_flush_out_close); } } cfil_release_sockbuf(so, 1); /* * Flush the input queues */ (void) cfil_acquire_sockbuf(so, 0); if (so->so_cfil != NULL) { drained = 0; for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { entry = &so->so_cfil->cfi_entries[kcunit - 1]; drained += cfil_queue_drain( &entry->cfe_rcv.cfe_ctl_q); drained += cfil_queue_drain( &entry->cfe_rcv.cfe_pending_q); } drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q); if (drained) { if (so->so_cfil->cfi_flags & CFIF_DROP) OSIncrementAtomic( &cfil_stats.cfs_flush_in_drop); else OSIncrementAtomic( &cfil_stats.cfs_flush_in_close); } } cfil_release_sockbuf(so, 0); done: CFIL_INFO_VERIFY(so->so_cfil); } int cfil_action_drop(struct socket *so, uint32_t kcunit) { errno_t error = 0; struct cfil_entry *entry; struct proc *p; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) goto done; socket_lock_assert_owned(so); entry = &so->so_cfil->cfi_entries[kcunit - 1]; /* Are we attached to the filter? */ if (entry->cfe_filter == NULL) goto done; so->so_cfil->cfi_flags |= CFIF_DROP; p = current_proc(); /* Force the socket to be marked defunct */ error = sosetdefunct(p, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, 1); /* Flush the socket buffer and disconnect */ if (error == 0) error = sodefunct(p, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL); /* The filter is done, mark as detached */ entry->cfe_flags |= CFEF_CFIL_DETACHED; CFIL_LOG(LOG_INFO, "so %llx detached %u", (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); /* Pending data needs to go */ cfil_flush_queues(so); if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) { if (cfil_filters_attached(so) == 0) { CFIL_LOG(LOG_INFO, "so %llx waking", (uint64_t)VM_KERNEL_ADDRPERM(so)); wakeup((caddr_t)&so->so_cfil); } } done: return (error); } static int cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen) { struct cfil_entry *entry; struct cfe_buf *entrybuf; uint32_t kcunit; CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen); for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { entry = &so->so_cfil->cfi_entries[kcunit - 1]; /* Are we attached to the filter? 
*/ if (entry->cfe_filter == NULL) continue; if (outgoing) entrybuf = &entry->cfe_snd; else entrybuf = &entry->cfe_rcv; entrybuf->cfe_ctl_q.q_start += datalen; entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start; entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start; if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset; entrybuf->cfe_ctl_q.q_end += datalen; entrybuf->cfe_pending_q.q_start += datalen; entrybuf->cfe_pending_q.q_end += datalen; } CFIL_INFO_VERIFY(so->so_cfil); return (0); } int cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to, struct mbuf *data, struct mbuf *control, uint32_t flags) { #pragma unused(to, control, flags) errno_t error = 0; unsigned int datalen; int mbcnt; int kcunit; struct cfi_buf *cfi_buf; if (so->so_cfil == NULL) { CFIL_LOG(LOG_ERR, "so %llx cfil detached", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = 0; goto done; } else if (so->so_cfil->cfi_flags & CFIF_DROP) { CFIL_LOG(LOG_ERR, "so %llx drop set", (uint64_t)VM_KERNEL_ADDRPERM(so)); error = EPIPE; goto done; } datalen = cfil_data_length(data, &mbcnt); CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing ? "out" : "in", (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags, (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt)); if (outgoing) cfi_buf = &so->so_cfil->cfi_snd; else cfi_buf = &so->so_cfil->cfi_rcv; cfi_buf->cfi_pending_last += datalen; cfi_buf->cfi_pending_mbcnt += mbcnt; cfil_info_buf_verify(cfi_buf); CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu", (uint64_t)VM_KERNEL_ADDRPERM(so), cfi_buf->cfi_pending_last, cfi_buf->cfi_pass_offset); /* Fast path when below pass offset */ if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) { cfil_update_entry_offsets(so, outgoing, datalen); } else { for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { error = cfil_data_filter(so, kcunit, outgoing, data, datalen); /* 0 means passed so continue with next filter */ if (error != 0) break; } } /* Move cursor if no filter claimed the data */ if (error == 0) { cfi_buf->cfi_pending_first += datalen; cfi_buf->cfi_pending_mbcnt -= mbcnt; cfil_info_buf_verify(cfi_buf); } done: CFIL_INFO_VERIFY(so->so_cfil); return (error); } /* * Callback from socket layer sosendxxx() */ int cfil_sock_data_out(struct socket *so, struct sockaddr *to, struct mbuf *data, struct mbuf *control, uint32_t flags) { int error = 0; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) return (0); socket_lock_assert_owned(so); if (so->so_cfil->cfi_flags & CFIF_DROP) { CFIL_LOG(LOG_ERR, "so %llx drop set", (uint64_t)VM_KERNEL_ADDRPERM(so)); return (EPIPE); } if (control != NULL) { CFIL_LOG(LOG_ERR, "so %llx control", (uint64_t)VM_KERNEL_ADDRPERM(so)); OSIncrementAtomic(&cfil_stats.cfs_data_out_control); } if ((flags & MSG_OOB)) { CFIL_LOG(LOG_ERR, "so %llx MSG_OOB", (uint64_t)VM_KERNEL_ADDRPERM(so)); OSIncrementAtomic(&cfil_stats.cfs_data_out_oob); } if ((so->so_snd.sb_flags & SB_LOCK) == 0) panic("so %p SB_LOCK not set", so); if (so->so_snd.sb_cfil_thread != NULL) panic("%s sb_cfil_thread %p not NULL", __func__, so->so_snd.sb_cfil_thread); error = cfil_data_common(so, 1, to, data, control, flags); return (error); } /* * Callback from socket layer sbappendxxx() */ int cfil_sock_data_in(struct socket *so, struct sockaddr *from, struct mbuf *data, struct mbuf *control, uint32_t flags) { int error = 0; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == 
NULL) return (0); socket_lock_assert_owned(so); if (so->so_cfil->cfi_flags & CFIF_DROP) { CFIL_LOG(LOG_ERR, "so %llx drop set", (uint64_t)VM_KERNEL_ADDRPERM(so)); return (EPIPE); } if (control != NULL) { CFIL_LOG(LOG_ERR, "so %llx control", (uint64_t)VM_KERNEL_ADDRPERM(so)); OSIncrementAtomic(&cfil_stats.cfs_data_in_control); } if (data->m_type == MT_OOBDATA) { CFIL_LOG(LOG_ERR, "so %llx MSG_OOB", (uint64_t)VM_KERNEL_ADDRPERM(so)); OSIncrementAtomic(&cfil_stats.cfs_data_in_oob); } error = cfil_data_common(so, 0, from, data, control, flags); return (error); } /* * Callback from socket layer soshutdownxxx() * * We may delay the shutdown of the write side while outgoing data is still * being processed by the content filters. * * There is no point in delaying the shutdown of the read side because the * process indicated that it does not want to read any more data. */ int cfil_sock_shutdown(struct socket *so, int *how) { int error = 0; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) goto done; socket_lock_assert_owned(so); CFIL_LOG(LOG_INFO, "so %llx how %d", (uint64_t)VM_KERNEL_ADDRPERM(so), *how); /* * Check the state of the socket before the content filter */ if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) { /* read already shut down */ error = ENOTCONN; goto done; } if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) { /* write already shut down */ error = ENOTCONN; goto done; } if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) { CFIL_LOG(LOG_ERR, "so %llx drop set", (uint64_t)VM_KERNEL_ADDRPERM(so)); goto done; } /* * shutdown read: SHUT_RD or SHUT_RDWR */ if (*how != SHUT_WR) { if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) { error = ENOTCONN; goto done; } so->so_cfil->cfi_flags |= CFIF_SHUT_RD; cfil_sock_notify_shutdown(so, SHUT_RD); } /* * shutdown write: SHUT_WR or SHUT_RDWR */ if (*how != SHUT_RD) { if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) { error = ENOTCONN; goto done; } so->so_cfil->cfi_flags |= CFIF_SHUT_WR; cfil_sock_notify_shutdown(so, SHUT_WR); /* * When outgoing data is pending, we delay the shutdown at the * protocol level until the content filters give the final * verdict on the pending data. */ if (cfil_sock_data_pending(&so->so_snd) != 0) { /* * When shutting down the read and write sides at once * we can proceed to the final shutdown of the read * side. Otherwise, we just return.
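* In the SHUT_WR case EJUSTRETURN tells the caller to skip the protocol * shutdown for now; in the SHUT_RDWR case the request is downgraded to * SHUT_RD so the read side is shut down immediately while the write side * completes later, once the filters have passed or dropped the pending data.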
*/ if (*how == SHUT_WR) { error = EJUSTRETURN; } else if (*how == SHUT_RDWR) { *how = SHUT_RD; } } } done: return (error); } /* * This is called when the socket is closed and there is no more * opportunity for filtering */ void cfil_sock_is_closed(struct socket *so) { errno_t error = 0; int kcunit; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) return; CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so)); socket_lock_assert_owned(so); for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { /* Let the filters know of the closing */ error = cfil_dispatch_closed_event(so, kcunit); } /* Last chance to push passed data out */ error = cfil_acquire_sockbuf(so, 1); if (error == 0) cfil_service_inject_queue(so, 1); cfil_release_sockbuf(so, 1); so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED; /* Pending data needs to go */ cfil_flush_queues(so); CFIL_INFO_VERIFY(so->so_cfil); } /* * This is called when the socket is disconnected so that the filters * know about the disconnection and that no more data will come * * The how parameter has the same values as soshutdown() */ void cfil_sock_notify_shutdown(struct socket *so, int how) { errno_t error = 0; int kcunit; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) return; CFIL_LOG(LOG_INFO, "so %llx how %d", (uint64_t)VM_KERNEL_ADDRPERM(so), how); socket_lock_assert_owned(so); for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { /* Disconnect incoming side */ if (how != SHUT_WR) error = cfil_dispatch_disconnect_event(so, kcunit, 0); /* Disconnect outgoing side */ if (how != SHUT_RD) error = cfil_dispatch_disconnect_event(so, kcunit, 1); } } static int cfil_filters_attached(struct socket *so) { struct cfil_entry *entry; uint32_t kcunit; int attached = 0; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) return (0); socket_lock_assert_owned(so); for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { entry = &so->so_cfil->cfi_entries[kcunit - 1]; /* Are we attached to the filter?
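* An entry only counts as attached if a filter is present, the attach event * was sent (CFEF_SENT_SOCK_ATTACHED) and the entry has not been marked * detached (CFEF_CFIL_DETACHED).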
*/ if (entry->cfe_filter == NULL) continue; if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) continue; if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) continue; attached = 1; break; } return (attached); } /* * This is called when the socket is closed and we are waiting for * the filters to give the final pass or drop */ void cfil_sock_close_wait(struct socket *so) { lck_mtx_t *mutex_held; struct timespec ts; int error; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) return; CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so)); if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else mutex_held = so->so_proto->pr_domain->dom_mtx; lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); while (cfil_filters_attached(so)) { /* * Notify the filters we are going away so they can detach */ cfil_sock_notify_shutdown(so, SHUT_RDWR); /* * Make sure we still need to wait after the filters are notified * of the disconnection */ if (cfil_filters_attached(so) == 0) break; CFIL_LOG(LOG_INFO, "so %llx waiting", (uint64_t)VM_KERNEL_ADDRPERM(so)); ts.tv_sec = cfil_close_wait_timeout / 1000; ts.tv_nsec = (cfil_close_wait_timeout % 1000) * NSEC_PER_USEC * 1000; OSIncrementAtomic(&cfil_stats.cfs_close_wait); so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT; error = msleep((caddr_t)&so->so_cfil, mutex_held, PSOCK | PCATCH, "cfil_sock_close_wait", &ts); so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT; CFIL_LOG(LOG_NOTICE, "so %llx timed out %d", (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0)); /* * Force close in case of timeout */ if (error != 0) { OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout); break; } } } /* * Returns the amount of data held by the content filter subsystem for the * given socket buffer */ int32_t cfil_sock_data_pending(struct sockbuf *sb) { struct socket *so = sb->sb_so; uint64_t pending = 0; if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) { struct cfi_buf *cfi_buf; socket_lock_assert_owned(so); if ((sb->sb_flags & SB_RECV) == 0) cfi_buf = &so->so_cfil->cfi_snd; else cfi_buf = &so->so_cfil->cfi_rcv; pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first; /* * If we are limited by the "chars of mbufs used" roughly * adjust so we won't overcommit */ if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) pending = cfi_buf->cfi_pending_mbcnt; } VERIFY(pending < INT32_MAX); return (int32_t)(pending); } /* * Return the socket buffer space used by data being held by content filters * so processes won't clog the socket buffer */ int32_t cfil_sock_data_space(struct sockbuf *sb) { struct socket *so = sb->sb_so; uint64_t pending = 0; if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL && so->so_snd.sb_cfil_thread != current_thread()) { struct cfi_buf *cfi_buf; socket_lock_assert_owned(so); if ((sb->sb_flags & SB_RECV) == 0) cfi_buf = &so->so_cfil->cfi_snd; else cfi_buf = &so->so_cfil->cfi_rcv; pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first; /* * If we are limited by the "chars of mbufs used" roughly * adjust so we won't overcommit */ if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) pending = cfi_buf->cfi_pending_mbcnt; } VERIFY(pending < INT32_MAX); return (int32_t)(pending); } /* * A callback from the socket and protocol layer when data becomes * available in the socket buffer to give a chance for the content filter * to re-inject data that was held back */ void cfil_sock_buf_update(struct sockbuf *sb) { int outgoing; int error; struct socket *so = sb->sb_so; if ((so->so_flags & SOF_CONTENT_FILTER) == 0 ||
so->so_cfil == NULL) return; if (!cfil_sbtrim) return; socket_lock_assert_owned(so); if ((sb->sb_flags & SB_RECV) == 0) { if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) return; outgoing = 1; OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry); } else { if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) return; outgoing = 0; OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry); } CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing); error = cfil_acquire_sockbuf(so, outgoing); if (error == 0) cfil_service_inject_queue(so, outgoing); cfil_release_sockbuf(so, outgoing); } int sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) { #pragma unused(oidp, arg1, arg2) int error = 0; size_t len = 0; u_int32_t i; /* Read only */ if (req->newptr != USER_ADDR_NULL) return (EPERM); cfil_rw_lock_shared(&cfil_lck_rw); for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) { struct cfil_filter_stat filter_stat; struct content_filter *cfc = content_filters[i]; if (cfc == NULL) continue; /* If just asking for the size */ if (req->oldptr == USER_ADDR_NULL) { len += sizeof(struct cfil_filter_stat); continue; } bzero(&filter_stat, sizeof(struct cfil_filter_stat)); filter_stat.cfs_len = sizeof(struct cfil_filter_stat); filter_stat.cfs_filter_id = cfc->cf_kcunit; filter_stat.cfs_flags = cfc->cf_flags; filter_stat.cfs_sock_count = cfc->cf_sock_count; filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit; error = SYSCTL_OUT(req, &filter_stat, sizeof (struct cfil_filter_stat)); if (error != 0) break; } /* If just asking for the size */ if (req->oldptr == USER_ADDR_NULL) req->oldidx = len; cfil_rw_unlock_shared(&cfil_lck_rw); return (error); } static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) { #pragma unused(oidp, arg1, arg2) int error = 0; u_int32_t i; struct cfil_info *cfi; /* Read only */ if (req->newptr != USER_ADDR_NULL) return (EPERM); cfil_rw_lock_shared(&cfil_lck_rw); /* * If just asking for the size, */ if (req->oldptr == USER_ADDR_NULL) { req->oldidx = cfil_sock_attached_count * sizeof(struct cfil_sock_stat); /* Bump the length in case new sockets gets attached */ req->oldidx += req->oldidx >> 3; goto done; } TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) { struct cfil_entry *entry; struct cfil_sock_stat stat; struct socket *so = cfi->cfi_so; bzero(&stat, sizeof(struct cfil_sock_stat)); stat.cfs_len = sizeof(struct cfil_sock_stat); stat.cfs_sock_id = cfi->cfi_sock_id; stat.cfs_flags = cfi->cfi_flags; if (so != NULL) { stat.cfs_pid = so->last_pid; memcpy(stat.cfs_uuid, so->last_uuid, sizeof(uuid_t)); if (so->so_flags & SOF_DELEGATED) { stat.cfs_e_pid = so->e_pid; memcpy(stat.cfs_e_uuid, so->e_uuid, sizeof(uuid_t)); } else { stat.cfs_e_pid = so->last_pid; memcpy(stat.cfs_e_uuid, so->last_uuid, sizeof(uuid_t)); } } stat.cfs_snd.cbs_pending_first = cfi->cfi_snd.cfi_pending_first; stat.cfs_snd.cbs_pending_last = cfi->cfi_snd.cfi_pending_last; stat.cfs_snd.cbs_inject_q_len = cfil_queue_len(&cfi->cfi_snd.cfi_inject_q); stat.cfs_snd.cbs_pass_offset = cfi->cfi_snd.cfi_pass_offset; stat.cfs_rcv.cbs_pending_first = cfi->cfi_rcv.cfi_pending_first; stat.cfs_rcv.cbs_pending_last = cfi->cfi_rcv.cfi_pending_last; stat.cfs_rcv.cbs_inject_q_len = cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q); stat.cfs_rcv.cbs_pass_offset = cfi->cfi_rcv.cfi_pass_offset; for (i = 0; i < MAX_CONTENT_FILTER; i++) { struct cfil_entry_stat *estat; struct cfe_buf *ebuf; struct 
cfe_buf_stat *sbuf; entry = &cfi->cfi_entries[i]; estat = &stat.ces_entries[i]; estat->ces_len = sizeof(struct cfil_entry_stat); estat->ces_filter_id = entry->cfe_filter ? entry->cfe_filter->cf_kcunit : 0; estat->ces_flags = entry->cfe_flags; estat->ces_necp_control_unit = entry->cfe_necp_control_unit; estat->ces_last_event.tv_sec = (int64_t)entry->cfe_last_event.tv_sec; estat->ces_last_event.tv_usec = (int64_t)entry->cfe_last_event.tv_usec; estat->ces_last_action.tv_sec = (int64_t)entry->cfe_last_action.tv_sec; estat->ces_last_action.tv_usec = (int64_t)entry->cfe_last_action.tv_usec; ebuf = &entry->cfe_snd; sbuf = &estat->ces_snd; sbuf->cbs_pending_first = cfil_queue_offset_first(&ebuf->cfe_pending_q); sbuf->cbs_pending_last = cfil_queue_offset_last(&ebuf->cfe_pending_q); sbuf->cbs_ctl_first = cfil_queue_offset_first(&ebuf->cfe_ctl_q); sbuf->cbs_ctl_last = cfil_queue_offset_last(&ebuf->cfe_ctl_q); sbuf->cbs_pass_offset = ebuf->cfe_pass_offset; sbuf->cbs_peek_offset = ebuf->cfe_peek_offset; sbuf->cbs_peeked = ebuf->cfe_peeked; ebuf = &entry->cfe_rcv; sbuf = &estat->ces_rcv; sbuf->cbs_pending_first = cfil_queue_offset_first(&ebuf->cfe_pending_q); sbuf->cbs_pending_last = cfil_queue_offset_last(&ebuf->cfe_pending_q); sbuf->cbs_ctl_first = cfil_queue_offset_first(&ebuf->cfe_ctl_q); sbuf->cbs_ctl_last = cfil_queue_offset_last(&ebuf->cfe_ctl_q); sbuf->cbs_pass_offset = ebuf->cfe_pass_offset; sbuf->cbs_peek_offset = ebuf->cfe_peek_offset; sbuf->cbs_peeked = ebuf->cfe_peeked; } error = SYSCTL_OUT(req, &stat, sizeof (struct cfil_sock_stat)); if (error != 0) break; } done: cfil_rw_unlock_shared(&cfil_lck_rw); return (error); }
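/*
 * Illustrative sketch only (not part of the kernel build): how a user space
 * tool could consume the statistics exported by sysctl_cfil_filter_list()
 * above. It assumes the handler is registered under the name
 * "net.cfil.filter_list" (the SYSCTL_PROC registration lives elsewhere in
 * this file) and relies on the size-query behavior implemented above: a call
 * with a NULL buffer returns the required length, and a second call fills an
 * array of struct cfil_filter_stat. Needs <sys/sysctl.h> and <stdlib.h>.
 *
 *	size_t len = 0;
 *	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&
 *	    len >= sizeof(struct cfil_filter_stat)) {
 *		struct cfil_filter_stat *stats = malloc(len);
 *		if (stats != NULL &&
 *		    sysctlbyname("net.cfil.filter_list", stats, &len,
 *			NULL, 0) == 0) {
 *			size_t n = len / sizeof(struct cfil_filter_stat);
 *			for (size_t i = 0; i < n; i++) {
 *				// each stats[i] describes one attached filter
 *				// (cfs_filter_id, cfs_sock_count, ...)
 *			}
 *		}
 *		free(stats);
 *	}
 */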