/*
 * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * THEORY OF OPERATION
 *
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data, and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or
 * drop decision. This unidirectional flow of content avoids unnecessary data
 * copies back to the kernel.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * It should be noted that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * Notes:
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters
 *   for the sake of simplicity of the implementation.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT.
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
 *
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (this is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells if
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
 *
 *
 * EVENT MESSAGES
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 *	2^64 / ((10^10 / 8) * 60 * 60 * 24 * 365.25) = 467.63
 *
 * There are two kinds of content filter actions:
 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow.
 *
 *
 * ACTION MESSAGES
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * then the data flow becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
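 *
 * EXAMPLE: SENDING AN ACTION MESSAGE
 *
 * A minimal user space sketch of sending a CFM_OP_DATA_UPDATE over the
 * content filter kernel control socket. It is illustrative only (it is not
 * compiled as part of this file), it assumes the message layouts of
 * <net/content_filter.h> -- in particular that the action message header
 * member is named "cfa_msghdr" -- and it assumes "sock_id" was taken from
 * a CFM_OP_SOCKET_ATTACHED event received earlier on the same socket:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	struct ctl_info info;
 *	bzero(&info, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *	    sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);	// resolve the dynamic control id
 *
 *	struct sockaddr_ctl sc;
 *	bzero(&sc, sizeof(sc));
 *	sc.sc_len = sizeof(sc);
 *	sc.sc_family = AF_SYSTEM;
 *	sc.ss_sysaddr = AF_SYS_CONTROL;
 *	sc.sc_id = info.ctl_id;
 *	sc.sc_unit = 1;			// kcunit in [1, MAX_CONTENT_FILTER]
 *	connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *
 *	struct cfil_msg_action action;
 *	bzero(&action, sizeof(action));
 *	action.cfa_msghdr.cfm_len = sizeof(action);
 *	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	action.cfa_msghdr.cfm_sock_id = sock_id;
 *	action.cfa_out_pass_offset = 512;   // allow the first 512 bytes out
 *	action.cfa_out_peek_offset = 1024;  // deliver events up to offset 1024
 *	send(fd, &action, sizeof(action), 0);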
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a peek offset greater than 0 is sent by the user space filter agent.
 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * then the flow of update data events becomes unrestricted.
 *
 * Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also, a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
 * to set a peek value that is too small is silently ignored.
 *
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected;
 * - A content filter specific state in a set of "struct cfil_entry"
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
 * to the kernel control socket unit and also has a pointer to the
 * corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The pass offset
 * - The peek offset
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and are pending for a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead will sit in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offset of the data span of
 * the list of mbufs.
 *
 * The data moves into the three content filter queues according to this
 * sequence, illustrated by the worked example below:
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: The sequence (a),(b) may be repeated several times if more than one
 * content filter is attached to the TCP/IP socket.
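 *
 * WORKED EXAMPLE
 *
 * A hypothetical outgoing sequence with a single filter attached, with pass
 * and peek offsets starting at 0:
 *
 * 1) The application sends 100 bytes. The data (offsets 0..100) is held in
 *    "cfe_ctl_q"; no event is dispatched because the peek offset is 0.
 *
 * 2) The filter agent sends a CFM_OP_DATA_UPDATE with an out peek offset
 *    of 100. A CFM_OP_DATA_OUT event for offsets 0..100 is dispatched and
 *    the data moves from "cfe_ctl_q" to "cfe_pending_q".
 *
 * 3) The filter agent sends a CFM_OP_DATA_UPDATE with an out pass offset
 *    of 100. The data leaves "cfe_pending_q" for "cfi_inject_q" (or for the
 *    "cfe_ctl_q" of the next attached filter, if any) and is eventually
 *    re-injected into the TCP/IP socket buffer.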
 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and are waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * TCP/IP socket.
 *
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 *
 * The processing overhead of the content filtering may have an effect on
 * flow control by adding noticeable delays and cannot be eliminated --
 * care must be taken by the user space filter agent to minimize the
 * processing delays.
 *
 * The data being filtered is kept in buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
 *
 *
 * LOCKING STRATEGY
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
 * is held. That's why we have some sequences where we drop the cfil read-write
 * lock before taking the TCP/IP lock.
 *
 * It is also important to lock the TCP/IP socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually "cfe_link" and "cfe_filter" are protected both by
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is exclusive and the socket is locked.
 *
 * To read the other fields of "struct content_filter" we have to take
 * "cfil_lck_rw" in shared mode.
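 *
 * LOCK ORDERING SKETCH
 *
 * A condensed, illustrative version of the pattern used throughout this
 * file to respect the rule above: drop "cfil_lck_rw" before taking the
 * socket lock, then re-validate the state that may have changed while no
 * lock was held:
 *
 *	cfil_rw_lock_shared(&cfil_lck_rw);
 *	so = entry->cfe_cfil_info->cfi_so;
 *	cfil_rw_unlock_shared(&cfil_lck_rw);
 *
 *	socket_lock(so, 1);
 *	if (so->so_cfil == NULL || entry->cfe_filter == NULL) {
 *		// the filter detached during the unlocked window; bail out
 *	}
 *	...
 *	socket_unlock(so, 1);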
 *
 *
 * LIMITATIONS
 *
 * - For TCP sockets only
 *
 * - Does not support TCP unordered messages
 */

/*
 * TO DO LIST
 *
 * SOONER:
 *
 * Deal with OOB
 *
 * LATER:
 *
 * If we support datagrams, enqueue control and address mbufs as well
 */

#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>

#include <string.h>
#include <libkern/libkern.h>


#define MAX_CONTENT_FILTER 2

struct cfil_entry;

/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref		cf_kcref;
	u_int32_t		cf_kcunit;
	u_int32_t		cf_flags;

	uint32_t		cf_necp_control_unit;

	uint32_t		cf_sock_count;
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define CFF_ACTIVE		0x01
#define CFF_DETACHING		0x02
#define CFF_FLOW_CONTROLLED	0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;	/* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;	/* Number of socket attachments */
uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);

#define CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];

#define CONTENT_FILTER_ZONE_NAME "content_filter"
#define CONTENT_FILTER_ZONE_MAX 10
static struct zone *content_filter_zone = NULL;	/* zone for content_filter */


#define CFIL_INFO_ZONE_NAME "cfil_info"
#define CFIL_INFO_ZONE_MAX 1024
static struct zone *cfil_info_zone = NULL;	/* zone for cfil_info */

MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
	uint64_t		q_start; /* offset of first byte in queue */
	uint64_t		q_end; /* offset of last byte in queue */
	struct cfil_mqhead	q_mq;
};

/*
 * struct cfil_entry
 *
 * There is one entry per content filter
 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry) cfe_link;
	struct content_filter	*cfe_filter;

	struct cfil_info	*cfe_cfil_info;
	uint32_t		cfe_flags;
	uint32_t		cfe_necp_control_unit;
	struct timeval		cfe_last_event; /* To user space */
	struct timeval		cfe_last_action; /* From user space */

	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action
		 */
		struct cfil_queue	cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control)
		 */
		struct cfil_queue	cfe_ctl_q;

		uint64_t		cfe_pass_offset;
		uint64_t		cfe_peek_offset;
		uint64_t		cfe_peeked;
	} cfe_snd, cfe_rcv;
};

#define CFEF_CFIL_ATTACHED		0x0001 /* was attached to filter */
#define CFEF_SENT_SOCK_ATTACHED		0x0002 /* sock attach event was sent */
#define CFEF_DATA_START			0x0004 /* can send data event */
#define CFEF_FLOW_CONTROLLED		0x0008 /* wait for flow control lift */
#define CFEF_SENT_DISCONNECT_IN		0x0010 /* event was sent */
#define CFEF_SENT_DISCONNECT_OUT	0x0020 /* event was sent */
#define CFEF_SENT_SOCK_CLOSED		0x0040 /* closed event was sent */
#define CFEF_CFIL_DETACHED		0x0080 /* filter was detached */

/*
 * struct cfil_info
 *
 * There is a struct cfil_info per socket
 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info)	cfi_link;
	struct socket		*cfi_so;
	uint64_t		cfi_flags;
	uint64_t		cfi_sock_id;

	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue.
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
		 */
		uint64_t		cfi_pending_first;
		uint64_t		cfi_pending_last;
		int			cfi_pending_mbcnt;
		/*
		 * cfi_pass_offset is the minimum of all the filters
		 */
		uint64_t		cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control
		 */
		struct cfil_queue	cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry	cfi_entries[MAX_CONTENT_FILTER];
};

#define CFIF_DROP		0x0001 /* drop action applied */
#define CFIF_CLOSE_WAIT		0x0002 /* waiting for filter to close */
#define CFIF_SOCK_CLOSED	0x0004 /* socket is closed */
#define CFIF_RETRY_INJECT_IN	0x0010 /* inject in failed */
#define CFIF_RETRY_INJECT_OUT	0x0020 /* inject out failed */
#define CFIF_SHUT_WR		0x0040 /* shutdown write */
#define CFIF_SHUT_RD		0x0080 /* shutdown read */

#define CFI_MASK_GENCNT		0xFFFFFFFF00000000 /* upper 32 bits */
#define CFI_SHIFT_GENCNT	32
#define CFI_MASK_FLOWHASH	0x00000000FFFFFFFF /* lower 32 bits */
#define CFI_SHIFT_FLOWHASH	0

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)

/*
 * Statistics
 */

struct cfil_stats cfil_stats;

/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;
int cfil_debug = 1;

/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);

SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_stats, cfil_stats, "");
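
/*
 * EXAMPLE: READING THE STATISTICS FROM USER SPACE
 *
 * An illustrative user space sketch (not compiled as part of this file) of
 * reading the "net.cfil.stats" sysctl exported above, assuming the layout
 * of "struct cfil_stats" from <net/content_filter.h>:
 *
 *	#include <sys/sysctl.h>
 *	#include <net/content_filter.h>
 *
 *	struct cfil_stats stats;
 *	size_t len = sizeof(stats);
 *	if (sysctlbyname("net.cfil.stats", &stats, &len, NULL, 0) == 0)
 *		printf("attach events ok: %u\n", stats.cfs_attach_event_ok);
 */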

/*
 * Forward declarations to appease the compiler
 */
static int cfil_action_data_pass(struct socket *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, uint32_t);
static int cfil_dispatch_closed_event(struct socket *, int);
static int cfil_data_common(struct socket *, int, struct sockaddr *,
	struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, uint32_t, int,
	struct mbuf *, uint64_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in6_addr *, u_int16_t);
static int cfil_dispatch_attach_event(struct socket *, uint32_t);
static void cfil_info_free(struct socket *, struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *);
static int cfil_info_attach_unit(struct socket *, uint32_t);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t);
static int cfil_service_pending_queue(struct socket *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

static unsigned int cfil_data_length(struct mbuf *, int *);

/*
 * Content filter global read write lock
 */

static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_exclusive(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_shared(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
	void *lr_saved;
	boolean_t upgraded;

	lr_saved = __builtin_return_address(0);

	upgraded = lck_rw_lock_shared_to_exclusive(lck);
	if (upgraded) {
		cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
		cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
	}
	return (upgraded);
}

static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive_to_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
	lck_rw_assert(lck,
		exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}

static void
socket_lock_assert_owned(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
}

/*
 * Return the number of bytes in the mbuf chain using the same
 * method as m_length() or sballoc()
 */
static unsigned int
cfil_data_length(struct mbuf *m, int *retmbcnt)
{
	struct mbuf *m0;
	unsigned int pktlen;
	int mbcnt;

	if (retmbcnt == NULL)
		return (m_length(m));

	pktlen = 0;
	mbcnt = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		pktlen += m0->m_len;
		mbcnt += MSIZE;
		if (m0->m_flags & M_EXT)
			mbcnt += m0->m_ext.ext_size;
	}
	*retmbcnt = mbcnt;
	return (pktlen);
}

/*
 * Common mbuf queue utilities
 */

static inline void
cfil_queue_init(struct cfil_queue *cfq)
{
	cfq->q_start = 0;
	cfq->q_end = 0;
	MBUFQ_INIT(&cfq->q_mq);
}

static inline uint64_t
cfil_queue_drain(struct cfil_queue *cfq)
{
	/* Number of bytes drained; q_end is never less than q_start */
	uint64_t drained = cfq->q_end - cfq->q_start;
	cfq->q_start = 0;
	cfq->q_end = 0;
	MBUFQ_DRAIN(&cfq->q_mq);

	return (drained);
}

/* Return 1 when empty, 0 otherwise */
static inline int
cfil_queue_empty(struct cfil_queue *cfq)
{
	return (MBUFQ_EMPTY(&cfq->q_mq));
}

static inline uint64_t
cfil_queue_offset_first(struct cfil_queue *cfq)
{
	return (cfq->q_start);
}

static inline uint64_t
cfil_queue_offset_last(struct cfil_queue *cfq)
{
	return (cfq->q_end);
}

static inline uint64_t
cfil_queue_len(struct cfil_queue *cfq)
{
	return (cfq->q_end - cfq->q_start);
}
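
/*
 * EXAMPLE: QUEUE OFFSET BOOKKEEPING
 *
 * An illustrative sequence (with hypothetical mbufs and lengths) of how the
 * absolute offsets of a "struct cfil_queue" evolve; see cfil_queue_enqueue()
 * and cfil_queue_remove() below:
 *
 *	struct cfil_queue q;
 *	cfil_queue_init(&q);			// q_start = 0,   q_end = 0,   len 0
 *	cfil_queue_enqueue(&q, m1, 100);	// q_start = 0,   q_end = 100, len 100
 *	cfil_queue_enqueue(&q, m2, 50);		// q_start = 0,   q_end = 150, len 150
 *	cfil_queue_remove(&q, m1, 100);		// q_start = 100, q_end = 150, len 50
 */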

/*
 * Routines to verify some fundamental assumptions
 */

static void
cfil_queue_verify(struct cfil_queue *cfq)
{
	mbuf_t m;
	mbuf_t n;
	uint64_t queuesize = 0;

	/* Verify the offsets are ordered */
	VERIFY(cfq->q_start <= cfq->q_end);

	/*
	 * When the queue is empty, the offsets are equal; otherwise the
	 * offsets are different
	 */
	VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
		(!MBUFQ_EMPTY(&cfq->q_mq) &&
		cfq->q_start != cfq->q_end));

	MBUFQ_FOREACH(m, &cfq->q_mq) {
		size_t chainsize = 0;
		unsigned int mlen = m_length(m);

		if (m == (void *)M_TAG_FREE_PATTERN ||
			m->m_next == (void *)M_TAG_FREE_PATTERN ||
			m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
			panic("%s - mq %p is free at %p", __func__,
				&cfq->q_mq, m);
		for (n = m; n != NULL; n = n->m_next) {
			if (n->m_type != MT_DATA &&
				n->m_type != MT_HEADER &&
				n->m_type != MT_OOBDATA)
				panic("%s - %p unsupported type %u", __func__,
					n, n->m_type);
			chainsize += n->m_len;
		}
		if (mlen != chainsize)
			panic("%s - %p m_length() %u != chainsize %lu",
				__func__, m, mlen, chainsize);
		queuesize += chainsize;
	}
	if (queuesize != cfq->q_end - cfq->q_start)
		panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
			m, queuesize, cfq->q_end - cfq->q_start);
}

static void
cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	MBUFQ_ENQUEUE(&cfq->q_mq, m);
	cfq->q_end += len;

	CFIL_QUEUE_VERIFY(cfq);
}

static void
cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	VERIFY(m_length(m) == len);

	MBUFQ_REMOVE(&cfq->q_mq, m);
	MBUFQ_NEXT(m) = NULL;
	cfq->q_start += len;

	CFIL_QUEUE_VERIFY(cfq);
}

static mbuf_t
cfil_queue_first(struct cfil_queue *cfq)
{
	return (MBUFQ_FIRST(&cfq->q_mq));
}

static mbuf_t
cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
{
#pragma unused(cfq)
	return (MBUFQ_NEXT(m));
}

static void
cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
{
	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);

	/* Verify the queues are ordered so that pending is before ctl */
	VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);

	/* The peek offset cannot be less than the pass offset */
	VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);

	/* Make sure we've updated the offset we peeked at */
	VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
}

static void
cfil_entry_verify(struct cfil_entry *entry)
{
	cfil_entry_buf_verify(&entry->cfe_snd);
	cfil_entry_buf_verify(&entry->cfe_rcv);
}

static void
cfil_info_buf_verify(struct cfi_buf *cfi_buf)
{
	CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);

	VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
	VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
}

static void
cfil_info_verify(struct cfil_info *cfil_info)
{
	int i;

	if (cfil_info == NULL)
		return;

	cfil_info_buf_verify(&cfil_info->cfi_snd);
	cfil_info_buf_verify(&cfil_info->cfi_rcv);

	for (i = 0; i < MAX_CONTENT_FILTER; i++)
		cfil_entry_verify(&cfil_info->cfi_entries[i]);
}

static void
verify_content_filter(struct content_filter *cfc)
{
	struct cfil_entry *entry;
	uint32_t count = 0;

	VERIFY(cfc->cf_sock_count >= 0);

	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
		count++;
		VERIFY(cfc == entry->cfe_filter);
	}
	VERIFY(count == cfc->cf_sock_count);
}

/*
 * Kernel control socket callbacks
 */
static errno_t
cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
	void **unitinfo)
{
	errno_t error = 0;
	struct content_filter *cfc = NULL;

	CFIL_LOG(LOG_NOTICE, "");

	cfc = zalloc(content_filter_zone);
	if (cfc == NULL) {
		CFIL_LOG(LOG_ERR, "zalloc failed");
		error = ENOMEM;
		goto done;
	}
	bzero(cfc, sizeof(struct content_filter));

	cfil_rw_lock_exclusive(&cfil_lck_rw);
	if (content_filters == NULL) {
		struct content_filter **tmp;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);

		MALLOC(tmp,
			struct content_filter **,
			MAX_CONTENT_FILTER * sizeof(struct content_filter *),
			M_TEMP,
			M_WAITOK | M_ZERO);

		cfil_rw_lock_exclusive(&cfil_lck_rw);

		if (tmp == NULL && content_filters == NULL) {
			error = ENOMEM;
			cfil_rw_unlock_exclusive(&cfil_lck_rw);
			goto done;
		}
		/* Another thread may have won the race */
		if (content_filters != NULL)
			FREE(tmp, M_TEMP);
		else
			content_filters = tmp;
	}

	if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
		error = EINVAL;
	} else if (content_filters[sac->sc_unit - 1] != NULL) {
		CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
		error = EADDRINUSE;
	} else {
		/*
		 * kernel control socket kcunit numbers start at 1
		 */
		content_filters[sac->sc_unit - 1] = cfc;

		cfc->cf_kcref = kctlref;
		cfc->cf_kcunit = sac->sc_unit;
		TAILQ_INIT(&cfc->cf_sock_entries);

		*unitinfo = cfc;
		cfil_active_count++;
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);
done:
	if (error != 0 && cfc != NULL)
		zfree(content_filter_zone, cfc);

	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);

	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
		error, cfil_active_count, sac->sc_unit);

	return (error);
}

static errno_t
cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
{
#pragma unused(kctlref)
	errno_t error = 0;
	struct content_filter *cfc;
	struct cfil_entry *entry;

	CFIL_LOG(LOG_NOTICE, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
			kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}

	cfc = (struct content_filter *)unitinfo;
	if (cfc == NULL)
		goto done;

	cfil_rw_lock_exclusive(&cfil_lck_rw);
	if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
		CFIL_LOG(LOG_ERR, "bad unit info %u",
			kcunit);
		cfil_rw_unlock_exclusive(&cfil_lck_rw);
		goto done;
	}
	cfc->cf_flags |= CFF_DETACHING;
	/*
	 * Remove all sockets from the filter
	 */
	while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
		cfil_rw_lock_assert_held(&cfil_lck_rw, 1);

		verify_content_filter(cfc);
		/*
		 * Accept all outstanding data by pushing it to the next
		 * filter or back to the socket
		 *
		 * TBD: Actually we should make sure all data has been pushed
		 * back to the socket
		 */
		if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
			struct cfil_info *cfil_info = entry->cfe_cfil_info;
			struct socket *so = cfil_info->cfi_so;

			/* Need to let data flow immediately */
			entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
				CFEF_DATA_START;

			/*
			 * Respect the locking hierarchy
			 */
			cfil_rw_unlock_exclusive(&cfil_lck_rw);

			socket_lock(so, 1);

			/*
			 * When cfe_filter is NULL the filter is detached
			 * and the entry has been removed from cf_sock_entries
			 */
			if (so->so_cfil == NULL || entry->cfe_filter == NULL) {
				cfil_rw_lock_exclusive(&cfil_lck_rw);
				goto release;
			}
			(void) cfil_action_data_pass(so, kcunit, 1,
				CFM_MAX_OFFSET,
				CFM_MAX_OFFSET);

			(void) cfil_action_data_pass(so, kcunit, 0,
				CFM_MAX_OFFSET,
				CFM_MAX_OFFSET);

			cfil_rw_lock_exclusive(&cfil_lck_rw);

			/*
			 * Check again as the socket may have been unlocked
			 * when calling cfil_acquire_sockbuf()
			 */
			if (so->so_cfil == NULL || entry->cfe_filter == NULL)
				goto release;

			/* The filter is now detached */
			entry->cfe_flags |= CFEF_CFIL_DETACHED;
			CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
				(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

			if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
				cfil_filters_attached(so) == 0) {
				CFIL_LOG(LOG_NOTICE, "so %llx waking",
					(uint64_t)VM_KERNEL_ADDRPERM(so));
				wakeup((caddr_t)&so->so_cfil);
			}

			/*
			 * Remove the filter entry from the content filter
			 * but leave the rest of the state intact as the queues
			 * may not be empty yet
			 */
			entry->cfe_filter = NULL;
			entry->cfe_necp_control_unit = 0;

			TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
			cfc->cf_sock_count--;
release:
			socket_unlock(so, 1);
		}
	}
	verify_content_filter(cfc);

	VERIFY(cfc->cf_sock_count == 0);

	/*
	 * Make the filter inactive
	 */
	content_filters[kcunit - 1] = NULL;
	cfil_active_count--;
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	zfree(content_filter_zone, cfc);
done:
	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);

	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
		error, cfil_active_count, kcunit);

	return (error);
}

/*
 * cfil_acquire_sockbuf()
 *
 * Prevent any other thread from acquiring the sockbuf.
 * We use sb_cfil_thread as a semaphore to prevent other threads from
 * messing with the sockbuf -- see sblock()
 * Note: We do not set SB_LOCK here because the thread may check or modify
 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
 * sblock(), sbunlock() or sodefunct()
 */
static int
cfil_acquire_sockbuf(struct socket *so, int outgoing)
{
	thread_t tp = current_thread();
	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
	lck_mtx_t *mutex_held;
	int error = 0;

	/*
	 * Wait until no thread is holding the sockbuf and other content
	 * filter threads have released the sockbuf
	 */
	while ((sb->sb_flags & SB_LOCK) ||
		(sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;

		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

		sb->sb_wantlock++;
		VERIFY(sb->sb_wantlock != 0);

		msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
			NULL);

		VERIFY(sb->sb_wantlock != 0);
		sb->sb_wantlock--;
	}
	/*
	 * Use a reference count for repetitive calls on the same thread
	 */
	if (sb->sb_cfil_refs == 0) {
		VERIFY(sb->sb_cfil_thread == NULL);
		VERIFY((sb->sb_flags & SB_LOCK) == 0);

		sb->sb_cfil_thread = tp;
		sb->sb_flags |= SB_LOCK;
	}
	sb->sb_cfil_refs++;

	/* We acquire the socket buffer when we need to clean up */
	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
	}

	return (error);
}

static void
cfil_release_sockbuf(struct socket *so, int outgoing)
{
	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
	thread_t tp = current_thread();

	socket_lock_assert_owned(so);

	if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)
		panic("%s sb_cfil_thread %p not current %p", __func__,
			sb->sb_cfil_thread, tp);
	/*
	 * Don't panic if we are defunct because SB_LOCK has
	 * been cleared by sodefunct()
	 */
	if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
		panic("%s SB_LOCK not set on %p", __func__,
			sb);
	/*
	 * We can unlock when the thread unwinds to the last reference
	 */
	sb->sb_cfil_refs--;
	if (sb->sb_cfil_refs == 0) {
		sb->sb_cfil_thread = NULL;
		sb->sb_flags &= ~SB_LOCK;

		if (sb->sb_wantlock > 0)
			wakeup(&sb->sb_flags);
	}
}

cfil_sock_id_t
cfil_sock_id_from_socket(struct socket *so)
{
	if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil)
		return (so->so_cfil->cfi_sock_id);
	else
		return (CFIL_SOCK_ID_NONE);
}

static struct socket *
cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id)
{
	struct socket *so = NULL;
	u_int64_t gencnt = cfil_sock_id >> 32;
	u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
	struct inpcb *inp = NULL;
	struct inpcbinfo *pcbinfo = &tcbinfo;

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
			inp->inp_socket != NULL &&
			inp->inp_flowhash == flowhash &&
			(inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
			inp->inp_socket->so_cfil != NULL) {
			so = inp->inp_socket;
			break;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);

	if (so == NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
		CFIL_LOG(LOG_DEBUG,
			"no socket for sock_id %llx gencnt %llx flowhash %x",
			cfil_sock_id, gencnt, flowhash);
	}

	return (so);
}
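
/*
 * EXAMPLE: cfil_sock_id ENCODING
 *
 * A worked illustration (with hypothetical values) of the cfi_sock_id
 * encoding used by cfil_socket_from_sock_id() above and created by
 * cfil_info_alloc() below, per CFI_MASK_GENCNT and CFI_MASK_FLOWHASH:
 *
 *	so_gencnt (low 32 bits)	= 0x00000007
 *	inp_flowhash		= 0x9abcdef0
 *	cfi_sock_id		= (0x00000007ULL << CFI_SHIFT_GENCNT) | 0x9abcdef0
 *				= 0x000000079abcdef0
 *
 *	gencnt	 = cfi_sock_id >> CFI_SHIFT_GENCNT;	// 0x00000007
 *	flowhash = cfi_sock_id & CFI_MASK_FLOWHASH;	// 0x9abcdef0
 */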

static errno_t
cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
	int flags)
{
#pragma unused(kctlref, flags)
	errno_t error = 0;
	struct cfil_msg_hdr *msghdr;
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so;
	struct cfil_msg_action *action_msg;
	struct cfil_entry *entry;

	CFIL_LOG(LOG_INFO, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
			kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}

	if (m_length(m) < sizeof(struct cfil_msg_hdr)) {
		CFIL_LOG(LOG_ERR, "too short %u", m_length(m));
		error = EINVAL;
		goto done;
	}
	msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
	if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
		CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
		error = EINVAL;
		goto done;
	}
	if (msghdr->cfm_type != CFM_TYPE_ACTION) {
		CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
		error = EINVAL;
		goto done;
	}
	/* Validate the action operation */
	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		OSIncrementAtomic(
			&cfil_stats.cfs_ctl_action_data_update);
		break;
	case CFM_OP_DROP:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
		break;
	default:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
		CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
		error = EINVAL;
		goto done;
	}
	if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
		error = EINVAL;
		CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
			msghdr->cfm_len,
			msghdr->cfm_op);
		goto done;
	}
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
			kcunit);
		error = EINVAL;
		cfil_rw_unlock_shared(&cfil_lck_rw);
		goto done;
	}

	so = cfil_socket_from_sock_id(msghdr->cfm_sock_id);
	if (so == NULL) {
		CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
			msghdr->cfm_sock_id);
		error = EINVAL;
		cfil_rw_unlock_shared(&cfil_lck_rw);
		goto done;
	}
	cfil_rw_unlock_shared(&cfil_lck_rw);

	socket_lock(so, 1);

	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx not attached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_NOTICE, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	}
	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (entry->cfe_filter == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx no filter",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	}

	if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)
		entry->cfe_flags |= CFEF_DATA_START;
	else {
		CFIL_LOG(LOG_ERR,
			"so %llx attached not sent for %u",
			(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		error = EINVAL;
		goto unlock;
	}

	microuptime(&entry->cfe_last_action);

	action_msg = (struct cfil_msg_action *)msghdr;

	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		if (action_msg->cfa_out_peek_offset != 0 ||
			action_msg->cfa_out_pass_offset != 0)
			error = cfil_action_data_pass(so, kcunit, 1,
				action_msg->cfa_out_pass_offset,
				action_msg->cfa_out_peek_offset);
		if (error == EJUSTRETURN)
			error = 0;
		if (error != 0)
			break;
		if (action_msg->cfa_in_peek_offset != 0 ||
			action_msg->cfa_in_pass_offset != 0)
			error = cfil_action_data_pass(so, kcunit, 0,
				action_msg->cfa_in_pass_offset,
				action_msg->cfa_in_peek_offset);
		if (error == EJUSTRETURN)
			error = 0;
		break;

	case CFM_OP_DROP:
		error = cfil_action_drop(so, kcunit);
		break;

	default:
		error = EINVAL;
		break;
	}
unlock:
	socket_unlock(so, 1);
done:
	mbuf_freem(m);

	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);

	return (error);
}

static errno_t
cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
	int opt, void *data, size_t *len)
{
#pragma unused(kctlref, opt)
	errno_t error = 0;
	struct content_filter *cfc = (struct content_filter *)unitinfo;

	CFIL_LOG(LOG_NOTICE, "");

	cfil_rw_lock_shared(&cfil_lck_rw);

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
			kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
			kcunit);
		error = EINVAL;
		goto done;
	}
	switch (opt) {
	case CFIL_OPT_NECP_CONTROL_UNIT:
		if (*len < sizeof(uint32_t)) {
			CFIL_LOG(LOG_ERR, "len too small %lu", *len);
			error = EINVAL;
			goto done;
		}
		if (data != NULL)
			*(uint32_t *)data = cfc->cf_necp_control_unit;
		break;
	default:
		error = ENOPROTOOPT;
		break;
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);

	return (error);
}
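
/*
 * EXAMPLE: PAIRING WITH AN NECP FILTER CONTROL UNIT
 *
 * An illustrative user space sketch (not compiled as part of this file) of
 * the CFIL_OPT_NECP_CONTROL_UNIT option handled by cfil_ctl_setopt() below
 * and cfil_ctl_getopt() above; "fd" is the connected kernel control socket
 * and 42 is a hypothetical control unit that must match the unit used in
 * the agent's NECP filtering rules:
 *
 *	uint32_t unit = 42;
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &unit, sizeof(unit));
 *
 *	uint32_t readback = 0;
 *	socklen_t len = sizeof(readback);
 *	getsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &readback, &len);
 */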

static errno_t
cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
	int opt, void *data, size_t len)
{
#pragma unused(kctlref, opt)
	errno_t error = 0;
	struct content_filter *cfc = (struct content_filter *)unitinfo;

	CFIL_LOG(LOG_NOTICE, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
			kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
			kcunit);
		error = EINVAL;
		goto done;
	}
	switch (opt) {
	case CFIL_OPT_NECP_CONTROL_UNIT:
		if (len < sizeof(uint32_t)) {
			CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
				"len too small %lu", len);
			error = EINVAL;
			goto done;
		}
		if (cfc->cf_necp_control_unit != 0) {
			CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
				"already set %u",
				cfc->cf_necp_control_unit);
			error = EINVAL;
			goto done;
		}
		cfc->cf_necp_control_unit = *(uint32_t *)data;
		break;
	default:
		error = ENOPROTOOPT;
		break;
	}
done:
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	return (error);
}


static void
cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
{
#pragma unused(kctlref, flags)
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so = NULL;
	int error;
	struct cfil_entry *entry;

	CFIL_LOG(LOG_INFO, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		return;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
			kcunit, MAX_CONTENT_FILTER);
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		return;
	}
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
			kcunit);
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		goto done;
	}
	/* Let's assume the flow control is lifted */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;

		cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
		lck_rw_assert(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
	}
	/*
	 * Flow control will be raised again as soon as an entry cannot enqueue
	 * to the kernel control socket
	 */
	while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
		verify_content_filter(cfc);

		cfil_rw_lock_assert_held(&cfil_lck_rw, 0);

		/* Find an entry that is flow controlled */
		TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
			if (entry->cfe_cfil_info == NULL ||
				entry->cfe_cfil_info->cfi_so == NULL)
				continue;
			if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0)
				continue;
			/* Found one; stop scanning so it can be serviced */
			break;
		}
		if (entry == NULL)
			break;

		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);

		so = entry->cfe_cfil_info->cfi_so;

		cfil_rw_unlock_shared(&cfil_lck_rw);
		socket_lock(so, 1);

		do {
			error = cfil_acquire_sockbuf(so, 1);
			if (error == 0)
				error = cfil_data_service_ctl_q(so, kcunit, 1);
			cfil_release_sockbuf(so, 1);
			if (error != 0)
				break;

			error = cfil_acquire_sockbuf(so, 0);
			if (error == 0)
				error = cfil_data_service_ctl_q(so, kcunit, 0);
			cfil_release_sockbuf(so, 0);
		} while (0);

		socket_lock_assert_owned(so);
		socket_unlock(so, 1);

		cfil_rw_lock_shared(&cfil_lck_rw);
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);
}

void
cfil_init(void)
{
	struct kern_ctl_reg kern_ctl;
	errno_t error = 0;
	vm_size_t content_filter_size = 0;	/* size of content_filter */
	vm_size_t cfil_info_size = 0;		/* size of cfil_info */

	CFIL_LOG(LOG_NOTICE, "");

	/*
	 * Compile time verifications
	 */
	_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
	_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);

	/*
	 * Runtime verifications
	 */
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
		sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
		sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
		sizeof(uint32_t)));

	/*
	 * Zone for content filter kernel control sockets
	 */
	content_filter_size = sizeof(struct content_filter);
	content_filter_zone = zinit(content_filter_size,
		CONTENT_FILTER_ZONE_MAX * content_filter_size,
		0,
		CONTENT_FILTER_ZONE_NAME);
	if (content_filter_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__,
			CONTENT_FILTER_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
	zone_change(content_filter_zone, Z_EXPAND, TRUE);

	/*
	 * Zone for per socket content filters
	 */
	cfil_info_size = sizeof(struct cfil_info);
	cfil_info_zone = zinit(cfil_info_size,
		CFIL_INFO_ZONE_MAX * cfil_info_size,
		0,
		CFIL_INFO_ZONE_NAME);
	if (cfil_info_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
	zone_change(cfil_info_zone, Z_EXPAND, TRUE);

	/*
	 * Allocate locks
	 */
	cfil_lck_grp_attr = lck_grp_attr_alloc_init();
	if (cfil_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	cfil_lck_grp = lck_grp_alloc_init("content filter",
		cfil_lck_grp_attr);
	if (cfil_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	cfil_lck_attr = lck_attr_alloc_init();
	if (cfil_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);

	TAILQ_INIT(&cfil_sock_head);

	/*
	 * Register kernel control
	 */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
		sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
	kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_connect = cfil_ctl_connect;
	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
	kern_ctl.ctl_send = cfil_ctl_send;
	kern_ctl.ctl_getopt = cfil_ctl_getopt;
	kern_ctl.ctl_setopt = cfil_ctl_setopt;
	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
	error = ctl_register(&kern_ctl, &cfil_kctlref);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
		return;
	}
}

struct cfil_info *
cfil_info_alloc(struct socket *so)
{
	int kcunit;
	struct cfil_info *cfil_info = NULL;
	struct inpcb *inp = sotoinpcb(so);

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_info = zalloc(cfil_info_zone);
	if (cfil_info == NULL)
		goto done;
	bzero(cfil_info, sizeof(struct cfil_info));

	cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
	cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		entry->cfe_cfil_info = cfil_info;

		/* Initialize the filter entry */
		entry->cfe_filter = NULL;
		entry->cfe_flags = 0;
		entry->cfe_necp_control_unit = 0;
		entry->cfe_snd.cfe_pass_offset = 0;
		entry->cfe_snd.cfe_peek_offset = 0;
		entry->cfe_snd.cfe_peeked = 0;
		entry->cfe_rcv.cfe_pass_offset = 0;
		entry->cfe_rcv.cfe_peek_offset = 0;
		entry->cfe_rcv.cfe_peeked = 0;

		cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
		cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
	}

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	so->so_cfil = cfil_info;
	cfil_info->cfi_so = so;
	/*
	 * Create a cfi_sock_id that's not the socket pointer!
	 */
	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);
	cfil_info->cfi_sock_id =
		((so->so_gencnt << 32) | inp->inp_flowhash);

	TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);

	cfil_sock_attached_count++;

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

done:
	if (cfil_info != NULL)
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);

	return (cfil_info);
}

int
cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit)
{
	int kcunit;
	struct cfil_info *cfil_info = so->so_cfil;
	int attached = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
		content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
		kcunit++) {
		struct content_filter *cfc = content_filters[kcunit - 1];
		struct cfil_entry *entry;

		if (cfc == NULL)
			continue;
		if (cfc->cf_necp_control_unit != filter_control_unit)
			continue;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		entry->cfe_filter = cfc;
		entry->cfe_necp_control_unit = filter_control_unit;
		TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count++;
		verify_content_filter(cfc);
		attached = 1;
		entry->cfe_flags |= CFEF_CFIL_ATTACHED;
		break;
	}

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	return (attached);
}

static void
cfil_info_free(struct socket *so, struct cfil_info *cfil_info)
{
	int kcunit;
	uint64_t in_drained = 0;
	uint64_t out_drained = 0;

	so->so_cfil = NULL;

	if (so->so_flags & SOF_CONTENT_FILTER) {
		so->so_flags &= ~SOF_CONTENT_FILTER;
		so->so_usecount--;
	}
	if (cfil_info == NULL)
		return;

	CFIL_LOG(LOG_INFO, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
		content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
		kcunit++) {
		struct cfil_entry *entry;
		struct content_filter *cfc;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Don't be silly and try to detach twice */
		if (entry->cfe_filter == NULL)
			continue;

		cfc = content_filters[kcunit - 1];

		VERIFY(cfc == entry->cfe_filter);

		entry->cfe_filter = NULL;
		entry->cfe_necp_control_unit = 0;
		TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count--;

		verify_content_filter(cfc);
	}
	cfil_sock_attached_count--;
	TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);

	out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
	in_drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
		in_drained += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
		in_drained += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	if (out_drained)
		OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
	if (in_drained)
		OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);

	zfree(cfil_info_zone, cfil_info);
}
cfil_info); 1898} 1899 1900/* 1901 * Entry point from Sockets layer 1902 * The socket is locked. 1903 */ 1904errno_t 1905cfil_sock_attach(struct socket *so) 1906{ 1907 errno_t error = 0; 1908 uint32_t filter_control_unit; 1909 1910 socket_lock_assert_owned(so); 1911 1912 /* Limit ourselves to TCP */ 1913 if ((so->so_proto->pr_domain->dom_family != PF_INET && 1914 so->so_proto->pr_domain->dom_family != PF_INET6) || 1915 so->so_proto->pr_type != SOCK_STREAM || 1916 so->so_proto->pr_protocol != IPPROTO_TCP) 1917 goto done; 1918 1919 filter_control_unit = necp_socket_get_content_filter_control_unit(so); 1920 if (filter_control_unit == 0) 1921 goto done; 1922 1923 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) { 1924 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only); 1925 goto done; 1926 } 1927 if (cfil_active_count == 0) { 1928 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain); 1929 goto done; 1930 } 1931 if (so->so_cfil != NULL) { 1932 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already); 1933 CFIL_LOG(LOG_ERR, "already attached"); 1934 } else { 1935 cfil_info_alloc(so); 1936 if (so->so_cfil == NULL) { 1937 error = ENOMEM; 1938 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem); 1939 goto done; 1940 } 1941 } 1942 if (cfil_info_attach_unit(so, filter_control_unit) == 0) { 1943 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed", 1944 filter_control_unit); 1945 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed); 1946 goto done; 1947 } 1948 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockid %llx", 1949 (uint64_t)VM_KERNEL_ADDRPERM(so), 1950 filter_control_unit, so->so_cfil->cfi_sock_id); 1951 1952 so->so_flags |= SOF_CONTENT_FILTER; 1953 OSIncrementAtomic(&cfil_stats.cfs_sock_attached); 1954 1955 /* Hold a reference on the socket */ 1956 so->so_usecount++; 1957 1958 error = cfil_dispatch_attach_event(so, filter_control_unit); 1959 /* We can recover from flow control or out of memory errors */ 1960 if (error == ENOBUFS || error == ENOMEM) 1961 error = 0; 1962 else if (error != 0) 1963 goto done; 1964 1965 CFIL_INFO_VERIFY(so->so_cfil); 1966done: 1967 return (error); 1968} 1969 1970/* 1971 * Entry point from Sockets layer 1972 * The socket is locked. 
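 *
 * cfil_sock_detach() undoes cfil_sock_attach(): cfil_info_free() below
 * clears so_cfil and releases the socket reference taken at attach time.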
1973 */ 1974errno_t 1975cfil_sock_detach(struct socket *so) 1976{ 1977 if (so->so_cfil) { 1978 cfil_info_free(so, so->so_cfil); 1979 OSIncrementAtomic(&cfil_stats.cfs_sock_detached); 1980 } 1981 return (0); 1982} 1983 1984static int 1985cfil_dispatch_attach_event(struct socket *so, uint32_t filter_control_unit) 1986{ 1987 errno_t error = 0; 1988 struct cfil_entry *entry = NULL; 1989 struct cfil_msg_sock_attached msg_attached; 1990 uint32_t kcunit; 1991 struct content_filter *cfc; 1992 1993 socket_lock_assert_owned(so); 1994 1995 cfil_rw_lock_shared(&cfil_lck_rw); 1996 1997 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) { 1998 error = EINVAL; 1999 goto done; 2000 } 2001 /* 2002 * Find the matching filter unit 2003 */ 2004 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { 2005 cfc = content_filters[kcunit - 1]; 2006 2007 if (cfc == NULL) 2008 continue; 2009 if (cfc->cf_necp_control_unit != filter_control_unit) 2010 continue; 2011 entry = &so->so_cfil->cfi_entries[kcunit - 1]; 2012 if (entry->cfe_filter == NULL) 2013 continue; 2014 2015 VERIFY(cfc == entry->cfe_filter); 2016 2017 break; 2018 } 2019 2020 if (entry == NULL || entry->cfe_filter == NULL) 2021 goto done; 2022 2023 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) 2024 goto done; 2025 2026 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u", 2027 (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit); 2028 2029 /* Would be wasteful to try when flow controlled */ 2030 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { 2031 error = ENOBUFS; 2032 goto done; 2033 } 2034 2035 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached)); 2036 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached); 2037 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT; 2038 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT; 2039 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED; 2040 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id; 2041 2042 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family; 2043 msg_attached.cfs_sock_type = so->so_proto->pr_type; 2044 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol; 2045 msg_attached.cfs_pid = so->last_pid; 2046 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t)); 2047 if (so->so_flags & SOF_DELEGATED) { 2048 msg_attached.cfs_e_pid = so->e_pid; 2049 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t)); 2050 } else { 2051 msg_attached.cfs_e_pid = so->last_pid; 2052 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t)); 2053 } 2054 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref, 2055 entry->cfe_filter->cf_kcunit, 2056 &msg_attached, 2057 sizeof(struct cfil_msg_sock_attached), 2058 CTL_DATA_EOR); 2059 if (error != 0) { 2060 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error); 2061 goto done; 2062 } 2063 microuptime(&entry->cfe_last_event); 2064 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED; 2065 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok); 2066done: 2067 2068 /* We can recover from flow control */ 2069 if (error == ENOBUFS) { 2070 entry->cfe_flags |= CFEF_FLOW_CONTROLLED; 2071 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control); 2072 2073 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) 2074 cfil_rw_lock_exclusive(&cfil_lck_rw); 2075 2076 cfc->cf_flags |= CFF_FLOW_CONTROLLED; 2077 2078 cfil_rw_unlock_exclusive(&cfil_lck_rw); 2079 } else { 2080 if (error != 0) 2081 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail); 2082 2083 
cfil_rw_unlock_shared(&cfil_lck_rw); 2084 } 2085 return (error); 2086} 2087 2088static int 2089cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing) 2090{ 2091 errno_t error = 0; 2092 struct mbuf *msg = NULL; 2093 struct cfil_entry *entry; 2094 struct cfe_buf *entrybuf; 2095 struct cfil_msg_hdr msg_disconnected; 2096 struct content_filter *cfc; 2097 2098 socket_lock_assert_owned(so); 2099 2100 cfil_rw_lock_shared(&cfil_lck_rw); 2101 2102 entry = &so->so_cfil->cfi_entries[kcunit - 1]; 2103 if (outgoing) 2104 entrybuf = &entry->cfe_snd; 2105 else 2106 entrybuf = &entry->cfe_rcv; 2107 2108 cfc = entry->cfe_filter; 2109 if (cfc == NULL) 2110 goto done; 2111 2112 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", 2113 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); 2114 2115 /* 2116 * Send the disconnection event once 2117 */ 2118 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) || 2119 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) { 2120 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent", 2121 (uint64_t)VM_KERNEL_ADDRPERM(so)); 2122 goto done; 2123 } 2124 2125 /* 2126 * We're not disconnected as long as some data is waiting 2127 * to be delivered to the filter 2128 */ 2129 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) { 2130 CFIL_LOG(LOG_INFO, "so %llx control queue not empty", 2131 (uint64_t)VM_KERNEL_ADDRPERM(so)); 2132 error = EBUSY; 2133 goto done; 2134 } 2135 /* Would be wasteful to try when flow controlled */ 2136 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { 2137 error = ENOBUFS; 2138 goto done; 2139 } 2140 2141 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr)); 2142 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr); 2143 msg_disconnected.cfm_version = CFM_VERSION_CURRENT; 2144 msg_disconnected.cfm_type = CFM_TYPE_EVENT; 2145 msg_disconnected.cfm_op = outgoing ? 
CFM_OP_DISCONNECT_OUT :
2146 CFM_OP_DISCONNECT_IN;
2147 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2148 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2149 entry->cfe_filter->cf_kcunit,
2150 &msg_disconnected,
2151 sizeof(struct cfil_msg_hdr),
2152 CTL_DATA_EOR);
2153 if (error != 0) {
2154 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
2155 mbuf_freem(msg);
2156 goto done;
2157 }
2158 microuptime(&entry->cfe_last_event);
2159
2160 /* Remember we have sent the disconnection message */
2161 if (outgoing) {
2162 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
2163 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
2164 } else {
2165 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
2166 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
2167 }
2168done:
2169 if (error == ENOBUFS) {
2170 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2171 OSIncrementAtomic(
2172 &cfil_stats.cfs_disconnect_event_flow_control);
2173
2174 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2175 cfil_rw_lock_exclusive(&cfil_lck_rw);
2176
2177 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2178
2179 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2180 } else {
2181 if (error != 0)
2182 OSIncrementAtomic(
2183 &cfil_stats.cfs_disconnect_event_fail);
2184
2185 cfil_rw_unlock_shared(&cfil_lck_rw);
2186 }
2187 return (error);
2188}
2189
2190int
2191cfil_dispatch_closed_event(struct socket *so, int kcunit)
2192{
2193 struct cfil_entry *entry;
2194 struct cfil_msg_hdr msg_closed;
2195 errno_t error = 0;
2196 struct content_filter *cfc;
2197
2198 socket_lock_assert_owned(so);
2199
2200 cfil_rw_lock_shared(&cfil_lck_rw);
2201
2202 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2203 cfc = entry->cfe_filter;
2204 if (cfc == NULL)
2205 goto done;
2206
2207 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
2208 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2209
2210 /* Would be wasteful to try when flow controlled */
2211 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2212 error = ENOBUFS;
2213 goto done;
2214 }
2215 /*
2216 * Send a single closed message per filter
2217 */
2218 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
2219 goto done;
2220 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
2221 goto done;
2222
2223 bzero(&msg_closed, sizeof(struct cfil_msg_hdr));
2224 msg_closed.cfm_len = sizeof(struct cfil_msg_hdr);
2225 msg_closed.cfm_version = CFM_VERSION_CURRENT;
2226 msg_closed.cfm_type = CFM_TYPE_EVENT;
2227 msg_closed.cfm_op = CFM_OP_SOCKET_CLOSED;
2228 msg_closed.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2229 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2230 entry->cfe_filter->cf_kcunit,
2231 &msg_closed,
2232 sizeof(struct cfil_msg_hdr),
2233 CTL_DATA_EOR);
2234 if (error != 0) {
2235 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
2236 error);
2237 goto done;
2238 }
2239 microuptime(&entry->cfe_last_event);
2240 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
2241 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
2242done:
2243 /* We can recover from flow control */
2244 if (error == ENOBUFS) {
2245 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2246 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
2247
2248 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2249 cfil_rw_lock_exclusive(&cfil_lck_rw);
2250
2251 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2252
2253 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2254 } else {
2255 if (error != 0)
2256 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
2257
2258 cfil_rw_unlock_shared(&cfil_lck_rw);
2259 }
2260
2261 return
(error); 2262} 2263 2264static void 2265fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46, 2266 struct in6_addr *ip6, u_int16_t port) 2267{ 2268 struct sockaddr_in6 *sin6 = &sin46->sin6; 2269 2270 sin6->sin6_family = AF_INET6; 2271 sin6->sin6_len = sizeof(*sin6); 2272 sin6->sin6_port = port; 2273 sin6->sin6_addr = *ip6; 2274 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) { 2275 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 2276 sin6->sin6_addr.s6_addr16[1] = 0; 2277 } 2278} 2279 2280static void 2281fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46, 2282 struct in_addr ip, u_int16_t port) 2283{ 2284 struct sockaddr_in *sin = &sin46->sin; 2285 2286 sin->sin_family = AF_INET; 2287 sin->sin_len = sizeof(*sin); 2288 sin->sin_port = port; 2289 sin->sin_addr.s_addr = ip.s_addr; 2290} 2291 2292static int 2293cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing, 2294 struct mbuf *data, unsigned int copyoffset, unsigned int copylen) 2295{ 2296 errno_t error = 0; 2297 struct mbuf *copy = NULL; 2298 struct mbuf *msg = NULL; 2299 unsigned int one = 1; 2300 struct cfil_msg_data_event *data_req; 2301 size_t hdrsize; 2302 struct inpcb *inp = (struct inpcb *)so->so_pcb; 2303 struct cfil_entry *entry; 2304 struct cfe_buf *entrybuf; 2305 struct content_filter *cfc; 2306 2307 cfil_rw_lock_shared(&cfil_lck_rw); 2308 2309 entry = &so->so_cfil->cfi_entries[kcunit - 1]; 2310 if (outgoing) 2311 entrybuf = &entry->cfe_snd; 2312 else 2313 entrybuf = &entry->cfe_rcv; 2314 2315 cfc = entry->cfe_filter; 2316 if (cfc == NULL) 2317 goto done; 2318 2319 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", 2320 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); 2321 2322 socket_lock_assert_owned(so); 2323 2324 /* Would be wasteful to try */ 2325 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) { 2326 error = ENOBUFS; 2327 goto done; 2328 } 2329 2330 /* Make a copy of the data to pass to kernel control socket */ 2331 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT, 2332 M_COPYM_NOOP_HDR); 2333 if (copy == NULL) { 2334 CFIL_LOG(LOG_ERR, "m_copym_mode() failed"); 2335 error = ENOMEM; 2336 goto done; 2337 } 2338 2339 /* We need an mbuf packet for the message header */ 2340 hdrsize = sizeof(struct cfil_msg_data_event); 2341 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg); 2342 if (error != 0) { 2343 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed"); 2344 m_freem(copy); 2345 /* 2346 * ENOBUFS is to indicate flow control 2347 */ 2348 error = ENOMEM; 2349 goto done; 2350 } 2351 mbuf_setlen(msg, hdrsize); 2352 mbuf_pkthdr_setlen(msg, hdrsize + copylen); 2353 msg->m_next = copy; 2354 data_req = (struct cfil_msg_data_event *)mbuf_data(msg); 2355 bzero(data_req, hdrsize); 2356 data_req->cfd_msghdr.cfm_len = hdrsize + copylen; 2357 data_req->cfd_msghdr.cfm_version = 1; 2358 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT; 2359 data_req->cfd_msghdr.cfm_op = 2360 outgoing ? 
CFM_OP_DATA_OUT : CFM_OP_DATA_IN; 2361 data_req->cfd_msghdr.cfm_sock_id = 2362 entry->cfe_cfil_info->cfi_sock_id; 2363 data_req->cfd_start_offset = entrybuf->cfe_peeked; 2364 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen; 2365 2366 /* 2367 * TBD: 2368 * For non connected sockets need to copy addresses from passed 2369 * parameters 2370 */ 2371 if (inp->inp_vflag & INP_IPV6) { 2372 if (outgoing) { 2373 fill_ip6_sockaddr_4_6(&data_req->cfc_src, 2374 &inp->in6p_laddr, inp->inp_lport); 2375 fill_ip6_sockaddr_4_6(&data_req->cfc_dst, 2376 &inp->in6p_faddr, inp->inp_fport); 2377 } else { 2378 fill_ip6_sockaddr_4_6(&data_req->cfc_src, 2379 &inp->in6p_faddr, inp->inp_fport); 2380 fill_ip6_sockaddr_4_6(&data_req->cfc_dst, 2381 &inp->in6p_laddr, inp->inp_lport); 2382 } 2383 } else if (inp->inp_vflag & INP_IPV4) { 2384 if (outgoing) { 2385 fill_ip_sockaddr_4_6(&data_req->cfc_src, 2386 inp->inp_laddr, inp->inp_lport); 2387 fill_ip_sockaddr_4_6(&data_req->cfc_dst, 2388 inp->inp_faddr, inp->inp_fport); 2389 } else { 2390 fill_ip_sockaddr_4_6(&data_req->cfc_src, 2391 inp->inp_faddr, inp->inp_fport); 2392 fill_ip_sockaddr_4_6(&data_req->cfc_dst, 2393 inp->inp_laddr, inp->inp_lport); 2394 } 2395 } 2396 2397 /* Pass the message to the content filter */ 2398 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref, 2399 entry->cfe_filter->cf_kcunit, 2400 msg, CTL_DATA_EOR); 2401 if (error != 0) { 2402 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error); 2403 mbuf_freem(msg); 2404 goto done; 2405 } 2406 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED; 2407 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok); 2408done: 2409 if (error == ENOBUFS) { 2410 entry->cfe_flags |= CFEF_FLOW_CONTROLLED; 2411 OSIncrementAtomic( 2412 &cfil_stats.cfs_data_event_flow_control); 2413 2414 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) 2415 cfil_rw_lock_exclusive(&cfil_lck_rw); 2416 2417 cfc->cf_flags |= CFF_FLOW_CONTROLLED; 2418 2419 cfil_rw_unlock_exclusive(&cfil_lck_rw); 2420 } else { 2421 if (error != 0) 2422 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail); 2423 2424 cfil_rw_unlock_shared(&cfil_lck_rw); 2425 } 2426 return (error); 2427} 2428 2429/* 2430 * Process the queue of data waiting to be delivered to content filter 2431 */ 2432static int 2433cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing) 2434{ 2435 errno_t error = 0; 2436 struct mbuf *data, *tmp = NULL; 2437 unsigned int datalen = 0, copylen = 0, copyoffset = 0; 2438 struct cfil_entry *entry; 2439 struct cfe_buf *entrybuf; 2440 uint64_t currentoffset = 0; 2441 2442 if (so->so_cfil == NULL) 2443 return (0); 2444 2445 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d", 2446 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing); 2447 2448 socket_lock_assert_owned(so); 2449 2450 entry = &so->so_cfil->cfi_entries[kcunit - 1]; 2451 if (outgoing) 2452 entrybuf = &entry->cfe_snd; 2453 else 2454 entrybuf = &entry->cfe_rcv; 2455 2456 /* Send attached message if not yet done */ 2457 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) { 2458 error = cfil_dispatch_attach_event(so, kcunit); 2459 if (error != 0) { 2460 /* We can recover from flow control */ 2461 if (error == ENOBUFS || error == ENOMEM) 2462 error = 0; 2463 goto done; 2464 } 2465 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) { 2466 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started); 2467 goto done; 2468 } 2469 CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu", 2470 entrybuf->cfe_pass_offset, 2471 entrybuf->cfe_peeked, 2472 
entrybuf->cfe_peek_offset);
2473
2474 /* Move all data that can pass */
2475 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
2476 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
2477 datalen = cfil_data_length(data, NULL);
2478 tmp = data;
2479
2480 if (entrybuf->cfe_ctl_q.q_start + datalen <=
2481 entrybuf->cfe_pass_offset) {
2482 /*
2483 * The first mbuf can fully pass
2484 */
2485 copylen = datalen;
2486 } else {
2487 /*
2488 * The first mbuf can partially pass
2489 */
2490 copylen = entrybuf->cfe_pass_offset -
2491 entrybuf->cfe_ctl_q.q_start;
2492 }
2493 VERIFY(copylen <= datalen);
2494
2495 CFIL_LOG(LOG_DEBUG,
2496 "%llx first %llu peeked %llu pass %llu peek %llu"
2497 " datalen %u copylen %u",
2498 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
2499 entrybuf->cfe_ctl_q.q_start,
2500 entrybuf->cfe_peeked,
2501 entrybuf->cfe_pass_offset,
2502 entrybuf->cfe_peek_offset,
2503 datalen, copylen);
2504
2505 /*
2506 * Data that passes has been peeked at explicitly or
2507 * implicitly
2508 */
2509 if (entrybuf->cfe_ctl_q.q_start + copylen >
2510 entrybuf->cfe_peeked)
2511 entrybuf->cfe_peeked =
2512 entrybuf->cfe_ctl_q.q_start + copylen;
2513 /*
2514 * Stop on partial pass
2515 */
2516 if (copylen < datalen)
2517 break;
2518
2519 /* All good, move full data from ctl queue to pending queue */
2520 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
2521
2522 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
2523 if (outgoing)
2524 OSAddAtomic64(datalen,
2525 &cfil_stats.cfs_pending_q_out_enqueued);
2526 else
2527 OSAddAtomic64(datalen,
2528 &cfil_stats.cfs_pending_q_in_enqueued);
2529 }
2530 CFIL_INFO_VERIFY(so->so_cfil);
2531 if (tmp != NULL)
2532 CFIL_LOG(LOG_DEBUG,
2533 "%llx first %llu peeked %llu pass %llu peek %llu"
2534 " datalen %u copylen %u",
2535 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
2536 entrybuf->cfe_ctl_q.q_start,
2537 entrybuf->cfe_peeked,
2538 entrybuf->cfe_pass_offset,
2539 entrybuf->cfe_peek_offset,
2540 datalen, copylen);
2541 tmp = NULL;
2542
2543 /* Now deal with remaining data the filter wants to peek at */
2544 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
2545 currentoffset = entrybuf->cfe_ctl_q.q_start;
2546 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
2547 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
2548 currentoffset += datalen) {
2549 datalen = cfil_data_length(data, NULL);
2550 tmp = data;
2551
2552 /* We've already peeked at this mbuf */
2553 if (currentoffset + datalen <= entrybuf->cfe_peeked)
2554 continue;
2555 /*
2556 * The data in the first mbuf may have been
2557 * partially peeked at
2558 */
2559 copyoffset = entrybuf->cfe_peeked - currentoffset;
2560 VERIFY(copyoffset < datalen);
2561 copylen = datalen - copyoffset;
2562 VERIFY(copylen <= datalen);
2563 /*
2564 * Do not copy more than needed
2565 */
2566 if (currentoffset + copyoffset + copylen >
2567 entrybuf->cfe_peek_offset) {
2568 copylen = entrybuf->cfe_peek_offset -
2569 (currentoffset + copyoffset);
2570 }
2571
2572 CFIL_LOG(LOG_DEBUG,
2573 "%llx current %llu peeked %llu pass %llu peek %llu"
2574 " datalen %u copylen %u copyoffset %u",
2575 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
2576 currentoffset,
2577 entrybuf->cfe_peeked,
2578 entrybuf->cfe_pass_offset,
2579 entrybuf->cfe_peek_offset,
2580 datalen, copylen, copyoffset);
2581
2582 /*
2583 * Stop if there is nothing more to peek at
2584 */
2585 if (copylen == 0)
2586 break;
2587 /*
2588 * Let the filter get a peek at this span of data
2589 */
2590 error = cfil_dispatch_data_event(so, kcunit,
2591
outgoing, data, copyoffset, copylen);
2592 if (error != 0) {
2593 /* On error, leave data in ctl_q */
2594 break;
2595 }
2596 entrybuf->cfe_peeked += copylen;
2597 if (outgoing)
2598 OSAddAtomic64(copylen,
2599 &cfil_stats.cfs_ctl_q_out_peeked);
2600 else
2601 OSAddAtomic64(copylen,
2602 &cfil_stats.cfs_ctl_q_in_peeked);
2603
2604 /* Stop when data could not be fully peeked at */
2605 if (copylen + copyoffset < datalen)
2606 break;
2607 }
2608 CFIL_INFO_VERIFY(so->so_cfil);
2609 if (tmp != NULL)
2610 CFIL_LOG(LOG_DEBUG,
2611 "%llx current %llu peeked %llu pass %llu peek %llu"
2612 " datalen %u copylen %u copyoffset %u",
2613 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
2614 currentoffset,
2615 entrybuf->cfe_peeked,
2616 entrybuf->cfe_pass_offset,
2617 entrybuf->cfe_peek_offset,
2618 datalen, copylen, copyoffset);
2619
2620 /*
2621 * Process data that has passed the filter
2622 */
2623 error = cfil_service_pending_queue(so, kcunit, outgoing);
2624 if (error != 0) {
2625 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
2626 error);
2627 goto done;
2628 }
2629
2630 /*
2631 * Dispatch disconnect events that could not be sent
2632 */
2633 if (so->so_cfil == NULL)
2634 goto done;
2635 else if (outgoing) {
2636 if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) &&
2637 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
2638 cfil_dispatch_disconnect_event(so, kcunit, 1);
2639 } else {
2640 if ((so->so_cfil->cfi_flags & CFIF_SHUT_RD) &&
2641 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
2642 cfil_dispatch_disconnect_event(so, kcunit, 0);
2643 }
2644
2645done:
2646 CFIL_LOG(LOG_DEBUG,
2647 "first %llu peeked %llu pass %llu peek %llu",
2648 entrybuf->cfe_ctl_q.q_start,
2649 entrybuf->cfe_peeked,
2650 entrybuf->cfe_pass_offset,
2651 entrybuf->cfe_peek_offset);
2652
2653 CFIL_INFO_VERIFY(so->so_cfil);
2654 return (error);
2655}
2656
2657/*
2658 * cfil_data_filter()
2659 *
2660 * Process data for a content filter installed on a socket
2661 */
2662int
2663cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing,
2664 struct mbuf *data, uint64_t datalen)
2665{
2666 errno_t error = 0;
2667 struct cfil_entry *entry;
2668 struct cfe_buf *entrybuf;
2669
2670 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2671 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2672
2673 socket_lock_assert_owned(so);
2674
2675 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2676 if (outgoing)
2677 entrybuf = &entry->cfe_snd;
2678 else
2679 entrybuf = &entry->cfe_rcv;
2680
2681 /* Are we attached to the filter?
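   If not, return 0 so the caller treats the data as passed
   by this entry.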
*/
2682 if (entry->cfe_filter == NULL) {
2683 error = 0;
2684 goto done;
2685 }
2686
2687 /* Dispatch to filters */
2688 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
2689 if (outgoing)
2690 OSAddAtomic64(datalen,
2691 &cfil_stats.cfs_ctl_q_out_enqueued);
2692 else
2693 OSAddAtomic64(datalen,
2694 &cfil_stats.cfs_ctl_q_in_enqueued);
2695
2696 error = cfil_data_service_ctl_q(so, kcunit, outgoing);
2697 if (error != 0) {
2698 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
2699 error);
2700 }
2701 /*
2702 * We have to return EJUSTRETURN in all cases to avoid double free
2703 * by socket layer
2704 */
2705 error = EJUSTRETURN;
2706done:
2707 CFIL_INFO_VERIFY(so->so_cfil);
2708
2709 CFIL_LOG(LOG_INFO, "return %d", error);
2710 return (error);
2711}
2712
2713/*
2714 * cfil_service_inject_queue() re-injects data that has passed the
2715 * content filters
2716 */
2717static int
2718cfil_service_inject_queue(struct socket *so, int outgoing)
2719{
2720 mbuf_t data;
2721 unsigned int datalen;
2722 int mbcnt;
2723 unsigned int copylen;
2724 errno_t error = 0;
2725 struct mbuf *copy = NULL;
2726 struct cfi_buf *cfi_buf;
2727 struct cfil_queue *inject_q;
2728 int need_rwakeup = 0;
2729
2730 if (so->so_cfil == NULL)
2731 return (0);
2732
2733 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
2734 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
2735
2736 socket_lock_assert_owned(so);
2737
2738 if (outgoing) {
2739 cfi_buf = &so->so_cfil->cfi_snd;
2740 so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
2741 } else {
2742 cfi_buf = &so->so_cfil->cfi_rcv;
2743 so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
2744 }
2745 inject_q = &cfi_buf->cfi_inject_q;
2746
2747 while ((data = cfil_queue_first(inject_q)) != NULL) {
2748 datalen = cfil_data_length(data, &mbcnt);
2749
2750 CFIL_LOG(LOG_INFO, "data %llx datalen %u",
2751 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen);
2752
2753 /* Make a copy in case of injection error */
2754 copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT,
2755 M_COPYM_COPY_HDR);
2756 if (copy == NULL) {
2757 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
2758 error = ENOMEM;
2759 break;
2760 }
2761
2762 if ((copylen = m_length(copy)) != datalen)
2763 panic("%s so %p copylen %d != datalen %d",
2764 __func__, so, copylen, datalen);
2765
2766 if (outgoing) {
2767 socket_unlock(so, 0);
2768
2769 /*
2770 * Set both DONTWAIT and NBIO flags as we really
2771 * do not want to block
2772 */
2773 error = sosend(so, NULL, NULL,
2774 copy, NULL,
2775 MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO);
2776
2777 socket_lock(so, 0);
2778
2779 if (error != 0) {
2780 CFIL_LOG(LOG_ERR, "sosend() failed %d",
2781 error);
2782 }
2783 } else {
2784 copy->m_flags |= M_SKIPCFIL;
2785
2786 /*
2787 * NOTE:
2788 * This works only because we support plain TCP
2789 * For UDP, RAWIP, MPTCP and message TCP we'll
2790 * need to call the appropriate sbappendxxx()
2791 * or fix sock_inject_data_in()
2792 */
2793 if (sbappendstream(&so->so_rcv, copy))
2794 need_rwakeup = 1;
2795 }
2796
2797 /* Need to reassess if filter is still attached after unlock */
2798 if (so->so_cfil == NULL) {
2799 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
2800 (uint64_t)VM_KERNEL_ADDRPERM(so));
2801 OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached);
2802 error = 0;
2803 break;
2804 }
2805 if (error != 0)
2806 break;
2807
2808 /* Injection successful */
2809 cfil_queue_remove(inject_q, data, datalen);
2810 mbuf_freem(data);
2811
2812 cfi_buf->cfi_pending_first += datalen;
2813 cfi_buf->cfi_pending_mbcnt -= mbcnt;
2814 cfil_info_buf_verify(cfi_buf);
2815
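/* Account for the bytes successfully injected in the global stats */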
2816 if (outgoing)
2817 OSAddAtomic64(datalen,
2818 &cfil_stats.cfs_inject_q_out_passed);
2819 else
2820 OSAddAtomic64(datalen,
2821 &cfil_stats.cfs_inject_q_in_passed);
2822 }
2823
2824 /* A single wakeup for several packets is more efficient */
2825 if (need_rwakeup)
2826 sorwakeup(so);
2827
2828 if (error != 0 && so->so_cfil) {
2829 if (error == ENOBUFS)
2830 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
2831 if (error == ENOMEM)
2832 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
2833
2834 if (outgoing) {
2835 so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT;
2836 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
2837 } else {
2838 so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN;
2839 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
2840 }
2841 }
2842
2843 /*
2844 * Notify
2845 */
2846 if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) {
2847 cfil_sock_notify_shutdown(so, SHUT_WR);
2848 if (cfil_sock_data_pending(&so->so_snd) == 0)
2849 soshutdownlock_final(so, SHUT_WR);
2850 }
2851 if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
2852 if (cfil_filters_attached(so) == 0) {
2853 CFIL_LOG(LOG_INFO, "so %llx waking",
2854 (uint64_t)VM_KERNEL_ADDRPERM(so));
2855 wakeup((caddr_t)&so->so_cfil);
2856 }
2857 }
2858
2859 CFIL_INFO_VERIFY(so->so_cfil);
2860
2861 return (error);
2862}
2863
2864static int
2865cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing)
2866{
2867 uint64_t passlen, curlen;
2868 mbuf_t data;
2869 unsigned int datalen;
2870 errno_t error = 0;
2871 struct cfil_entry *entry;
2872 struct cfe_buf *entrybuf;
2873 struct cfil_queue *pending_q;
2874
2875 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2876 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2877
2878 socket_lock_assert_owned(so);
2879
2880 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2881 if (outgoing)
2882 entrybuf = &entry->cfe_snd;
2883 else
2884 entrybuf = &entry->cfe_rcv;
2885
2886 pending_q = &entrybuf->cfe_pending_q;
2887
2888 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
2889
2890 /*
2891 * Locate the chunks of data that we can pass to the next filter
2892 * A data chunk must be on mbuf boundaries
2893 */
2894 curlen = 0;
2895 while ((data = cfil_queue_first(pending_q)) != NULL) {
2896 datalen = cfil_data_length(data, NULL);
2897
2898 CFIL_LOG(LOG_INFO,
2899 "data %llx datalen %u passlen %llu curlen %llu",
2900 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
2901 passlen, curlen);
2902
2903 if (curlen + datalen > passlen)
2904 break;
2905
2906 cfil_queue_remove(pending_q, data, datalen);
2907
2908 curlen += datalen;
2909
2910 for (kcunit += 1;
2911 kcunit <= MAX_CONTENT_FILTER;
2912 kcunit++) {
2913 error = cfil_data_filter(so, kcunit, outgoing,
2914 data, datalen);
2915 /* 0 means passed so we can continue */
2916 if (error != 0)
2917 break;
2918 }
2919 /* When data has passed all filters, re-inject */
2920 if (error == 0) {
2921 if (outgoing) {
2922 cfil_queue_enqueue(
2923 &so->so_cfil->cfi_snd.cfi_inject_q,
2924 data, datalen);
2925 OSAddAtomic64(datalen,
2926 &cfil_stats.cfs_inject_q_out_enqueued);
2927 } else {
2928 cfil_queue_enqueue(
2929 &so->so_cfil->cfi_rcv.cfi_inject_q,
2930 data, datalen);
2931 OSAddAtomic64(datalen,
2932 &cfil_stats.cfs_inject_q_in_enqueued);
2933 }
2934 }
2935 }
2936
2937 CFIL_INFO_VERIFY(so->so_cfil);
2938
2939 return (error);
2940}
2941
2942int
2943cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing,
2944 uint64_t pass_offset, uint64_t peek_offset)
2945{
2946 errno_t
error = 0;
2947 struct cfil_entry *entry;
2948 struct cfe_buf *entrybuf;
2949 int updated = 0;
2950
2951 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
2952
2953 socket_lock_assert_owned(so);
2954
2955 if (so->so_cfil == NULL) {
2956 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
2957 (uint64_t)VM_KERNEL_ADDRPERM(so));
2958 error = 0;
2959 goto done;
2960 } else if (so->so_cfil->cfi_flags & CFIF_DROP) {
2961 CFIL_LOG(LOG_ERR, "so %llx drop set",
2962 (uint64_t)VM_KERNEL_ADDRPERM(so));
2963 error = EPIPE;
2964 goto done;
2965 }
2966
2967 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2968 if (outgoing)
2969 entrybuf = &entry->cfe_snd;
2970 else
2971 entrybuf = &entry->cfe_rcv;
2972
2973 /* Record updated offsets for this content filter */
2974 if (pass_offset > entrybuf->cfe_pass_offset) {
2975 entrybuf->cfe_pass_offset = pass_offset;
2976
2977 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
2978 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
2979 updated = 1;
2980 } else {
2981 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
2982 pass_offset, entrybuf->cfe_pass_offset);
2983 }
2984 /* Filter does not want or need to see data that's allowed to pass */
2985 if (peek_offset > entrybuf->cfe_pass_offset &&
2986 peek_offset > entrybuf->cfe_peek_offset) {
2987 entrybuf->cfe_peek_offset = peek_offset;
2988 updated = 1;
2989 }
2990 /* Nothing to do */
2991 if (updated == 0)
2992 goto done;
2993
2994 /* Move data held in control queue to pending queue if needed */
2995 error = cfil_data_service_ctl_q(so, kcunit, outgoing);
2996 if (error != 0) {
2997 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
2998 error);
2999 goto done;
3000 }
3001 error = EJUSTRETURN;
3002
3003done:
3004 /*
3005 * The filter is effectively detached when it has passed everything in
3006 * both directions (pass offsets at CFM_MAX_OFFSET) or when the socket
3007 * is closed and no more data is waiting to be delivered to the filter
3008 */
3009 if (so->so_cfil != NULL &&
3010 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
3011 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
3012 ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
3013 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
3014 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
3015 entry->cfe_flags |= CFEF_CFIL_DETACHED;
3016 CFIL_LOG(LOG_INFO, "so %llx detached %u",
3017 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3018 if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
3019 cfil_filters_attached(so) == 0) {
3020 CFIL_LOG(LOG_INFO, "so %llx waking",
3021 (uint64_t)VM_KERNEL_ADDRPERM(so));
3022 wakeup((caddr_t)&so->so_cfil);
3023 }
3024 }
3025 CFIL_INFO_VERIFY(so->so_cfil);
3026 CFIL_LOG(LOG_INFO, "return %d", error);
3027 return (error);
3028}
3029
3030/*
3031 * Update pass offset for socket when no data is pending
3032 */
3033static int
3034cfil_set_socket_pass_offset(struct socket *so, int outgoing)
3035{
3036 struct cfi_buf *cfi_buf;
3037 struct cfil_entry *entry;
3038 struct cfe_buf *entrybuf;
3039 uint32_t kcunit;
3040 uint64_t pass_offset = 0;
3041
3042 if (so->so_cfil == NULL)
3043 return (0);
3044
3045 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
3046 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
3047
3048 socket_lock_assert_owned(so);
3049
3050 if (outgoing)
3051 cfi_buf = &so->so_cfil->cfi_snd;
3052 else
3053 cfi_buf = &so->so_cfil->cfi_rcv;
3054
3055 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
3056 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3057 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3058
3059 /* Are we
attached to a filter? */ 3060 if (entry->cfe_filter == NULL) 3061 continue; 3062 3063 if (outgoing) 3064 entrybuf = &entry->cfe_snd; 3065 else 3066 entrybuf = &entry->cfe_rcv; 3067 3068 if (pass_offset == 0 || 3069 entrybuf->cfe_pass_offset < pass_offset) 3070 pass_offset = entrybuf->cfe_pass_offset; 3071 } 3072 cfi_buf->cfi_pass_offset = pass_offset; 3073 } 3074 3075 return (0); 3076} 3077 3078int 3079cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing, 3080 uint64_t pass_offset, uint64_t peek_offset) 3081{ 3082 errno_t error = 0; 3083 3084 CFIL_LOG(LOG_INFO, ""); 3085 3086 socket_lock_assert_owned(so); 3087 3088 error = cfil_acquire_sockbuf(so, outgoing); 3089 if (error != 0) { 3090 CFIL_LOG(LOG_INFO, "so %llx %s dropped", 3091 (uint64_t)VM_KERNEL_ADDRPERM(so), 3092 outgoing ? "out" : "in"); 3093 goto release; 3094 } 3095 3096 error = cfil_update_data_offsets(so, kcunit, outgoing, 3097 pass_offset, peek_offset); 3098 3099 cfil_service_inject_queue(so, outgoing); 3100 3101 cfil_set_socket_pass_offset(so, outgoing); 3102release: 3103 CFIL_INFO_VERIFY(so->so_cfil); 3104 cfil_release_sockbuf(so, outgoing); 3105 3106 return (error); 3107} 3108 3109 3110static void 3111cfil_flush_queues(struct socket *so) 3112{ 3113 struct cfil_entry *entry; 3114 int kcunit; 3115 uint64_t drained; 3116 3117 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) 3118 goto done; 3119 3120 socket_lock_assert_owned(so); 3121 3122 /* 3123 * Flush the output queues and ignore errors as long as 3124 * we are attached 3125 */ 3126 (void) cfil_acquire_sockbuf(so, 1); 3127 if (so->so_cfil != NULL) { 3128 drained = 0; 3129 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { 3130 entry = &so->so_cfil->cfi_entries[kcunit - 1]; 3131 3132 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q); 3133 drained += cfil_queue_drain( 3134 &entry->cfe_snd.cfe_pending_q); 3135 } 3136 drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q); 3137 if (drained) { 3138 if (so->so_cfil->cfi_flags & CFIF_DROP) 3139 OSIncrementAtomic( 3140 &cfil_stats.cfs_flush_out_drop); 3141 else 3142 OSIncrementAtomic( 3143 &cfil_stats.cfs_flush_out_close); 3144 } 3145 } 3146 cfil_release_sockbuf(so, 1); 3147 3148 /* 3149 * Flush the input queues 3150 */ 3151 (void) cfil_acquire_sockbuf(so, 0); 3152 if (so->so_cfil != NULL) { 3153 drained = 0; 3154 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { 3155 entry = &so->so_cfil->cfi_entries[kcunit - 1]; 3156 3157 drained += cfil_queue_drain( 3158 &entry->cfe_rcv.cfe_ctl_q); 3159 drained += cfil_queue_drain( 3160 &entry->cfe_rcv.cfe_pending_q); 3161 } 3162 drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q); 3163 if (drained) { 3164 if (so->so_cfil->cfi_flags & CFIF_DROP) 3165 OSIncrementAtomic( 3166 &cfil_stats.cfs_flush_in_drop); 3167 else 3168 OSIncrementAtomic( 3169 &cfil_stats.cfs_flush_in_close); 3170 } 3171 } 3172 cfil_release_sockbuf(so, 0); 3173done: 3174 CFIL_INFO_VERIFY(so->so_cfil); 3175} 3176 3177int 3178cfil_action_drop(struct socket *so, uint32_t kcunit) 3179{ 3180 errno_t error = 0; 3181 struct cfil_entry *entry; 3182 struct proc *p; 3183 3184 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) 3185 goto done; 3186 3187 socket_lock_assert_owned(so); 3188 3189 entry = &so->so_cfil->cfi_entries[kcunit - 1]; 3190 3191 /* Are we attached to the filter? 
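   A drop action from a filter that never attached to this
   socket is ignored.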
*/ 3192 if (entry->cfe_filter == NULL) 3193 goto done; 3194 3195 so->so_cfil->cfi_flags |= CFIF_DROP; 3196 3197 p = current_proc(); 3198 3199 /* Force the socket to be marked defunct */ 3200 error = sosetdefunct(p, so, 3201 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, 1); 3202 3203 /* Flush the socket buffer and disconnect */ 3204 if (error == 0) 3205 error = sodefunct(p, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL); 3206 3207 /* The filter is done, mark as detached */ 3208 entry->cfe_flags |= CFEF_CFIL_DETACHED; 3209 CFIL_LOG(LOG_INFO, "so %llx detached %u", 3210 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit); 3211 3212 /* Pending data needs to go */ 3213 cfil_flush_queues(so); 3214 3215 if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) { 3216 if (cfil_filters_attached(so) == 0) { 3217 CFIL_LOG(LOG_INFO, "so %llx waking", 3218 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3219 wakeup((caddr_t)&so->so_cfil); 3220 } 3221 } 3222done: 3223 return (error); 3224} 3225 3226static int 3227cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen) 3228{ 3229 struct cfil_entry *entry; 3230 struct cfe_buf *entrybuf; 3231 uint32_t kcunit; 3232 3233 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u", 3234 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen); 3235 3236 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { 3237 entry = &so->so_cfil->cfi_entries[kcunit - 1]; 3238 3239 /* Are we attached to the filter? */ 3240 if (entry->cfe_filter == NULL) 3241 continue; 3242 3243 if (outgoing) 3244 entrybuf = &entry->cfe_snd; 3245 else 3246 entrybuf = &entry->cfe_rcv; 3247 3248 entrybuf->cfe_ctl_q.q_start += datalen; 3249 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start; 3250 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start; 3251 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) 3252 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset; 3253 3254 entrybuf->cfe_ctl_q.q_end += datalen; 3255 3256 entrybuf->cfe_pending_q.q_start += datalen; 3257 entrybuf->cfe_pending_q.q_end += datalen; 3258 } 3259 CFIL_INFO_VERIFY(so->so_cfil); 3260 return (0); 3261} 3262 3263int 3264cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to, 3265 struct mbuf *data, struct mbuf *control, uint32_t flags) 3266{ 3267#pragma unused(to, control, flags) 3268 errno_t error = 0; 3269 unsigned int datalen; 3270 int mbcnt; 3271 int kcunit; 3272 struct cfi_buf *cfi_buf; 3273 3274 if (so->so_cfil == NULL) { 3275 CFIL_LOG(LOG_ERR, "so %llx cfil detached", 3276 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3277 error = 0; 3278 goto done; 3279 } else if (so->so_cfil->cfi_flags & CFIF_DROP) { 3280 CFIL_LOG(LOG_ERR, "so %llx drop set", 3281 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3282 error = EPIPE; 3283 goto done; 3284 } 3285 3286 datalen = cfil_data_length(data, &mbcnt); 3287 3288 CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx", 3289 (uint64_t)VM_KERNEL_ADDRPERM(so), 3290 outgoing ? 
"out" : "in", 3291 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags, 3292 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt)); 3293 3294 if (outgoing) 3295 cfi_buf = &so->so_cfil->cfi_snd; 3296 else 3297 cfi_buf = &so->so_cfil->cfi_rcv; 3298 3299 cfi_buf->cfi_pending_last += datalen; 3300 cfi_buf->cfi_pending_mbcnt += mbcnt; 3301 cfil_info_buf_verify(cfi_buf); 3302 3303 CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu", 3304 (uint64_t)VM_KERNEL_ADDRPERM(so), 3305 cfi_buf->cfi_pending_last, 3306 cfi_buf->cfi_pass_offset); 3307 3308 /* Fast path when below pass offset */ 3309 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) { 3310 cfil_update_entry_offsets(so, outgoing, datalen); 3311 } else { 3312 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) { 3313 error = cfil_data_filter(so, kcunit, outgoing, data, 3314 datalen); 3315 /* 0 means passed so continue with next filter */ 3316 if (error != 0) 3317 break; 3318 } 3319 } 3320 3321 /* Move cursor if no filter claimed the data */ 3322 if (error == 0) { 3323 cfi_buf->cfi_pending_first += datalen; 3324 cfi_buf->cfi_pending_mbcnt -= mbcnt; 3325 cfil_info_buf_verify(cfi_buf); 3326 } 3327done: 3328 CFIL_INFO_VERIFY(so->so_cfil); 3329 3330 return (error); 3331} 3332 3333/* 3334 * Callback from socket layer sosendxxx() 3335 */ 3336int 3337cfil_sock_data_out(struct socket *so, struct sockaddr *to, 3338 struct mbuf *data, struct mbuf *control, uint32_t flags) 3339{ 3340 int error = 0; 3341 3342 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) 3343 return (0); 3344 3345 socket_lock_assert_owned(so); 3346 3347 if (so->so_cfil->cfi_flags & CFIF_DROP) { 3348 CFIL_LOG(LOG_ERR, "so %llx drop set", 3349 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3350 return (EPIPE); 3351 } 3352 if (control != NULL) { 3353 CFIL_LOG(LOG_ERR, "so %llx control", 3354 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3355 OSIncrementAtomic(&cfil_stats.cfs_data_out_control); 3356 } 3357 if ((flags & MSG_OOB)) { 3358 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB", 3359 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3360 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob); 3361 } 3362 if ((so->so_snd.sb_flags & SB_LOCK) == 0) 3363 panic("so %p SB_LOCK not set", so); 3364 3365 if (so->so_snd.sb_cfil_thread != NULL) 3366 panic("%s sb_cfil_thread %p not NULL", __func__, 3367 so->so_snd.sb_cfil_thread); 3368 3369 error = cfil_data_common(so, 1, to, data, control, flags); 3370 3371 return (error); 3372} 3373 3374/* 3375 * Callback from socket layer sbappendxxx() 3376 */ 3377int 3378cfil_sock_data_in(struct socket *so, struct sockaddr *from, 3379 struct mbuf *data, struct mbuf *control, uint32_t flags) 3380{ 3381 int error = 0; 3382 3383 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) 3384 return (0); 3385 3386 socket_lock_assert_owned(so); 3387 3388 if (so->so_cfil->cfi_flags & CFIF_DROP) { 3389 CFIL_LOG(LOG_ERR, "so %llx drop set", 3390 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3391 return (EPIPE); 3392 } 3393 if (control != NULL) { 3394 CFIL_LOG(LOG_ERR, "so %llx control", 3395 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3396 OSIncrementAtomic(&cfil_stats.cfs_data_in_control); 3397 } 3398 if (data->m_type == MT_OOBDATA) { 3399 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB", 3400 (uint64_t)VM_KERNEL_ADDRPERM(so)); 3401 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob); 3402 } 3403 error = cfil_data_common(so, 0, from, data, control, flags); 3404 3405 return (error); 3406} 3407 3408/* 3409 * Callback from socket layer soshutdownxxx() 3410 * 3411 * We may delay the shutdown write if 
there's outgoing data still being processed.
3412 *
3413 * There is no point in delaying the shutdown read because the process
3414 * indicated that it does not want to read any more data.
3415 */
3416int
3417cfil_sock_shutdown(struct socket *so, int *how)
3418{
3419 int error = 0;
3420
3421 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3422 goto done;
3423
3424 socket_lock_assert_owned(so);
3425
3426 CFIL_LOG(LOG_INFO, "so %llx how %d",
3427 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
3428
3429 /*
3430 * Check the state of the socket before the content filter
3431 */
3432 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
3433 /* read already shut down */
3434 error = ENOTCONN;
3435 goto done;
3436 }
3437 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
3438 /* write already shut down */
3439 error = ENOTCONN;
3440 goto done;
3441 }
3442
3443 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
3444 CFIL_LOG(LOG_ERR, "so %llx drop set",
3445 (uint64_t)VM_KERNEL_ADDRPERM(so));
3446 goto done;
3447 }
3448
3449 /*
3450 * shutdown read: SHUT_RD or SHUT_RDWR
3451 */
3452 if (*how != SHUT_WR) {
3453 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
3454 error = ENOTCONN;
3455 goto done;
3456 }
3457 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
3458 cfil_sock_notify_shutdown(so, SHUT_RD);
3459 }
3460 /*
3461 * shutdown write: SHUT_WR or SHUT_RDWR
3462 */
3463 if (*how != SHUT_RD) {
3464 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
3465 error = ENOTCONN;
3466 goto done;
3467 }
3468 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
3469 cfil_sock_notify_shutdown(so, SHUT_WR);
3470 /*
3471 * When outgoing data is pending, we delay the shutdown at the
3472 * protocol level until the content filters give the final
3473 * verdict on the pending data.
3474 */
3475 if (cfil_sock_data_pending(&so->so_snd) != 0) {
3476 /*
3477 * When shutting down the read and write sides at once
3478 * we can proceed to the final shutdown of the read
3479 * side. Otherwise, we just return.
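 *
 * Concretely, as implemented just below: shutdown(SHUT_WR) comes back
 * as EJUSTRETURN so the caller skips the protocol-level shutdown, while
 * shutdown(SHUT_RDWR) is downgraded to SHUT_RD so the read side shuts
 * down now and the write side waits for the final verdict.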
3480 */
3481 if (*how == SHUT_WR) {
3482 error = EJUSTRETURN;
3483 } else if (*how == SHUT_RDWR) {
3484 *how = SHUT_RD;
3485 }
3486 }
3487 }
3488done:
3489 return (error);
3490}
3491
3492/*
3493 * This is called when the socket is closed and there is no more
3494 * opportunity for filtering
3495 */
3496void
3497cfil_sock_is_closed(struct socket *so)
3498{
3499 errno_t error = 0;
3500 int kcunit;
3501
3502 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3503 return;
3504
3505 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
3506
3507 socket_lock_assert_owned(so);
3508
3509 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3510 /* Let the filters know of the closing */
3511 error = cfil_dispatch_closed_event(so, kcunit);
3512 }
3513
3514 /* Last chance to push passed data out */
3515 error = cfil_acquire_sockbuf(so, 1);
3516 if (error == 0)
3517 cfil_service_inject_queue(so, 1);
3518 cfil_release_sockbuf(so, 1);
3519
3520 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
3521
3522 /* Pending data needs to go */
3523 cfil_flush_queues(so);
3524
3525 CFIL_INFO_VERIFY(so->so_cfil);
3526}
3527
3528/*
3529 * This is called when the socket is disconnected, so let the filters
3530 * know about the disconnection and that no more data will come
3531 *
3532 * The how parameter has the same values as soshutdown()
3533 */
3534void
3535cfil_sock_notify_shutdown(struct socket *so, int how)
3536{
3537 errno_t error = 0;
3538 int kcunit;
3539
3540 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3541 return;
3542
3543 CFIL_LOG(LOG_INFO, "so %llx how %d",
3544 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
3545
3546 socket_lock_assert_owned(so);
3547
3548 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3549 /* Disconnect incoming side */
3550 if (how != SHUT_WR)
3551 error = cfil_dispatch_disconnect_event(so, kcunit, 0);
3552 /* Disconnect outgoing side */
3553 if (how != SHUT_RD)
3554 error = cfil_dispatch_disconnect_event(so, kcunit, 1);
3555 }
3556}
3557
3558static int
3559cfil_filters_attached(struct socket *so)
3560{
3561 struct cfil_entry *entry;
3562 uint32_t kcunit;
3563 int attached = 0;
3564
3565 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3566 return (0);
3567
3568 socket_lock_assert_owned(so);
3569
3570 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3571 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3572
3573 /* Are we attached to the filter?
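   An entry counts as attached only after the attach event
   was sent (CFEF_SENT_SOCK_ATTACHED) and before
   CFEF_CFIL_DETACHED is set.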
*/
3574 if (entry->cfe_filter == NULL)
3575 continue;
3576 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
3577 continue;
3578 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
3579 continue;
3580 attached = 1;
3581 break;
3582 }
3583
3584 return (attached);
3585}
3586
3587/*
3588 * This is called when the socket is closed and we are waiting for
3589 * the filters to give the final pass or drop
3590 */
3591void
3592cfil_sock_close_wait(struct socket *so)
3593{
3594 lck_mtx_t *mutex_held;
3595 struct timespec ts;
3596 int error;
3597
3598 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3599 return;
3600
3601 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
3602
3603 if (so->so_proto->pr_getlock != NULL)
3604 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
3605 else
3606 mutex_held = so->so_proto->pr_domain->dom_mtx;
3607 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
3608
3609 while (cfil_filters_attached(so)) {
3610 /*
3611 * Notify the filters we are going away so they can detach
3612 */
3613 cfil_sock_notify_shutdown(so, SHUT_RDWR);
3614
3615 /*
3616 * Make sure we still need to wait after the filters are
3617 * notified of the disconnection
3618 */
3619 if (cfil_filters_attached(so) == 0)
3620 break;
3621
3622 CFIL_LOG(LOG_INFO, "so %llx waiting",
3623 (uint64_t)VM_KERNEL_ADDRPERM(so));
3624
3625 ts.tv_sec = cfil_close_wait_timeout / 1000;
3626 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
3627 NSEC_PER_USEC * 1000;
3628
3629 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
3630 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
3631 error = msleep((caddr_t)&so->so_cfil, mutex_held,
3632 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
3633 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
3634
3635 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
3636 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
3637
3638 /*
3639 * Force close in case of timeout
3640 */
3641 if (error != 0) {
3642 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
3643 break;
3644 }
3645 }
3646
3647}
3648
3649/*
3650 * Returns the size of the data held by the content filter on this
3651 * socket buffer
3652 */
3653int32_t
3654cfil_sock_data_pending(struct sockbuf *sb)
3655{
3656 struct socket *so = sb->sb_so;
3657 uint64_t pending = 0;
3658
3659 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
3660 struct cfi_buf *cfi_buf;
3661
3662 socket_lock_assert_owned(so);
3663
3664 if ((sb->sb_flags & SB_RECV) == 0)
3665 cfi_buf = &so->so_cfil->cfi_snd;
3666 else
3667 cfi_buf = &so->so_cfil->cfi_rcv;
3668
3669 pending = cfi_buf->cfi_pending_last -
3670 cfi_buf->cfi_pending_first;
3671
3672 /*
3673 * If we are limited by the "chars of mbufs used" roughly
3674 * adjust so we won't overcommit
3675 */
3676 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
3677 pending = cfi_buf->cfi_pending_mbcnt;
3678 }
3679
3680 VERIFY(pending < INT32_MAX);
3681
3682 return (int32_t)(pending);
3683}
3684
3685/*
3686 * Return the socket buffer space used by data being held by content filters
3687 * so processes won't clog the socket buffer
3688 */
3689int32_t
3690cfil_sock_data_space(struct sockbuf *sb)
3691{
3692 struct socket *so = sb->sb_so;
3693 uint64_t pending = 0;
3694
3695 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
3696 so->so_snd.sb_cfil_thread != current_thread()) {
3697 struct cfi_buf *cfi_buf;
3698
3699 socket_lock_assert_owned(so);
3700
3701 if ((sb->sb_flags & SB_RECV) == 0)
3702 cfi_buf = &so->so_cfil->cfi_snd;
3703 else
3704 cfi_buf = &so->so_cfil->cfi_rcv;
3705
pending = cfi_buf->cfi_pending_last -
3706 cfi_buf->cfi_pending_first;
3707
3708 /*
3709 * If we are limited by the "chars of mbufs used" roughly
3710 * adjust so we won't overcommit
3711 */
3712 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
3713 pending = cfi_buf->cfi_pending_mbcnt;
3714 }
3715
3716 VERIFY(pending < INT32_MAX);
3717
3718 return (int32_t)(pending);
3719}
3720
3721/*
3722 * A callback from the socket and protocol layer when data becomes
3723 * available in the socket buffer, giving the content filter a chance
3724 * to re-inject data that was held back
3725 */
3726void
3727cfil_sock_buf_update(struct sockbuf *sb)
3728{
3729 int outgoing;
3730 int error;
3731 struct socket *so = sb->sb_so;
3732
3733 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3734 return;
3735
3736 if (!cfil_sbtrim)
3737 return;
3738
3739 socket_lock_assert_owned(so);
3740
3741 if ((sb->sb_flags & SB_RECV) == 0) {
3742 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
3743 return;
3744 outgoing = 1;
3745 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
3746 } else {
3747 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
3748 return;
3749 outgoing = 0;
3750 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
3751 }
3752
3753 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
3754 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
3755
3756 error = cfil_acquire_sockbuf(so, outgoing);
3757 if (error == 0)
3758 cfil_service_inject_queue(so, outgoing);
3759 cfil_release_sockbuf(so, outgoing);
3760}
3761
3762int
3763sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
3764 struct sysctl_req *req)
3765{
3766#pragma unused(oidp, arg1, arg2)
3767 int error = 0;
3768 size_t len = 0;
3769 u_int32_t i;
3770
3771 /* Read only */
3772 if (req->newptr != USER_ADDR_NULL)
3773 return (EPERM);
3774
3775 cfil_rw_lock_shared(&cfil_lck_rw);
3776
3777 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
3778 struct cfil_filter_stat filter_stat;
3779 struct content_filter *cfc = content_filters[i];
3780
3781 if (cfc == NULL)
3782 continue;
3783
3784 /* If just asking for the size */
3785 if (req->oldptr == USER_ADDR_NULL) {
3786 len += sizeof(struct cfil_filter_stat);
3787 continue;
3788 }
3789
3790 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
3791 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
3792 filter_stat.cfs_filter_id = cfc->cf_kcunit;
3793 filter_stat.cfs_flags = cfc->cf_flags;
3794 filter_stat.cfs_sock_count = cfc->cf_sock_count;
3795 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
3796
3797 error = SYSCTL_OUT(req, &filter_stat,
3798 sizeof (struct cfil_filter_stat));
3799 if (error != 0)
3800 break;
3801 }
3802 /* If just asking for the size */
3803 if (req->oldptr == USER_ADDR_NULL)
3804 req->oldidx = len;
3805
3806 cfil_rw_unlock_shared(&cfil_lck_rw);
3807
3808 return (error);
3809}
3810
3811static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
3812 struct sysctl_req *req)
3813{
3814#pragma unused(oidp, arg1, arg2)
3815 int error = 0;
3816 u_int32_t i;
3817 struct cfil_info *cfi;
3818
3819 /* Read only */
3820 if (req->newptr != USER_ADDR_NULL)
3821 return (EPERM);
3822
3823 cfil_rw_lock_shared(&cfil_lck_rw);
3824
3825 /*
3826 * If just asking for the size
3827 */
3828 if (req->oldptr == USER_ADDR_NULL) {
3829 req->oldidx = cfil_sock_attached_count *
3830 sizeof(struct cfil_sock_stat);
3831 /* Bump the length in case new sockets get attached */
3832 req->oldidx += req->oldidx >>
3; 3833 goto done; 3834 } 3835 3836 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) { 3837 struct cfil_entry *entry; 3838 struct cfil_sock_stat stat; 3839 struct socket *so = cfi->cfi_so; 3840 3841 bzero(&stat, sizeof(struct cfil_sock_stat)); 3842 stat.cfs_len = sizeof(struct cfil_sock_stat); 3843 stat.cfs_sock_id = cfi->cfi_sock_id; 3844 stat.cfs_flags = cfi->cfi_flags; 3845 3846 if (so != NULL) { 3847 stat.cfs_pid = so->last_pid; 3848 memcpy(stat.cfs_uuid, so->last_uuid, 3849 sizeof(uuid_t)); 3850 if (so->so_flags & SOF_DELEGATED) { 3851 stat.cfs_e_pid = so->e_pid; 3852 memcpy(stat.cfs_e_uuid, so->e_uuid, 3853 sizeof(uuid_t)); 3854 } else { 3855 stat.cfs_e_pid = so->last_pid; 3856 memcpy(stat.cfs_e_uuid, so->last_uuid, 3857 sizeof(uuid_t)); 3858 } 3859 } 3860 3861 stat.cfs_snd.cbs_pending_first = 3862 cfi->cfi_snd.cfi_pending_first; 3863 stat.cfs_snd.cbs_pending_last = 3864 cfi->cfi_snd.cfi_pending_last; 3865 stat.cfs_snd.cbs_inject_q_len = 3866 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q); 3867 stat.cfs_snd.cbs_pass_offset = 3868 cfi->cfi_snd.cfi_pass_offset; 3869 3870 stat.cfs_rcv.cbs_pending_first = 3871 cfi->cfi_rcv.cfi_pending_first; 3872 stat.cfs_rcv.cbs_pending_last = 3873 cfi->cfi_rcv.cfi_pending_last; 3874 stat.cfs_rcv.cbs_inject_q_len = 3875 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q); 3876 stat.cfs_rcv.cbs_pass_offset = 3877 cfi->cfi_rcv.cfi_pass_offset; 3878 3879 for (i = 0; i < MAX_CONTENT_FILTER; i++) { 3880 struct cfil_entry_stat *estat; 3881 struct cfe_buf *ebuf; 3882 struct cfe_buf_stat *sbuf; 3883 3884 entry = &cfi->cfi_entries[i]; 3885 3886 estat = &stat.ces_entries[i]; 3887 3888 estat->ces_len = sizeof(struct cfil_entry_stat); 3889 estat->ces_filter_id = entry->cfe_filter ? 3890 entry->cfe_filter->cf_kcunit : 0; 3891 estat->ces_flags = entry->cfe_flags; 3892 estat->ces_necp_control_unit = 3893 entry->cfe_necp_control_unit; 3894 3895 estat->ces_last_event.tv_sec = 3896 (int64_t)entry->cfe_last_event.tv_sec; 3897 estat->ces_last_event.tv_usec = 3898 (int64_t)entry->cfe_last_event.tv_usec; 3899 3900 estat->ces_last_action.tv_sec = 3901 (int64_t)entry->cfe_last_action.tv_sec; 3902 estat->ces_last_action.tv_usec = 3903 (int64_t)entry->cfe_last_action.tv_usec; 3904 3905 ebuf = &entry->cfe_snd; 3906 sbuf = &estat->ces_snd; 3907 sbuf->cbs_pending_first = 3908 cfil_queue_offset_first(&ebuf->cfe_pending_q); 3909 sbuf->cbs_pending_last = 3910 cfil_queue_offset_last(&ebuf->cfe_pending_q); 3911 sbuf->cbs_ctl_first = 3912 cfil_queue_offset_first(&ebuf->cfe_ctl_q); 3913 sbuf->cbs_ctl_last = 3914 cfil_queue_offset_last(&ebuf->cfe_ctl_q); 3915 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset; 3916 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset; 3917 sbuf->cbs_peeked = ebuf->cfe_peeked; 3918 3919 ebuf = &entry->cfe_rcv; 3920 sbuf = &estat->ces_rcv; 3921 sbuf->cbs_pending_first = 3922 cfil_queue_offset_first(&ebuf->cfe_pending_q); 3923 sbuf->cbs_pending_last = 3924 cfil_queue_offset_last(&ebuf->cfe_pending_q); 3925 sbuf->cbs_ctl_first = 3926 cfil_queue_offset_first(&ebuf->cfe_ctl_q); 3927 sbuf->cbs_ctl_last = 3928 cfil_queue_offset_last(&ebuf->cfe_ctl_q); 3929 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset; 3930 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset; 3931 sbuf->cbs_peeked = ebuf->cfe_peeked; 3932 } 3933 error = SYSCTL_OUT(req, &stat, 3934 sizeof (struct cfil_sock_stat)); 3935 if (error != 0) 3936 break; 3937 } 3938done: 3939 cfil_rw_unlock_shared(&cfil_lck_rw); 3940 3941 return (error); 3942} 3943
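/*
 * ILLUSTRATIVE SKETCH (not compiled into the kernel): the user space
 * side of the messaging protocol serviced by the code above. This
 * assumes the kernel control setup from <sys/kern_control.h> and the
 * action message layout declared for this subsystem (assumed here to
 * be struct cfil_msg_action with cfa_*_pass_offset fields; check the
 * actual cfil header before relying on these names).
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	struct ctl_info info;
 *
 *	bzero(&info, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *	    sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);
 *
 *	struct sockaddr_ctl sc;
 *	bzero(&sc, sizeof(sc));
 *	sc.sc_len = sizeof(sc);
 *	sc.sc_family = AF_SYSTEM;
 *	sc.ss_sysaddr = AF_SYS_CONTROL;
 *	sc.sc_id = info.ctl_id;
 *	connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *
 *	uint32_t unit = 1;	// must match the NECP filter control unit
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &unit, sizeof(unit));
 *
 * The agent then reads event messages and answers with actions; for
 * example, after a CFM_OP_SOCKET_ATTACHED event "evt", letting all
 * data flow unrestricted in both directions:
 *
 *	struct cfil_msg_action act;
 *	bzero(&act, sizeof(act));
 *	act.cfa_msghdr.cfm_len = sizeof(act);
 *	act.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	act.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	act.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	act.cfa_msghdr.cfm_sock_id = evt.cfs_msghdr.cfm_sock_id;
 *	act.cfa_in_pass_offset = CFM_MAX_OFFSET;
 *	act.cfa_out_pass_offset = CFM_MAX_OFFSET;
 *	send(fd, &act, sizeof(act), 0);
 */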