1/* 2 * Copyright (c) 1999-2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 30 * support for mandatory and extensible security protections. This notice 31 * is included in support of clause 2.2 (b) of the Apple Public License, 32 * Version 2.0. 
33 */ 34#include <stddef.h> 35 36#include <sys/param.h> 37#include <sys/systm.h> 38#include <sys/kernel.h> 39#include <sys/malloc.h> 40#include <sys/mbuf.h> 41#include <sys/socket.h> 42#include <sys/domain.h> 43#include <sys/user.h> 44#include <sys/random.h> 45#include <sys/socketvar.h> 46#include <net/if_dl.h> 47#include <net/if.h> 48#include <net/route.h> 49#include <net/if_var.h> 50#include <net/dlil.h> 51#include <net/if_arp.h> 52#include <net/iptap.h> 53#include <net/pktap.h> 54#include <sys/kern_event.h> 55#include <sys/kdebug.h> 56#include <sys/mcache.h> 57#include <sys/syslog.h> 58#include <sys/protosw.h> 59#include <sys/priv.h> 60 61#include <kern/assert.h> 62#include <kern/task.h> 63#include <kern/thread.h> 64#include <kern/sched_prim.h> 65#include <kern/locks.h> 66#include <kern/zalloc.h> 67 68#include <net/kpi_protocol.h> 69#include <net/if_types.h> 70#include <net/if_llreach.h> 71#include <net/kpi_interfacefilter.h> 72#include <net/classq/classq.h> 73#include <net/classq/classq_sfb.h> 74#include <net/flowhash.h> 75#include <net/ntstat.h> 76 77#if INET 78#include <netinet/in_var.h> 79#include <netinet/igmp_var.h> 80#include <netinet/ip_var.h> 81#include <netinet/tcp.h> 82#include <netinet/tcp_var.h> 83#include <netinet/udp.h> 84#include <netinet/udp_var.h> 85#include <netinet/if_ether.h> 86#include <netinet/in_pcb.h> 87#endif /* INET */ 88 89#if INET6 90#include <netinet6/in6_var.h> 91#include <netinet6/nd6.h> 92#include <netinet6/mld6_var.h> 93#include <netinet6/scope6_var.h> 94#endif /* INET6 */ 95 96#include <libkern/OSAtomic.h> 97#include <libkern/tree.h> 98 99#include <dev/random/randomdev.h> 100#include <machine/machine_routines.h> 101 102#include <mach/thread_act.h> 103#include <mach/sdt.h> 104 105#if CONFIG_MACF 106#include <sys/kauth.h> 107#include <security/mac_framework.h> 108#include <net/ethernet.h> 109#include <net/firewire.h> 110#endif 111 112#if PF 113#include <net/pfvar.h> 114#endif /* PF */ 115#if PF_ALTQ 116#include <net/altq/altq.h> 
117#endif /* PF_ALTQ */ 118#include <net/pktsched/pktsched.h> 119 120#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0) 121#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2) 122#define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8)) 123#define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8)) 124#define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8)) 125 126#define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */ 127#define MAX_LINKADDR 4 /* LONGWORDS */ 128#define M_NKE M_IFADDR 129 130#if 1 131#define DLIL_PRINTF printf 132#else 133#define DLIL_PRINTF kprintf 134#endif 135 136#define IF_DATA_REQUIRE_ALIGNED_64(f) \ 137 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t))) 138 139#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \ 140 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t))) 141 142enum { 143 kProtoKPI_v1 = 1, 144 kProtoKPI_v2 = 2 145}; 146 147/* 148 * List of if_proto structures in if_proto_hash[] is protected by 149 * the ifnet lock. The rest of the fields are initialized at protocol 150 * attach time and never change, thus no lock required as long as 151 * a reference to it is valid, via if_proto_ref(). 
152 */ 153struct if_proto { 154 SLIST_ENTRY(if_proto) next_hash; 155 u_int32_t refcount; 156 u_int32_t detached; 157 struct ifnet *ifp; 158 protocol_family_t protocol_family; 159 int proto_kpi; 160 union { 161 struct { 162 proto_media_input input; 163 proto_media_preout pre_output; 164 proto_media_event event; 165 proto_media_ioctl ioctl; 166 proto_media_detached detached; 167 proto_media_resolve_multi resolve_multi; 168 proto_media_send_arp send_arp; 169 } v1; 170 struct { 171 proto_media_input_v2 input; 172 proto_media_preout pre_output; 173 proto_media_event event; 174 proto_media_ioctl ioctl; 175 proto_media_detached detached; 176 proto_media_resolve_multi resolve_multi; 177 proto_media_send_arp send_arp; 178 } v2; 179 } kpi; 180}; 181 182SLIST_HEAD(proto_hash_entry, if_proto); 183 184#define DLIL_SDLMAXLEN 64 185#define DLIL_SDLDATALEN \ 186 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0])) 187 188struct dlil_ifnet { 189 struct ifnet dl_if; /* public ifnet */ 190 /* 191 * DLIL private fields, protected by dl_if_lock 192 */ 193 decl_lck_mtx_data(, dl_if_lock); 194 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */ 195 u_int32_t dl_if_flags; /* flags (below) */ 196 u_int32_t dl_if_refcnt; /* refcnt */ 197 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */ 198 void *dl_if_uniqueid; /* unique interface id */ 199 size_t dl_if_uniqueid_len; /* length of the unique id */ 200 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */ 201 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */ 202 struct { 203 struct ifaddr ifa; /* lladdr ifa */ 204 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */ 205 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */ 206 } dl_if_lladdr; 207 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */ 208 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */ 209 ctrace_t dl_if_attach; /* attach PC stacktrace */ 210 ctrace_t dl_if_detach; /* detach PC stacktrace */ 
211}; 212 213/* Values for dl_if_flags (private to DLIL) */ 214#define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */ 215#define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */ 216#define DLIF_DEBUG 0x4 /* has debugging info */ 217 218#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */ 219 220/* For gdb */ 221__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE; 222 223struct dlil_ifnet_dbg { 224 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */ 225 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */ 226 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */ 227 /* 228 * Circular lists of ifnet_{reference,release} callers. 229 */ 230 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE]; 231 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE]; 232}; 233 234#define DLIL_TO_IFP(s) (&s->dl_if) 235#define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s) 236 237struct ifnet_filter { 238 TAILQ_ENTRY(ifnet_filter) filt_next; 239 u_int32_t filt_skip; 240 u_int32_t filt_flags; 241 ifnet_t filt_ifp; 242 const char *filt_name; 243 void *filt_cookie; 244 protocol_family_t filt_protocol; 245 iff_input_func filt_input; 246 iff_output_func filt_output; 247 iff_event_func filt_event; 248 iff_ioctl_func filt_ioctl; 249 iff_detached_func filt_detached; 250}; 251 252struct proto_input_entry; 253 254static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head; 255static lck_grp_t *dlil_lock_group; 256lck_grp_t *ifnet_lock_group; 257static lck_grp_t *ifnet_head_lock_group; 258static lck_grp_t *ifnet_snd_lock_group; 259static lck_grp_t *ifnet_rcv_lock_group; 260lck_attr_t *ifnet_lock_attr; 261decl_lck_rw_data(static, ifnet_head_lock); 262decl_lck_mtx_data(static, dlil_ifnet_lock); 263u_int32_t dlil_filter_disable_tso_count = 0; 264 265#if DEBUG 266static unsigned int ifnet_debug = 1; /* debugging (enabled) */ 267#else 268static unsigned int ifnet_debug; /* debugging (disabled) */ 269#endif /* !DEBUG */ 270static unsigned int dlif_size; /* size of 
dlil_ifnet to allocate */ 271static unsigned int dlif_bufsize; /* size of dlif_size + headroom */ 272static struct zone *dlif_zone; /* zone for dlil_ifnet */ 273 274#define DLIF_ZONE_MAX 64 /* maximum elements in zone */ 275#define DLIF_ZONE_NAME "ifnet" /* zone name */ 276 277static unsigned int dlif_filt_size; /* size of ifnet_filter */ 278static struct zone *dlif_filt_zone; /* zone for ifnet_filter */ 279 280#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */ 281#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */ 282 283static unsigned int dlif_phash_size; /* size of ifnet proto hash table */ 284static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */ 285 286#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */ 287#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */ 288 289static unsigned int dlif_proto_size; /* size of if_proto */ 290static struct zone *dlif_proto_zone; /* zone for if_proto */ 291 292#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */ 293#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */ 294 295static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */ 296static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */ 297static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */ 298 299#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */ 300#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */ 301 302static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */ 303static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */ 304static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */ 305 306#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */ 307#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */ 308 309/* 310 * Updating this variable should be done by first acquiring the global 311 * radix node head (rnh_lock), 
in tandem with settting/clearing the 312 * PR_AGGDRAIN for routedomain. 313 */ 314u_int32_t ifnet_aggressive_drainers; 315static u_int32_t net_rtref; 316 317static struct dlil_main_threading_info dlil_main_input_thread_info; 318__private_extern__ struct dlil_threading_info *dlil_main_input_thread = 319 (struct dlil_threading_info *)&dlil_main_input_thread_info; 320 321static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg); 322static int dlil_detach_filter_internal(interface_filter_t filter, int detached); 323static void dlil_if_trace(struct dlil_ifnet *, int); 324static void if_proto_ref(struct if_proto *); 325static void if_proto_free(struct if_proto *); 326static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t); 327static int dlil_ifp_proto_count(struct ifnet *); 328static void if_flt_monitor_busy(struct ifnet *); 329static void if_flt_monitor_unbusy(struct ifnet *); 330static void if_flt_monitor_enter(struct ifnet *); 331static void if_flt_monitor_leave(struct ifnet *); 332static int dlil_interface_filters_input(struct ifnet *, struct mbuf **, 333 char **, protocol_family_t); 334static int dlil_interface_filters_output(struct ifnet *, struct mbuf **, 335 protocol_family_t); 336static struct ifaddr *dlil_alloc_lladdr(struct ifnet *, 337 const struct sockaddr_dl *); 338static int ifnet_lookup(struct ifnet *); 339static void if_purgeaddrs(struct ifnet *); 340 341static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t, 342 struct mbuf *, char *); 343static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t, 344 struct mbuf *); 345static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t, 346 mbuf_t *, const struct sockaddr *, void *, char *, char *); 347static void ifproto_media_event(struct ifnet *, protocol_family_t, 348 const struct kev_msg *); 349static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t, 350 unsigned long, void *); 351static errno_t 
ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *, 352 struct sockaddr_dl *, size_t); 353static errno_t ifproto_media_send_arp(struct ifnet *, u_short, 354 const struct sockaddr_dl *, const struct sockaddr *, 355 const struct sockaddr_dl *, const struct sockaddr *); 356 357static errno_t ifp_if_output(struct ifnet *, struct mbuf *); 358static void ifp_if_start(struct ifnet *); 359static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t, 360 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *); 361static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *); 362static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *, 363 protocol_family_t *); 364static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t, 365 const struct ifnet_demux_desc *, u_int32_t); 366static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t); 367static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *); 368static errno_t ifp_if_framer(struct ifnet *, struct mbuf **, 369 const struct sockaddr *, const char *, const char *); 370static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **, 371 const struct sockaddr *, const char *, const char *, 372 u_int32_t *, u_int32_t *); 373static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func); 374static void ifp_if_free(struct ifnet *); 375static void ifp_if_event(struct ifnet *, const struct kev_msg *); 376static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *); 377static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *); 378 379static void dlil_main_input_thread_func(void *, wait_result_t); 380static void dlil_input_thread_func(void *, wait_result_t); 381static void dlil_rxpoll_input_thread_func(void *, wait_result_t); 382static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *); 383static void dlil_terminate_input_thread(struct dlil_threading_info *); 384static void 
dlil_input_stats_add(const struct ifnet_stat_increment_param *, 385 struct dlil_threading_info *, boolean_t); 386static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *); 387static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *, 388 u_int32_t, ifnet_model_t, boolean_t); 389static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *, 390 const struct ifnet_stat_increment_param *, boolean_t, boolean_t); 391 392#if DEBUG 393static void dlil_verify_sum16(void); 394#endif /* DEBUG */ 395static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t, 396 protocol_family_t); 397static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *, 398 protocol_family_t); 399 400static void ifnet_detacher_thread_func(void *, wait_result_t); 401static int ifnet_detacher_thread_cont(int); 402static void ifnet_detach_final(struct ifnet *); 403static void ifnet_detaching_enqueue(struct ifnet *); 404static struct ifnet *ifnet_detaching_dequeue(void); 405 406static void ifnet_start_thread_fn(void *, wait_result_t); 407static void ifnet_poll_thread_fn(void *, wait_result_t); 408static void ifnet_poll(struct ifnet *); 409 410static void ifp_src_route_copyout(struct ifnet *, struct route *); 411static void ifp_src_route_copyin(struct ifnet *, struct route *); 412#if INET6 413static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *); 414static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *); 415#endif /* INET6 */ 416 417static int sysctl_rxpoll SYSCTL_HANDLER_ARGS; 418static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS; 419static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS; 420static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS; 421static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS; 422static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS; 423static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS; 424static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS; 425static int 
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS; 426static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS; 427static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS; 428 429/* The following are protected by dlil_ifnet_lock */ 430static TAILQ_HEAD(, ifnet) ifnet_detaching_head; 431static u_int32_t ifnet_detaching_cnt; 432static void *ifnet_delayed_run; /* wait channel for detaching thread */ 433 434decl_lck_mtx_data(static, ifnet_fc_lock); 435 436static uint32_t ifnet_flowhash_seed; 437 438struct ifnet_flowhash_key { 439 char ifk_name[IFNAMSIZ]; 440 uint32_t ifk_unit; 441 uint32_t ifk_flags; 442 uint32_t ifk_eflags; 443 uint32_t ifk_capabilities; 444 uint32_t ifk_capenable; 445 uint32_t ifk_output_sched_model; 446 uint32_t ifk_rand1; 447 uint32_t ifk_rand2; 448}; 449 450/* Flow control entry per interface */ 451struct ifnet_fc_entry { 452 RB_ENTRY(ifnet_fc_entry) ifce_entry; 453 u_int32_t ifce_flowhash; 454 struct ifnet *ifce_ifp; 455}; 456 457static uint32_t ifnet_calc_flowhash(struct ifnet *); 458static int ifce_cmp(const struct ifnet_fc_entry *, 459 const struct ifnet_fc_entry *); 460static int ifnet_fc_add(struct ifnet *); 461static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t); 462static void ifnet_fc_entry_free(struct ifnet_fc_entry *); 463 464/* protected by ifnet_fc_lock */ 465RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree; 466RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp); 467RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp); 468 469static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */ 470static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */ 471 472#define IFNET_FC_ZONE_NAME "ifnet_fc_zone" 473#define IFNET_FC_ZONE_MAX 32 474 475extern void bpfdetach(struct ifnet*); 476extern void proto_input_run(void); 477 478extern uint32_t udp_count_opportunistic(unsigned int ifindex, 479 u_int32_t flags); 480extern uint32_t tcp_count_opportunistic(unsigned int ifindex, 481 u_int32_t 
flags); 482 483__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *); 484 485#if CONFIG_MACF 486int dlil_lladdr_ckreq = 0; 487#endif 488 489#if DEBUG 490int dlil_verbose = 1; 491#else 492int dlil_verbose = 0; 493#endif /* DEBUG */ 494#if IFNET_INPUT_SANITY_CHK 495/* sanity checking of input packet lists received */ 496static u_int32_t dlil_input_sanity_check = 0; 497#endif /* IFNET_INPUT_SANITY_CHK */ 498/* rate limit debug messages */ 499struct timespec dlil_dbgrate = { 1, 0 }; 500 501SYSCTL_DECL(_net_link_generic_system); 502 503#if CONFIG_MACF 504SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq, 505 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0, 506 "Require MACF system info check to expose link-layer address"); 507#endif 508 509SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, 510 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages"); 511 512#define IF_SNDQ_MINLEN 32 513u_int32_t if_sndq_maxlen = IFQ_MAXLEN; 514SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen, 515 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN, 516 sysctl_sndq_maxlen, "I", "Default transmit queue max length"); 517 518#define IF_RCVQ_MINLEN 32 519#define IF_RCVQ_MAXLEN 256 520u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN; 521SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen, 522 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN, 523 sysctl_rcvq_maxlen, "I", "Default receive queue max length"); 524 525#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */ 526static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY; 527SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay, 528 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY, 529 "ilog2 of EWMA decay rate of avg inbound packets"); 530 531#define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */ 532#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */ 533static 
u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME; 534SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time, 535 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime, 536 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime, 537 "Q", "input poll mode freeze time"); 538 539#define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */ 540#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */ 541static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME; 542SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time, 543 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime, 544 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime, 545 "Q", "input poll sampling time"); 546 547#define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */ 548#define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */ 549static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME; 550SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time, 551 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time, 552 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time, 553 "Q", "input poll interval (time)"); 554 555#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */ 556static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS; 557SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts, 558 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts, 559 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)"); 560 561#define IF_RXPOLL_WLOWAT 10 562static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT; 563SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat, 564 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat, 565 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat, 566 "I", "input poll wakeup low watermark"); 567 568#define IF_RXPOLL_WHIWAT 100 569static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT; 570SYSCTL_PROC(_net_link_generic_system, OID_AUTO, 
rxpoll_wakeups_hiwat, 571 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat, 572 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat, 573 "I", "input poll wakeup high watermark"); 574 575static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */ 576SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max, 577 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0, 578 "max packets per poll call"); 579 580static u_int32_t if_rxpoll = 1; 581SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll, 582 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0, 583 sysctl_rxpoll, "I", "enable opportunistic input polling"); 584 585u_int32_t if_bw_smoothing_val = 3; 586SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val, 587 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, ""); 588 589u_int32_t if_bw_measure_size = 10; 590SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size, 591 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, ""); 592 593static u_int32_t cur_dlil_input_threads = 0; 594SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads, 595 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads , 0, 596 "Current number of DLIL input threads"); 597 598#if IFNET_INPUT_SANITY_CHK 599SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check, 600 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check , 0, 601 "Turn on sanity checking in DLIL input"); 602#endif /* IFNET_INPUT_SANITY_CHK */ 603 604static u_int32_t if_flowadv = 1; 605SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory, 606 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1, 607 "enable flow-advisory mechanism"); 608 609static u_int32_t if_delaybased_queue = 1; 610SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue, 611 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1, 612 "enable delay based dynamic queue sizing"); 613 614static uint64_t hwcksum_in_invalidated = 0; 615SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 616 hwcksum_in_invalidated, 
CTLFLAG_RD | CTLFLAG_LOCKED, 617 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum"); 618 619uint32_t hwcksum_dbg = 0; 620SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg, 621 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0, 622 "enable hardware cksum debugging"); 623 624#define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */ 625#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */ 626#define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */ 627#define HWCKSUM_DBG_MASK \ 628 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \ 629 HWCKSUM_DBG_FINALIZE_FORCED) 630 631static uint32_t hwcksum_dbg_mode = 0; 632SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode, 633 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode, 634 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode"); 635 636static uint64_t hwcksum_dbg_partial_forced = 0; 637SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 638 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED, 639 &hwcksum_dbg_partial_forced, "packets forced using partial cksum"); 640 641static uint64_t hwcksum_dbg_partial_forced_bytes = 0; 642SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 643 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, 644 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum"); 645 646static uint32_t hwcksum_dbg_partial_rxoff_forced = 0; 647SYSCTL_PROC(_net_link_generic_system, OID_AUTO, 648 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 649 &hwcksum_dbg_partial_rxoff_forced, 0, 650 sysctl_hwcksum_dbg_partial_rxoff_forced, "I", 651 "forced partial cksum rx offset"); 652 653static uint32_t hwcksum_dbg_partial_rxoff_adj = 0; 654SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj, 655 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj, 656 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I", 657 "adjusted 
partial cksum rx offset"); 658 659static uint64_t hwcksum_dbg_verified = 0; 660SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 661 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED, 662 &hwcksum_dbg_verified, "packets verified for having good checksum"); 663 664static uint64_t hwcksum_dbg_bad_cksum = 0; 665SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 666 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED, 667 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum"); 668 669static uint64_t hwcksum_dbg_bad_rxoff = 0; 670SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 671 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED, 672 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff"); 673 674static uint64_t hwcksum_dbg_adjusted = 0; 675SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 676 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED, 677 &hwcksum_dbg_adjusted, "packets with rxoff adjusted"); 678 679static uint64_t hwcksum_dbg_finalized_hdr = 0; 680SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 681 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED, 682 &hwcksum_dbg_finalized_hdr, "finalized headers"); 683 684static uint64_t hwcksum_dbg_finalized_data = 0; 685SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, 686 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED, 687 &hwcksum_dbg_finalized_data, "finalized payloads"); 688 689uint32_t hwcksum_tx = 1; 690SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx, 691 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0, 692 "enable transmit hardware checksum offload"); 693 694uint32_t hwcksum_rx = 1; 695SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx, 696 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0, 697 "enable receive hardware checksum offload"); 698 699unsigned int net_rxpoll = 1; 700unsigned int net_affinity = 1; 701static kern_return_t dlil_affinity_set(struct thread *, u_int32_t); 702 703extern u_int32_t inject_buckets; 704 705static lck_grp_attr_t *dlil_grp_attributes = NULL; 706static 
/* Lock attributes shared by all DLIL-managed locks; set up in dlil_init(). */
lck_attr_t *dlil_lck_attributes = NULL;

/*
 * Sanity-check an inbound mbuf: it must carry a packet header and its
 * recorded receive interface must match the interface it is being
 * processed on (loopback is exempt, since lo0 funnels through the main
 * input thread).  Violations are fatal.
 */
#define DLIL_INPUT_CHECK(m, ifp) {					\
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||	\
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {				\
		panic_plain("%s: invalid mbuf %p\n", __func__, m);	\
		/* NOTREACHED */					\
	}								\
}

/*
 * Exponentially-weighted moving average:
 *	old = ((old << decay) - old + new) >> decay
 * i.e. old is weighted by (2^decay - 1)/2^decay and new by 1/2^decay.
 * A zero average is simply seeded with the new sample.
 */
#define DLIL_EWMA(old, new, decay) do {					\
	u_int32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)

#define	MBPS	(1ULL * 1000 * 1000)
#define	GBPS	(MBPS * 1000)

/*
 * Per-link-speed opportunistic polling parameters; see rxpoll_tbl below.
 */
struct rxpoll_time_tbl {
	u_int64_t	speed;		/* downlink speed */
	u_int32_t	plowat;		/* packets low watermark */
	u_int32_t	phiwat;		/* packets high watermark */
	u_int32_t	blowat;		/* bytes low watermark */
	u_int32_t	bhiwat;		/* bytes high watermark */
};

/* Table is scanned by link speed; zero-speed entry terminates the list. */
static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{  10 * MBPS,	2,	8,	(1 * 1024),	(6 * 1024)	},
	{ 100 * MBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{   1 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{  10 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 100 * GBPS,	10,	40,	(4 * 1024),	(64 * 1024)	},
	{ 0, 0, 0, 0, 0 }
};

/*
 * Map a protocol family to one of the PROTO_HASH_SLOTS buckets of the
 * per-ifnet protocol hash.  PF_INET, PF_INET6 and PF_VLAN each get a
 * dedicated bucket; everything else shares bucket 3.
 */
int
proto_hash_value(u_int32_t protocol_family)
{
	/*
	 * dlil_proto_unplumb_all() depends on the mapping between
	 * the hash bucket index and the protocol family defined
	 * here; future changes must be applied there as well.
	 */
	switch(protocol_family) {
	case PF_INET:
		return (0);
	case PF_INET6:
		return (1);
	case PF_VLAN:
		return (2);
	case PF_UNSPEC:
	default:
		return (3);
	}
}

/*
 * Look up the if_proto attached to ifp for the given protocol family.
 * Returns the entry with a reference held (caller must drop it via
 * if_proto_free()), or NULL if not attached.
 *
 * Caller must already be holding ifnet lock.
 */
static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *proto = NULL;
	u_int32_t i = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL)
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);

	while (proto != NULL && proto->protocol_family != protocol_family)
		proto = SLIST_NEXT(proto, next_hash);

	if (proto != NULL)
		if_proto_ref(proto);

	return (proto);
}

/* Take a reference on an if_proto entry. */
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}

extern void if_rtproto_del(struct ifnet *ifp, int protocol);

/*
 * Drop a reference on an if_proto entry; on last release (which can only
 * happen after the protocol has been detached), invoke the protocol's
 * detached callback, purge its routes, post KEV_DL_PROTO_DETACHED with
 * the count of protocols still attached, and free the entry.
 */
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1)
		return;

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached)
			proto->kpi.v1.detached(ifp, proto->protocol_family);
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached)
			proto->kpi.v2.detached(ifp, proto->protocol_family);
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ifnet_lock_shared(ifp);
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data));

	zfree(dlif_proto_zone, proto);
}

/*
 * Assert the state of the per-ifnet RW lock.  NOTOWNED is a no-op since
 * an RW lock cannot assert non-ownership.
 */
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
	unsigned int type = 0;
	int ass = 1;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		ass = 0;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}
	if (ass)
		lck_rw_assert(&ifp->if_lock, type);
}

/* Acquire the per-ifnet lock for reading. */
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

/* Acquire the per-ifnet lock for writing. */
__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

/* Release the per-ifnet lock (shared or exclusive). */
__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET6
/* Acquire the per-ifnet IPv6 data lock for reading. */
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

/* Acquire the per-ifnet IPv6 data lock for writing. */
__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

/* Release the per-ifnet IPv6 data lock. */
__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
#endif
915__private_extern__ void 916ifnet_head_lock_shared(void) 917{ 918 lck_rw_lock_shared(&ifnet_head_lock); 919} 920 921__private_extern__ void 922ifnet_head_lock_exclusive(void) 923{ 924 lck_rw_lock_exclusive(&ifnet_head_lock); 925} 926 927__private_extern__ void 928ifnet_head_done(void) 929{ 930 lck_rw_done(&ifnet_head_lock); 931} 932 933/* 934 * Caller must already be holding ifnet lock. 935 */ 936static int 937dlil_ifp_proto_count(struct ifnet * ifp) 938{ 939 int i, count = 0; 940 941 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED); 942 943 if (ifp->if_proto_hash == NULL) 944 goto done; 945 946 for (i = 0; i < PROTO_HASH_SLOTS; i++) { 947 struct if_proto *proto; 948 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) { 949 count++; 950 } 951 } 952done: 953 return (count); 954} 955 956__private_extern__ void 957dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, 958 u_int32_t event_code, struct net_event_data *event_data, 959 u_int32_t event_data_len) 960{ 961 struct net_event_data ev_data; 962 struct kev_msg ev_msg; 963 964 bzero(&ev_msg, sizeof (ev_msg)); 965 bzero(&ev_data, sizeof (ev_data)); 966 /* 967 * a net event always starts with a net_event_data structure 968 * but the caller can generate a simple net event or 969 * provide a longer event structure to post 970 */ 971 ev_msg.vendor_code = KEV_VENDOR_APPLE; 972 ev_msg.kev_class = KEV_NETWORK_CLASS; 973 ev_msg.kev_subclass = event_subclass; 974 ev_msg.event_code = event_code; 975 976 if (event_data == NULL) { 977 event_data = &ev_data; 978 event_data_len = sizeof(struct net_event_data); 979 } 980 981 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ); 982 event_data->if_family = ifp->if_family; 983 event_data->if_unit = (u_int32_t) ifp->if_unit; 984 985 ev_msg.dv[0].data_length = event_data_len; 986 ev_msg.dv[0].data_ptr = event_data; 987 ev_msg.dv[1].data_length = 0; 988 989 dlil_event_internal(ifp, &ev_msg); 990} 991 992__private_extern__ int 993dlil_alloc_local_stats(struct ifnet *ifp) 
994{ 995 int ret = EINVAL; 996 void *buf, *base, **pbuf; 997 998 if (ifp == NULL) 999 goto end; 1000 1001 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) { 1002 /* allocate tcpstat_local structure */ 1003 buf = zalloc(dlif_tcpstat_zone); 1004 if (buf == NULL) { 1005 ret = ENOMEM; 1006 goto end; 1007 } 1008 bzero(buf, dlif_tcpstat_bufsize); 1009 1010 /* Get the 64-bit aligned base address for this object */ 1011 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), 1012 sizeof (u_int64_t)); 1013 VERIFY(((intptr_t)base + dlif_tcpstat_size) <= 1014 ((intptr_t)buf + dlif_tcpstat_bufsize)); 1015 1016 /* 1017 * Wind back a pointer size from the aligned base and 1018 * save the original address so we can free it later. 1019 */ 1020 pbuf = (void **)((intptr_t)base - sizeof (void *)); 1021 *pbuf = buf; 1022 ifp->if_tcp_stat = base; 1023 1024 /* allocate udpstat_local structure */ 1025 buf = zalloc(dlif_udpstat_zone); 1026 if (buf == NULL) { 1027 ret = ENOMEM; 1028 goto end; 1029 } 1030 bzero(buf, dlif_udpstat_bufsize); 1031 1032 /* Get the 64-bit aligned base address for this object */ 1033 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), 1034 sizeof (u_int64_t)); 1035 VERIFY(((intptr_t)base + dlif_udpstat_size) <= 1036 ((intptr_t)buf + dlif_udpstat_bufsize)); 1037 1038 /* 1039 * Wind back a pointer size from the aligned base and 1040 * save the original address so we can free it later. 
1041 */ 1042 pbuf = (void **)((intptr_t)base - sizeof (void *)); 1043 *pbuf = buf; 1044 ifp->if_udp_stat = base; 1045 1046 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) && 1047 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t))); 1048 1049 ret = 0; 1050 } 1051 1052end: 1053 if (ret != 0) { 1054 if (ifp->if_tcp_stat != NULL) { 1055 pbuf = (void **) 1056 ((intptr_t)ifp->if_tcp_stat - sizeof (void *)); 1057 zfree(dlif_tcpstat_zone, *pbuf); 1058 ifp->if_tcp_stat = NULL; 1059 } 1060 if (ifp->if_udp_stat != NULL) { 1061 pbuf = (void **) 1062 ((intptr_t)ifp->if_udp_stat - sizeof (void *)); 1063 zfree(dlif_udpstat_zone, *pbuf); 1064 ifp->if_udp_stat = NULL; 1065 } 1066 } 1067 1068 return (ret); 1069} 1070 1071static int 1072dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp) 1073{ 1074 thread_continue_t func; 1075 u_int32_t limit; 1076 int error; 1077 1078 /* NULL ifp indicates the main input thread, called at dlil_init time */ 1079 if (ifp == NULL) { 1080 func = dlil_main_input_thread_func; 1081 VERIFY(inp == dlil_main_input_thread); 1082 (void) strlcat(inp->input_name, 1083 "main_input", DLIL_THREADNAME_LEN); 1084 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) { 1085 func = dlil_rxpoll_input_thread_func; 1086 VERIFY(inp != dlil_main_input_thread); 1087 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN, 1088 "%s_input_poll", if_name(ifp)); 1089 } else { 1090 func = dlil_input_thread_func; 1091 VERIFY(inp != dlil_main_input_thread); 1092 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN, 1093 "%s_input", if_name(ifp)); 1094 } 1095 VERIFY(inp->input_thr == THREAD_NULL); 1096 1097 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes); 1098 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes); 1099 1100 inp->mode = IFNET_MODEL_INPUT_POLL_OFF; 1101 inp->ifp = ifp; /* NULL for main input thread */ 1102 1103 net_timerclear(&inp->mode_holdtime); 1104 net_timerclear(&inp->mode_lasttime); 1105 
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
	} else {
		/* no queue limit for non-polling interfaces */
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit);
	if (inp == dlil_main_input_thread) {
		/* the main thread also owns a dedicated queue for lo0 */
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit);
	}

	error = kernel_thread_start(func, inp, &inp->input_thr);
	if (error == KERN_SUCCESS) {
		ml_thread_policy(inp->input_thr, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->input_thr;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_random(&tag, sizeof (tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->tag = tag;
				inp->net_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

	return (error);
}

/*
 * Tear down a per-interface input thread.  Must be called by the input
 * thread itself (see VERIFY below); resets the dlil_threading_info for
 * reuse, drops the thread reference taken at creation, and terminates
 * the calling thread — it never returns.
 */
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp;

	VERIFY(current_thread() == inp->input_thr);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

	lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
	lck_grp_free(inp->lck_grp);

	inp->input_waiting = 0;
	inp->wtot = 0;
	bzero(inp->input_name, sizeof (inp->input_name));
	ifp = inp->ifp;
	inp->ifp = NULL;
	VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
	qlimit(&inp->rcvq_pkts) = 0;
	bzero(&inp->stats, sizeof (inp->stats));

	VERIFY(!inp->net_affinity);
	inp->input_thr = THREAD_NULL;
	VERIFY(inp->wloop_thr == THREAD_NULL);
	VERIFY(inp->poll_thr == THREAD_NULL);
	VERIFY(inp->tag == 0);

	inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&inp->tstats, sizeof (inp->tstats));
	bzero(&inp->pstats, sizeof (inp->pstats));
	bzero(&inp->sstats, sizeof (inp->sstats));

	net_timerclear(&inp->mode_holdtime);
	net_timerclear(&inp->mode_lasttime);
	net_timerclear(&inp->sample_holdtime);
	net_timerclear(&inp->sample_lasttime);
	net_timerclear(&inp->dbg_lasttime);

#if IFNET_INPUT_SANITY_CHK
	inp->input_mbuf_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */

	if (dlil_verbose) {
		printf("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}

/*
 * Bind thread tp to the affinity set identified by tag via
 * THREAD_AFFINITY_POLICY; returns the thread_policy_set() result.
 */
static kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t policy;

	bzero(&policy, sizeof (policy));
	policy.affinity_tag = tag;
	return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
}

/*
 * One-time initialization of the DLIL subsystem: compile-time layout and
 * constant-equivalence checks, boot-arg parsing, zone and lock setup,
 * sub-system initialization, and creation of the main input thread and
 * the interface detacher thread.
 */
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	/* Logging flag/category/family constants must match across APIs */
	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN ==
	    IFNET_MODARGLEN);

	/* Tunables overridable via boot-args */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof (net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));

	/* debug builds of the ifnet use a larger structure with audit info */
	dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
	    sizeof (struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
	    0, DLIF_ZONE_NAME);
	if (dlif_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_zone, Z_EXPAND, TRUE);
	zone_change(dlif_zone, Z_CALLERACCT, FALSE);

	dlif_filt_size = sizeof (struct ifnet_filter);
	dlif_filt_zone = zinit(dlif_filt_size,
	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
	if (dlif_filt_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_FILT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);

	dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
	dlif_phash_zone = zinit(dlif_phash_size,
	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
	if (dlif_phash_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PHASH_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);

	dlif_proto_size = sizeof (struct if_proto);
	dlif_proto_zone = zinit(dlif_proto_size,
	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0,
	    DLIF_PROTO_ZONE_NAME);
	if (dlif_proto_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_PROTO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);

	dlif_tcpstat_size = sizeof (struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_tcpstat_bufsize =
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
	dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
	    DLIF_TCPSTAT_ZONE_NAME);
	if (dlif_tcpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_TCPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);

	dlif_udpstat_size = sizeof (struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
	dlif_udpstat_bufsize =
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
	/*
	 * NOTE(review): the max-size term below uses DLIF_TCPSTAT_ZONE_MAX
	 * for the UDP stats zone; looks like a copy/paste carry-over from
	 * the TCP zone above — confirm whether a DLIF_UDPSTAT_ZONE_MAX
	 * constant exists and was intended here.
	 */
	dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
	    DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
	    DLIF_UDPSTAT_ZONE_NAME);
	if (dlif_udpstat_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    DLIF_UDPSTAT_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
	zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);

	ifnet_llreach_init();

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);

	/* Setup the lock groups we will use */
	dlil_grp_attributes = lck_grp_attr_alloc_init();

	dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
	    dlil_grp_attributes);
	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
	    dlil_grp_attributes);
	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
	    dlil_grp_attributes);
	ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
	    dlil_grp_attributes);
	ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
	    dlil_grp_attributes);

	/* Setup the lock attributes we will use */
	dlil_lck_attributes = lck_attr_alloc_init();

	ifnet_lock_attr = lck_attr_alloc_init();

	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
	    dlil_lck_attributes);
	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);

	/* Setup interface flow control related items */
	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);

	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
	if (ifnet_fc_zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__,
		    IFNET_FC_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

#if DEBUG
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_create_input_thread(NULL, dlil_main_input_thread);

	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	/* drop the extra ref from kernel_thread_start() */
	thread_deallocate(thread);
}

/*
 * Mark the filter list busy; keeps concurrent attach/detach out until
 * the matching if_flt_monitor_leave().  Caller holds if_flt_lock.
 */
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}

/* Alias of if_flt_monitor_leave(); drops one busy count. */
static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

/*
 * Wait until no one holds the filter list busy, then take ownership.
 * May sleep on if_flt_head (if_flt_lock dropped by msleep while blocked).
 */
static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}

/*
 * Drop one busy count on the filter list; wake all waiters when the
 * list becomes idle.  Caller holds if_flt_lock.
 */
static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}

/*
 * Attach an interface filter to ifp.  Allocates an ifnet_filter from
 * dlif_filt_zone, copies the caller's iff_filter callbacks into it, and
 * inserts it at the tail of ifp's filter list under the filter monitor.
 * On success *filter_ref receives the attached filter.
 *
 * Returns 0, ENXIO if ifp is not in the global interface list, or
 * ENOMEM on allocation failure.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}

	filter = zalloc(dlif_filt_zone);
	if (filter == NULL) {
		retval = ENOMEM;
		goto done;
	}
	bzero(filter, dlif_filt_size);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie =
if_filter->iff_cookie; 1585 filter->filt_name = if_filter->iff_name; 1586 filter->filt_protocol = if_filter->iff_protocol; 1587 filter->filt_input = if_filter->iff_input; 1588 filter->filt_output = if_filter->iff_output; 1589 filter->filt_event = if_filter->iff_event; 1590 filter->filt_ioctl = if_filter->iff_ioctl; 1591 filter->filt_detached = if_filter->iff_detached; 1592 1593 lck_mtx_lock(&ifp->if_flt_lock); 1594 if_flt_monitor_enter(ifp); 1595 1596 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); 1597 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next); 1598 1599 if_flt_monitor_leave(ifp); 1600 lck_mtx_unlock(&ifp->if_flt_lock); 1601 1602 *filter_ref = filter; 1603 1604 /* 1605 * Bump filter count and route_generation ID to let TCP 1606 * know it shouldn't do TSO on this connection 1607 */ 1608 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) { 1609 OSAddAtomic(1, &dlil_filter_disable_tso_count); 1610 routegenid_update(); 1611 } 1612 if (dlil_verbose) { 1613 printf("%s: %s filter attached\n", if_name(ifp), 1614 if_filter->iff_name); 1615 } 1616done: 1617 ifnet_head_done(); 1618 if (retval != 0 && ifp != NULL) { 1619 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n", 1620 if_name(ifp), if_filter->iff_name, retval); 1621 } 1622 if (retval != 0 && filter != NULL) 1623 zfree(dlif_filt_zone, filter); 1624 1625 return (retval); 1626} 1627 1628static int 1629dlil_detach_filter_internal(interface_filter_t filter, int detached) 1630{ 1631 int retval = 0; 1632 1633 if (detached == 0) { 1634 ifnet_t ifp = NULL; 1635 1636 ifnet_head_lock_shared(); 1637 TAILQ_FOREACH(ifp, &ifnet_head, if_link) { 1638 interface_filter_t entry = NULL; 1639 1640 lck_mtx_lock(&ifp->if_flt_lock); 1641 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) { 1642 if (entry != filter || entry->filt_skip) 1643 continue; 1644 /* 1645 * We've found a match; since it's possible 1646 * that the thread gets blocked in the monitor, 1647 * we do the lock dance. 
Interface should 1648 * not be detached since we still have a use 1649 * count held during filter attach. 1650 */ 1651 entry->filt_skip = 1; /* skip input/output */ 1652 lck_mtx_unlock(&ifp->if_flt_lock); 1653 ifnet_head_done(); 1654 1655 lck_mtx_lock(&ifp->if_flt_lock); 1656 if_flt_monitor_enter(ifp); 1657 lck_mtx_assert(&ifp->if_flt_lock, 1658 LCK_MTX_ASSERT_OWNED); 1659 1660 /* Remove the filter from the list */ 1661 TAILQ_REMOVE(&ifp->if_flt_head, filter, 1662 filt_next); 1663 1664 if_flt_monitor_leave(ifp); 1665 lck_mtx_unlock(&ifp->if_flt_lock); 1666 if (dlil_verbose) { 1667 printf("%s: %s filter detached\n", 1668 if_name(ifp), filter->filt_name); 1669 } 1670 goto destroy; 1671 } 1672 lck_mtx_unlock(&ifp->if_flt_lock); 1673 } 1674 ifnet_head_done(); 1675 1676 /* filter parameter is not a valid filter ref */ 1677 retval = EINVAL; 1678 goto done; 1679 } 1680 1681 if (dlil_verbose) 1682 printf("%s filter detached\n", filter->filt_name); 1683 1684destroy: 1685 1686 /* Call the detached function if there is one */ 1687 if (filter->filt_detached) 1688 filter->filt_detached(filter->filt_cookie, filter->filt_ifp); 1689 1690 /* Free the filter */ 1691 zfree(dlif_filt_zone, filter); 1692 1693 /* 1694 * Decrease filter count and route_generation ID to let TCP 1695 * know it should reevalute doing TSO or not 1696 */ 1697 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) { 1698 OSAddAtomic(-1, &dlil_filter_disable_tso_count); 1699 routegenid_update(); 1700 } 1701done: 1702 if (retval != 0) { 1703 DLIL_PRINTF("failed to detach %s filter (err=%d)\n", 1704 filter->filt_name, retval); 1705 } 1706 return (retval); 1707} 1708 1709__private_extern__ void 1710dlil_detach_filter(interface_filter_t filter) 1711{ 1712 if (filter == NULL) 1713 return; 1714 dlil_detach_filter_internal(filter, 0); 1715} 1716 1717/* 1718 * Main input thread: 1719 * 1720 * a) handles all inbound packets for lo0 1721 * b) handles all inbound packets for interfaces with no dedicated 1722 * input thread (e.g. 
anything but Ethernet/PDP or those that support
 *    opportunistic polling.)
 * c) protocol registrations
 * d) packet injections
 */
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->ifp == NULL);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		boolean_t proto_req;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/* Main input thread cannot be terminated */
		VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));

		proto_req = (inp->input_waiting &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts);

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		m_loop = _getq_all(&inpm->lo_rcvq_pkts);

		inp->wtot = 0;

		/* drain outside the lock; queues were snapshot above */
		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m_loop != NULL)
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, inp->mode);

		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);

		if (proto_req)
			proto_input_run();
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}

/*
 * Input thread for interfaces with legacy input model.
 */
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
	VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt;

		lck_mtx_lock_spin(&inp->input_lck);

		/* Wait until there is work to be done */
		while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
			inp->input_waiting &= ~DLIL_INPUT_RUNNING;
			(void) msleep(&inp->input_waiting, &inp->input_lck,
			    (PZERO - 1) | PSPIN, inp->input_name, NULL);
		}

		inp->input_waiting |= DLIL_INPUT_RUNNING;
		inp->input_waiting &= ~DLIL_INPUT_WAITING;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not worth the trouble.)
		 */
		VERIFY(!(inp->input_waiting &
		    (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->rcvq_pkts);
		m = _getq_all(&inp->rcvq_pkts);

		if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
			lck_mtx_unlock(&inp->input_lck);

			/* Free up pending packets */
			if (m != NULL)
				mbuf_freem_list(m);

			/* terminates the calling thread; does not return */
			dlil_terminate_input_thread(inp);
			/* NOTREACHED */
			return;
		}

		inp->wtot = 0;

		dlil_input_stats_sync(ifp, inp);

		lck_mtx_unlock(&inp->input_lck);

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (m != NULL)
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, inp->mode);
	}

	/* NOTREACHED */
	VERIFY(0);	/* we should never get here */
}

/*
 * Input thread for interfaces with opportunistic polling input model.
 */
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->ifp;
	struct timespec ts;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));

	while (1) {
		struct mbuf *m = NULL;
		u_int32_t m_cnt, m_size, poll_req = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		u_int64_t ival;

		lck_mtx_lock_spin(&inp->input_lck);

		/* clamp the polling interval to its minimum */
		if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
			ival = IF_RXPOLL_INTERVALTIME_MIN;

		/* Link parameters changed?
*/ 1898 if (ifp->if_poll_update != 0) { 1899 ifp->if_poll_update = 0; 1900 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE); 1901 } 1902 1903 /* Current operating mode */ 1904 mode = inp->mode; 1905 1906 /* Wait until there is work to be done */ 1907 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) { 1908 inp->input_waiting &= ~DLIL_INPUT_RUNNING; 1909 (void) msleep(&inp->input_waiting, &inp->input_lck, 1910 (PZERO - 1) | PSPIN, inp->input_name, NULL); 1911 } 1912 1913 inp->input_waiting |= DLIL_INPUT_RUNNING; 1914 inp->input_waiting &= ~DLIL_INPUT_WAITING; 1915 1916 /* 1917 * Protocol registration and injection must always use 1918 * the main input thread; in theory the latter can utilize 1919 * the corresponding input thread where the packet arrived 1920 * on, but that requires our knowing the interface in advance 1921 * (and the benefits might not worth the trouble.) 1922 */ 1923 VERIFY(!(inp->input_waiting & 1924 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER))); 1925 1926 if (inp->input_waiting & DLIL_INPUT_TERMINATE) { 1927 /* Free up pending packets */ 1928 _flushq(&inp->rcvq_pkts); 1929 lck_mtx_unlock(&inp->input_lck); 1930 1931 dlil_terminate_input_thread(inp); 1932 /* NOTREACHED */ 1933 return; 1934 } 1935 1936 /* Total count of all packets */ 1937 m_cnt = qlen(&inp->rcvq_pkts); 1938 1939 /* Total bytes of all packets */ 1940 m_size = qsize(&inp->rcvq_pkts); 1941 1942 /* Packets for this interface */ 1943 m = _getq_all(&inp->rcvq_pkts); 1944 VERIFY(m != NULL || m_cnt == 0); 1945 1946 nanouptime(&now); 1947 if (!net_timerisset(&inp->sample_lasttime)) 1948 *(&inp->sample_lasttime) = *(&now); 1949 1950 net_timersub(&now, &inp->sample_lasttime, &delta); 1951 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) { 1952 u_int32_t ptot, btot; 1953 1954 /* Accumulate statistics for current sampling */ 1955 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size); 1956 1957 if (net_timercmp(&delta, &inp->sample_holdtime, <)) 1958 goto skip; 1959 1960 *(&inp->sample_lasttime) = 
*(&now); 1961 1962 /* Calculate min/max of inbound bytes */ 1963 btot = (u_int32_t)inp->sstats.bytes; 1964 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot) 1965 inp->rxpoll_bmin = btot; 1966 if (btot > inp->rxpoll_bmax) 1967 inp->rxpoll_bmax = btot; 1968 1969 /* Calculate EWMA of inbound bytes */ 1970 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay); 1971 1972 /* Calculate min/max of inbound packets */ 1973 ptot = (u_int32_t)inp->sstats.packets; 1974 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot) 1975 inp->rxpoll_pmin = ptot; 1976 if (ptot > inp->rxpoll_pmax) 1977 inp->rxpoll_pmax = ptot; 1978 1979 /* Calculate EWMA of inbound packets */ 1980 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay); 1981 1982 /* Reset sampling statistics */ 1983 PKTCNTR_CLEAR(&inp->sstats); 1984 1985 /* Calculate EWMA of wakeup requests */ 1986 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay); 1987 inp->wtot = 0; 1988 1989 if (dlil_verbose) { 1990 if (!net_timerisset(&inp->dbg_lasttime)) 1991 *(&inp->dbg_lasttime) = *(&now); 1992 net_timersub(&now, &inp->dbg_lasttime, &delta); 1993 if (net_timercmp(&delta, &dlil_dbgrate, >=)) { 1994 *(&inp->dbg_lasttime) = *(&now); 1995 printf("%s: [%s] pkts avg %d max %d " 1996 "limits [%d/%d], wreq avg %d " 1997 "limits [%d/%d], bytes avg %d " 1998 "limits [%d/%d]\n", if_name(ifp), 1999 (inp->mode == 2000 IFNET_MODEL_INPUT_POLL_ON) ? 
2001 "ON" : "OFF", inp->rxpoll_pavg, 2002 inp->rxpoll_pmax, 2003 inp->rxpoll_plowat, 2004 inp->rxpoll_phiwat, 2005 inp->rxpoll_wavg, 2006 inp->rxpoll_wlowat, 2007 inp->rxpoll_whiwat, 2008 inp->rxpoll_bavg, 2009 inp->rxpoll_blowat, 2010 inp->rxpoll_bhiwat); 2011 } 2012 } 2013 2014 /* Perform mode transition, if necessary */ 2015 if (!net_timerisset(&inp->mode_lasttime)) 2016 *(&inp->mode_lasttime) = *(&now); 2017 2018 net_timersub(&now, &inp->mode_lasttime, &delta); 2019 if (net_timercmp(&delta, &inp->mode_holdtime, <)) 2020 goto skip; 2021 2022 if (inp->rxpoll_pavg <= inp->rxpoll_plowat && 2023 inp->rxpoll_bavg <= inp->rxpoll_blowat && 2024 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) { 2025 mode = IFNET_MODEL_INPUT_POLL_OFF; 2026 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat && 2027 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat || 2028 inp->rxpoll_wavg >= inp->rxpoll_whiwat) && 2029 inp->mode != IFNET_MODEL_INPUT_POLL_ON) { 2030 mode = IFNET_MODEL_INPUT_POLL_ON; 2031 } 2032 2033 if (mode != inp->mode) { 2034 inp->mode = mode; 2035 *(&inp->mode_lasttime) = *(&now); 2036 poll_req++; 2037 } 2038 } 2039skip: 2040 dlil_input_stats_sync(ifp, inp); 2041 2042 lck_mtx_unlock(&inp->input_lck); 2043 2044 /* 2045 * If there's a mode change and interface is still attached, 2046 * perform a downcall to the driver for the new mode. Also 2047 * hold an IO refcnt on the interface to prevent it from 2048 * being detached (will be release below.) 2049 */ 2050 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) { 2051 struct ifnet_model_params p = { mode, { 0 } }; 2052 errno_t err; 2053 2054 if (dlil_verbose) { 2055 printf("%s: polling is now %s, " 2056 "pkts avg %d max %d limits [%d/%d], " 2057 "wreq avg %d limits [%d/%d], " 2058 "bytes avg %d limits [%d/%d]\n", 2059 if_name(ifp), 2060 (mode == IFNET_MODEL_INPUT_POLL_ON) ? 
2061 "ON" : "OFF", inp->rxpoll_pavg, 2062 inp->rxpoll_pmax, inp->rxpoll_plowat, 2063 inp->rxpoll_phiwat, inp->rxpoll_wavg, 2064 inp->rxpoll_wlowat, inp->rxpoll_whiwat, 2065 inp->rxpoll_bavg, inp->rxpoll_blowat, 2066 inp->rxpoll_bhiwat); 2067 } 2068 2069 if ((err = ((*ifp->if_input_ctl)(ifp, 2070 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) { 2071 printf("%s: error setting polling mode " 2072 "to %s (%d)\n", if_name(ifp), 2073 (mode == IFNET_MODEL_INPUT_POLL_ON) ? 2074 "ON" : "OFF", err); 2075 } 2076 2077 switch (mode) { 2078 case IFNET_MODEL_INPUT_POLL_OFF: 2079 ifnet_set_poll_cycle(ifp, NULL); 2080 inp->rxpoll_offreq++; 2081 if (err != 0) 2082 inp->rxpoll_offerr++; 2083 break; 2084 2085 case IFNET_MODEL_INPUT_POLL_ON: 2086 net_nsectimer(&ival, &ts); 2087 ifnet_set_poll_cycle(ifp, &ts); 2088 ifnet_poll(ifp); 2089 inp->rxpoll_onreq++; 2090 if (err != 0) 2091 inp->rxpoll_onerr++; 2092 break; 2093 2094 default: 2095 VERIFY(0); 2096 /* NOTREACHED */ 2097 } 2098 2099 /* Release the IO refcnt */ 2100 ifnet_decr_iorefcnt(ifp); 2101 } 2102 2103 /* 2104 * NOTE warning %%% attention !!!! 2105 * We should think about putting some thread starvation 2106 * safeguards if we deal with long chains of packets. 2107 */ 2108 if (m != NULL) 2109 dlil_input_packet_list_extended(NULL, m, m_cnt, mode); 2110 } 2111 2112 /* NOTREACHED */ 2113 VERIFY(0); /* we should never get here */ 2114} 2115 2116/* 2117 * Must be called on an attached ifnet (caller is expected to check.) 2118 * Caller may pass NULL for poll parameters to indicate "auto-tuning." 
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	struct dlil_threading_info *inp;
	u_int64_t sample_holdtime, inbw;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	/*
	 * Validate driver-supplied thresholds: lowat/hiwat must be
	 * either both zero (auto-tune) or both non-zero with
	 * lowat < hiwat; a non-zero interval is clamped to the minimum.
	 */
	if (p != NULL) {
		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
		    (p->packets_lowat != 0 && p->packets_hiwat == 0))
			return (EINVAL);
		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
		    p->packets_lowat >= p->packets_hiwat)
			return (EINVAL);
		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
			return (EINVAL);
		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
		    p->bytes_lowat >= p->bytes_hiwat)
			return (EINVAL);
		if (p->interval_time != 0 &&
		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
	}

	/*
	 * "locked" means the caller (the rxpoll input thread) already
	 * holds inp->input_lck; otherwise take it here.
	 */
	if (!locked)
		lck_mtx_lock(&inp->input_lck);

	lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);

	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0)
		ifp->if_poll_update = 0;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		inp->rxpoll_wlowat = inp->rxpoll_plowat =
		    inp->rxpoll_blowat = 0;
		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
		    inp->rxpoll_bhiwat = (u_int32_t)-1;
		inp->rxpoll_plim = 0;
		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/* Pick the rxpoll_tbl entry for the current link rate */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed)
				break;
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		plim = ((p == NULL || p->packets_limit == 0) ?
		    if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		inp->rxpoll_wlowat = if_rxpoll_wlowat;
		inp->rxpoll_whiwat = if_rxpoll_whiwat;
		inp->rxpoll_plowat = plowat;
		inp->rxpoll_phiwat = phiwat;
		inp->rxpoll_blowat = blowat;
		inp->rxpoll_bhiwat = bhiwat;
		inp->rxpoll_plim = plim;
		inp->rxpoll_ival = ival;
	}

	net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
	net_nsectimer(&sample_holdtime, &inp->sample_holdtime);

	if (dlil_verbose) {
		printf("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
		    inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
		    inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
	}

	if (!locked)
		lck_mtx_unlock(&inp->input_lck);

	return (0);
}

/*
 * Must be called on an attached ifnet (caller is expected to check.)
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
		return (ENXIO);

	bzero(p, sizeof (*p));

	/* Snapshot the current rxpoll parameters under the input lock */
	lck_mtx_lock(&inp->input_lck);
	p->packets_limit = inp->rxpoll_plim;
	p->packets_lowat = inp->rxpoll_plowat;
	p->packets_hiwat = inp->rxpoll_phiwat;
	p->bytes_lowat = inp->rxpoll_blowat;
	p->bytes_hiwat = inp->rxpoll_bhiwat;
	p->interval_time = inp->rxpoll_ival;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

/*
 * Legacy driver input: chain only, tail unknown; stats optional.
 */
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
}

/*
 * Extended driver input: caller supplies the chain tail and stats.
 */
errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
}

/*
 * Common entry point for ifnet_input(), ifnet_input_extended() and the
 * poller thread (poll == TRUE).  Validates the packet chain, computes or
 * verifies packet/byte counts, enqueues the chain onto the appropriate
 * input thread's receive queue and wakes that thread.
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	struct thread *tp = current_thread();
	struct mbuf *last;
	struct dlil_threading_info *inp;
	u_int32_t m_cnt = 0, m_size = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
		if (m_head != NULL)
			mbuf_freem_list(m_head);
		return (EINVAL);
	}

	if (m_tail == NULL) {
		/* No tail given: walk the chain to find it and count */
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (dlil_input_sanity_check != 0)
				DLIL_INPUT_CHECK(last, ifp);
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL)
				break;
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (dlil_input_sanity_check != 0) {
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL)
					break;
				last = mbuf_nextpkt(last);
			}
		} else {
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	if ((inp = ifp->if_inp) == NULL)
		inp = dlil_main_input_thread;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set.  We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->input_lck);
	if (inp != dlil_main_input_thread && inp->net_affinity &&
	    ((!poll && inp->wloop_thr == THREAD_NULL) ||
	    (poll && inp->poll_thr == THREAD_NULL))) {
		u_int32_t tag = inp->tag;

		if (poll) {
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->poll_thr = tp;
		} else {
			VERIFY(inp->wloop_thr == THREAD_NULL);
			inp->wloop_thr = tp;
		}
		lck_mtx_unlock(&inp->input_lck);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order ot tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->input_lck);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack.  Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->rcvq_pkts, m_head, m_tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	if (dlil_input_sanity_check != 0) {
		u_int32_t count;
		struct mbuf *m0;

		for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
			count++;

		if (count != m_cnt) {
			panic_plain("%s: invalid packet count %d "
			    "(expected %d)\n", if_name(ifp),
			    count, m_cnt);
			/* NOTREACHED */
		}

		inp->input_mbuf_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	if (s != NULL) {
		dlil_input_stats_add(s, inp, poll);
		/*
		 * If we're using the main input thread, synchronize the
		 * stats now since we have the interface context.  All
		 * other cases involving dedicated input threads will
		 * have their stats synchronized there.
		 */
		if (inp == dlil_main_input_thread)
			dlil_input_stats_sync(ifp, inp);
	}

	inp->input_waiting |= DLIL_INPUT_WAITING;
	if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
		inp->wtot++;
		wakeup_one((caddr_t)&inp->input_waiting);
	}
	lck_mtx_unlock(&inp->input_lck);

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_decr_iorefcnt(ifp);
	}

	return (0);
}

/*
 * Kick the transmit starter thread for ifp; resetfc != 0 additionally
 * clears the IFSF_FLOW_CONTROLLED flag before signalling.
 */
static void
ifnet_start_common(struct ifnet *ifp, int resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART))
		return;
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it.
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}

/*
 * Public entry point; wake the starter thread without touching the
 * flow-control state.
 */
void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, 0);
}

/*
 * Dedicated transmit starter thread for an ifnet.  Sleeps on
 * ifp->if_start_thread until ifnet_start() wakes it, then repeatedly
 * invokes the driver's if_start callback until no new start requests
 * have arrived.  Terminates once ifp->if_start_thread has been cleared
 * (interface detach).
 */
static void
ifnet_start_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifclassq *ifq = &ifp->if_snd;

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();

		lck_mtx_lock(&inp->input_lck);
		if (inp->net_affinity) {
			u_int32_t tag = inp->tag;

			VERIFY(inp->wloop_thr == THREAD_NULL);
			VERIFY(inp->poll_thr == THREAD_NULL);
			inp->wloop_thr = tp;
			lck_mtx_unlock(&inp->input_lck);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->input_lck);
		}
	}

	snprintf(ifname, sizeof (ifname), "%s_starter",
	    if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_start_lock);

	for (;;) {
		(void) msleep(&ifp->if_start_thread, &ifp->if_start_lock,
		    (PZERO - 1) | PSPIN, ifname, ts);

		/* interface is detached? */
		if (ifp->if_start_thread == THREAD_NULL) {
			ifnet_set_start_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_start_lock);
			ifnet_purge(ifp);

			if (dlil_verbose) {
				printf("%s: starter thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_start_active = 1;
		for (;;) {
			u_int32_t req = ifp->if_start_req;

			lck_mtx_unlock(&ifp->if_start_lock);
			/* invoke the driver's start routine */
			((*ifp->if_start)(ifp));
			lck_mtx_lock_spin(&ifp->if_start_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_start_req)
				break;
		}
		ifp->if_start_req = 0;
		ifp->if_start_active = 0;
		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}

/*
 * Set (or clear, when ts is NULL) the periodic restart interval used by
 * the starter thread's timed sleep.
 */
void
ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
	else
		*(&ifp->if_start_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: restart interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}

static void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifp->if_poll_req++;
	if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
	lck_mtx_unlock(&ifp->if_poll_lock);
}

/*
 * Dedicated poller thread for an RXPOLL-capable ifnet.  Woken by
 * ifnet_poll(), it calls the driver's if_input_poll routine to harvest
 * up to m_lim packets per iteration and feeds them into
 * ifnet_input_common() with poll == TRUE.  Terminates once
 * ifp->if_poll_thread has been cleared (interface detach).
 */
static void
ifnet_poll_thread_fn(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	char ifname[IFNAMSIZ + 1];
	struct timespec *ts = NULL;
	struct ifnet_stat_increment_param s;

	snprintf(ifname, sizeof (ifname), "%s_poller",
	    if_name(ifp));
	bzero(&s, sizeof (s));

	lck_mtx_lock_spin(&ifp->if_poll_lock);

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	for (;;) {
		if (ifp->if_poll_thread != THREAD_NULL) {
			(void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
			    (PZERO - 1) | PSPIN, ifname, ts);
		}

		/* interface is detached (maybe while asleep)? */
		if (ifp->if_poll_thread == THREAD_NULL) {
			ifnet_set_poll_cycle(ifp, NULL);
			lck_mtx_unlock(&ifp->if_poll_lock);

			if (dlil_verbose) {
				printf("%s: poller thread terminated\n",
				    if_name(ifp));
			}

			/* for the extra refcnt from kernel_thread_start() */
			thread_deallocate(current_thread());
			/* this is the end */
			thread_terminate(current_thread());
			/* NOTREACHED */
			return;
		}

		ifp->if_poll_active = 1;
		for (;;) {
			struct mbuf *m_head, *m_tail;
			u_int32_t m_lim, m_cnt, m_totlen;
			u_int16_t req = ifp->if_poll_req;

			lck_mtx_unlock(&ifp->if_poll_lock);

			/*
			 * If no longer attached, there's nothing to do;
			 * else hold an IO refcnt to prevent the interface
			 * from being detached (will be released below.)
			 */
			if (!ifnet_is_attached(ifp, 1)) {
				lck_mtx_lock_spin(&ifp->if_poll_lock);
				break;
			}

			m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
			    MAX((qlimit(&inp->rcvq_pkts)),
			    (inp->rxpoll_phiwat << 2));

			if (dlil_verbose > 1) {
				printf("%s: polling up to %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_lim,
				    inp->rxpoll_pavg, inp->rxpoll_pmax,
				    inp->rxpoll_wavg, inp->rxpoll_bavg);
			}

			/* invoke the driver's input poll routine */
			((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
			    &m_cnt, &m_totlen));

			if (m_head != NULL) {
				VERIFY(m_tail != NULL && m_cnt > 0);

				if (dlil_verbose > 1) {
					printf("%s: polled %d pkts, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), m_cnt,
					    inp->rxpoll_pavg, inp->rxpoll_pmax,
					    inp->rxpoll_wavg, inp->rxpoll_bavg);
				}

				/* stats are required for extended variant */
				s.packets_in = m_cnt;
				s.bytes_in = m_totlen;

				(void) ifnet_input_common(ifp, m_head, m_tail,
				    &s, TRUE, TRUE);
			} else {
				if (dlil_verbose > 1) {
					printf("%s: no packets, "
					    "pkts avg %d max %d, wreq avg %d, "
					    "bytes avg %d\n",
					    if_name(ifp), inp->rxpoll_pavg,
					    inp->rxpoll_pmax, inp->rxpoll_wavg,
					    inp->rxpoll_bavg);
				}

				(void) ifnet_input_common(ifp, NULL, NULL,
				    NULL, FALSE, TRUE);
			}

			/* Release the io ref count */
			ifnet_decr_iorefcnt(ifp);

			lck_mtx_lock_spin(&ifp->if_poll_lock);

			/* if there's no pending request, we're done */
			if (req == ifp->if_poll_req)
				break;
		}
		ifp->if_poll_req = 0;
		ifp->if_poll_active = 0;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0)
			ts = NULL;
	}

	/* NOTREACHED */
}

/*
 * Set (or clear, when ts is NULL) the periodic poll interval used by
 * the poller thread's timed sleep.
 */
void
ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
{
	if (ts == NULL)
		bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
	else
		*(&ifp->if_poll_cycle) = *ts;

	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
		printf("%s: poll interval set to %lu nsec\n",
		    if_name(ifp), ts->tv_nsec);
}

/* Flush the interface send queue (TXSTART interfaces only). */
void
ifnet_purge(struct ifnet *ifp)
{
	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
		if_qflush(ifp, 0);
}

/*
 * Propagate a classq event to the send queue, re-applying the token
 * bucket regulator profile first if one is enabled.
 */
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq)))
		return;

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
		    ifq->ifcq_tbr.tbr_percent, 0 };
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}

/*
 * On a link bandwidth change, ask the rxpoll input thread to re-tune
 * its parameters (it checks if_poll_update on each pass.)
 */
void
ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
{
	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
			ifp->if_poll_update++;
		break;

	default:
		break;
	}
}

/*
 * Switch the output scheduling model (NORMAL or DRIVER_MANAGED),
 * reverting to the previous model if pktsched setup fails.
 */
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	struct ifclassq *ifq;
	u_int32_t omodel;
	errno_t err;

	if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED &&
	    model != IFNET_SCHED_MODEL_NORMAL))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;
	if ((err = ifclassq_pktsched_setup(ifq)) != 0)
		ifp->if_output_sched_model = omodel;
	IFCQ_UNLOCK(ifq);

	return (err);
}

errno_t
ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	/* Set the send-queue length limit (TXSTART interfaces only) */
	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifclassq_set_maxlen(&ifp->if_snd, maxqlen);

	return (0);
}

/* Return the send-queue length limit into *maxqlen. */
errno_t
ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*maxqlen = ifclassq_get_maxlen(&ifp->if_snd);

	return (0);
}

/* Return the current number of packets in the send queue into *pkts. */
errno_t
ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
{
	errno_t err;

	if (ifp == NULL || pkts == NULL)
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
		    pkts, NULL);

	return (err);
}

/* Per-service-class send queue depth; pkts and/or bytes may be NULL. */
errno_t
ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t *pkts, u_int32_t *bytes)
{
	errno_t err;

	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
	    (pkts == NULL && bytes == NULL))
		err = EINVAL;
	else if (!(ifp->if_eflags & IFEF_TXSTART))
		err = ENXIO;
	else
		err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);

	return (err);
}

/*
 * Set the receive-queue limit for an RXPOLL interface; 0 selects the
 * system default, and values below IF_RCVQ_MINLEN are clamped up.
 */
errno_t
ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	if (maxqlen == 0)
		maxqlen = if_rcvq_maxlen;
	else if (maxqlen < IF_RCVQ_MINLEN)
		maxqlen = IF_RCVQ_MINLEN;

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	qlimit(&inp->rcvq_pkts) = maxqlen;
	lck_mtx_unlock(&inp->input_lck);

	return (0);
}

/* Return the receive-queue limit for an RXPOLL interface into *maxqlen. */
errno_t
ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
{
	struct dlil_threading_info *inp;

	if (ifp == NULL || maxqlen == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
		return (ENXIO);

	inp = ifp->if_inp;
	lck_mtx_lock(&inp->input_lck);
	*maxqlen = qlimit(&inp->rcvq_pkts);
	lck_mtx_unlock(&inp->input_lck);
	return (0);
}

/*
 * Enqueue a single packet (no chain) onto the interface send queue and
 * kick the starter thread.  Consumes m on every error path.
 */
errno_t
ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	int error;

	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
	    m->m_nextpkt != NULL) {
		if (m != NULL)
			m_freem_list(m);
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		/* flag tested without lock for performance */
		m_freem(m);
		return (ENXIO);
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_freem(m);
		return (ENETDOWN);
	}

	/* enqueue the packet */
	error = ifclassq_enqueue(&ifp->if_snd, m);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (error == 0 || error == EQFULL || error == EQSUSPENDED)
		ifnet_start(ifp);

	return (error);
}

/* Dequeue one packet from the send queue (NORMAL scheduling model only). */
errno_t
ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
{
	errno_t rc;
	if (ifp == NULL || mp == NULL)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);
	rc = ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);

	return (rc);
}

/*
 * Dequeue one packet of the given service class (DRIVER_MANAGED
 * scheduling model only.)
 */
errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL);
	ifnet_decr_iorefcnt(ifp);
	return (rc);
}

/*
 * Dequeue up to "limit" packets (NORMAL scheduling model only); tail,
 * cnt and len are optional output parameters.
 */
errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head,
    struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	if (ifp == NULL || head == NULL || limit < 1)
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);

	rc = ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);
	return (rc);
}

/*
 * Dequeue up to "limit" packets of the given service class
 * (DRIVER_MANAGED scheduling model only.)
 */
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;
	if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc))
		return (EINVAL);
	else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED))
		return (ENXIO);
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);
	rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head,
	    tail, cnt, len);
	ifnet_decr_iorefcnt(ifp);
	return (rc);
}

/*
 * Framer wrapper for legacy drivers: zero out the pre/post frame-length
 * outputs and delegate to the driver's legacy framer callback.
 */
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
}

/*
 * Run an inbound packet through the attached interface filters.  The
 * filter lock is dropped around each filt_input callback; a non-zero
 * result from a filter stops iteration and is returned to the caller.
 */
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	struct ifnet_filter *filter;

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return (result);
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be
local to a subsystem -- if_bridge for M_PROTO1 3079 */ 3080 if (*m_p != NULL) 3081 (*m_p)->m_flags &= ~M_PROTO1; 3082 3083 return (0); 3084} 3085 3086static int 3087dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p, 3088 protocol_family_t protocol_family) 3089{ 3090 struct ifnet_filter *filter; 3091 3092 /* 3093 * Pass the outbound packet to the interface filters 3094 */ 3095 lck_mtx_lock_spin(&ifp->if_flt_lock); 3096 /* prevent filter list from changing in case we drop the lock */ 3097 if_flt_monitor_busy(ifp); 3098 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { 3099 int result; 3100 3101 if (!filter->filt_skip && filter->filt_output != NULL && 3102 (filter->filt_protocol == 0 || 3103 filter->filt_protocol == protocol_family)) { 3104 lck_mtx_unlock(&ifp->if_flt_lock); 3105 3106 result = filter->filt_output(filter->filt_cookie, ifp, 3107 protocol_family, m_p); 3108 3109 lck_mtx_lock_spin(&ifp->if_flt_lock); 3110 if (result != 0) { 3111 /* we're done with the filter list */ 3112 if_flt_monitor_unbusy(ifp); 3113 lck_mtx_unlock(&ifp->if_flt_lock); 3114 return (result); 3115 } 3116 } 3117 } 3118 /* we're done with the filter list */ 3119 if_flt_monitor_unbusy(ifp); 3120 lck_mtx_unlock(&ifp->if_flt_lock); 3121 3122 return (0); 3123} 3124 3125static void 3126dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m) 3127{ 3128 int error; 3129 3130 if (ifproto->proto_kpi == kProtoKPI_v1) { 3131 /* Version 1 protocols get one packet at a time */ 3132 while (m != NULL) { 3133 char * frame_header; 3134 mbuf_t next_packet; 3135 3136 next_packet = m->m_nextpkt; 3137 m->m_nextpkt = NULL; 3138 frame_header = m->m_pkthdr.pkt_hdr; 3139 m->m_pkthdr.pkt_hdr = NULL; 3140 error = (*ifproto->kpi.v1.input)(ifproto->ifp, 3141 ifproto->protocol_family, m, frame_header); 3142 if (error != 0 && error != EJUSTRETURN) 3143 m_freem(m); 3144 m = next_packet; 3145 } 3146 } else if (ifproto->proto_kpi == kProtoKPI_v2) { 3147 /* Version 2 protocols support packet lists */ 3148 
error = (*ifproto->kpi.v2.input)(ifproto->ifp, 3149 ifproto->protocol_family, m); 3150 if (error != 0 && error != EJUSTRETURN) 3151 m_freem_list(m); 3152 } 3153 return; 3154} 3155 3156static void 3157dlil_input_stats_add(const struct ifnet_stat_increment_param *s, 3158 struct dlil_threading_info *inp, boolean_t poll) 3159{ 3160 struct ifnet_stat_increment_param *d = &inp->stats; 3161 3162 if (s->packets_in != 0) 3163 d->packets_in += s->packets_in; 3164 if (s->bytes_in != 0) 3165 d->bytes_in += s->bytes_in; 3166 if (s->errors_in != 0) 3167 d->errors_in += s->errors_in; 3168 3169 if (s->packets_out != 0) 3170 d->packets_out += s->packets_out; 3171 if (s->bytes_out != 0) 3172 d->bytes_out += s->bytes_out; 3173 if (s->errors_out != 0) 3174 d->errors_out += s->errors_out; 3175 3176 if (s->collisions != 0) 3177 d->collisions += s->collisions; 3178 if (s->dropped != 0) 3179 d->dropped += s->dropped; 3180 3181 if (poll) 3182 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in); 3183} 3184 3185static void 3186dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp) 3187{ 3188 struct ifnet_stat_increment_param *s = &inp->stats; 3189 3190 /* 3191 * Use of atomic operations is unavoidable here because 3192 * these stats may also be incremented elsewhere via KPIs. 
3193 */ 3194 if (s->packets_in != 0) { 3195 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in); 3196 s->packets_in = 0; 3197 } 3198 if (s->bytes_in != 0) { 3199 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in); 3200 s->bytes_in = 0; 3201 } 3202 if (s->errors_in != 0) { 3203 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in); 3204 s->errors_in = 0; 3205 } 3206 3207 if (s->packets_out != 0) { 3208 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out); 3209 s->packets_out = 0; 3210 } 3211 if (s->bytes_out != 0) { 3212 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out); 3213 s->bytes_out = 0; 3214 } 3215 if (s->errors_out != 0) { 3216 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out); 3217 s->errors_out = 0; 3218 } 3219 3220 if (s->collisions != 0) { 3221 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions); 3222 s->collisions = 0; 3223 } 3224 if (s->dropped != 0) { 3225 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped); 3226 s->dropped = 0; 3227 } 3228 /* 3229 * If we went over the threshold, notify NetworkStatistics. 3230 */ 3231 if (ifp->if_data_threshold && 3232 (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes > 3233 ifp->if_data_threshold) { 3234 ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes; 3235 nstat_ifnet_threshold_reached(ifp->if_index); 3236 } 3237 /* 3238 * No need for atomic operations as they are modified here 3239 * only from within the DLIL input thread context. 
3240 */ 3241 if (inp->tstats.packets != 0) { 3242 inp->pstats.ifi_poll_packets += inp->tstats.packets; 3243 inp->tstats.packets = 0; 3244 } 3245 if (inp->tstats.bytes != 0) { 3246 inp->pstats.ifi_poll_bytes += inp->tstats.bytes; 3247 inp->tstats.bytes = 0; 3248 } 3249} 3250 3251__private_extern__ void 3252dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m) 3253{ 3254 return (dlil_input_packet_list_common(ifp, m, 0, 3255 IFNET_MODEL_INPUT_POLL_OFF, FALSE)); 3256} 3257 3258__private_extern__ void 3259dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m, 3260 u_int32_t cnt, ifnet_model_t mode) 3261{ 3262 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE)); 3263} 3264 3265static void 3266dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, 3267 u_int32_t cnt, ifnet_model_t mode, boolean_t ext) 3268{ 3269 int error = 0; 3270 protocol_family_t protocol_family; 3271 mbuf_t next_packet; 3272 ifnet_t ifp = ifp_param; 3273 char * frame_header; 3274 struct if_proto * last_ifproto = NULL; 3275 mbuf_t pkt_first = NULL; 3276 mbuf_t * pkt_next = NULL; 3277 u_int32_t poll_thresh = 0, poll_ival = 0; 3278 3279 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0); 3280 3281 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 && 3282 (poll_ival = if_rxpoll_interval_pkts) > 0) 3283 poll_thresh = cnt; 3284 3285 while (m != NULL) { 3286 struct if_proto *ifproto = NULL; 3287 int iorefcnt = 0; 3288 uint32_t pktf_mask; /* pkt flags to preserve */ 3289 3290 if (ifp_param == NULL) 3291 ifp = m->m_pkthdr.rcvif; 3292 3293 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 && 3294 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) 3295 ifnet_poll(ifp); 3296 3297 /* Check if this mbuf looks valid */ 3298 MBUF_INPUT_CHECK(m, ifp); 3299 3300 next_packet = m->m_nextpkt; 3301 m->m_nextpkt = NULL; 3302 frame_header = m->m_pkthdr.pkt_hdr; 3303 m->m_pkthdr.pkt_hdr = NULL; 3304 3305 /* 3306 * Get an IO reference count if the interface is not 
3307 * loopback (lo0) and it is attached; lo0 never goes 3308 * away, so optimize for that. 3309 */ 3310 if (ifp != lo_ifp) { 3311 if (!ifnet_is_attached(ifp, 1)) { 3312 m_freem(m); 3313 goto next; 3314 } 3315 iorefcnt = 1; 3316 pktf_mask = 0; 3317 } else { 3318 /* 3319 * If this arrived on lo0, preserve interface addr 3320 * info to allow for connectivity between loopback 3321 * and local interface addresses. 3322 */ 3323 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO); 3324 } 3325 3326 /* make sure packet comes in clean */ 3327 m_classifier_init(m, pktf_mask); 3328 3329 ifp_inc_traffic_class_in(ifp, m); 3330 3331 /* find which protocol family this packet is for */ 3332 ifnet_lock_shared(ifp); 3333 error = (*ifp->if_demux)(ifp, m, frame_header, 3334 &protocol_family); 3335 ifnet_lock_done(ifp); 3336 if (error != 0) { 3337 if (error == EJUSTRETURN) 3338 goto next; 3339 protocol_family = 0; 3340 } 3341 3342 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) && 3343 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) 3344 dlil_input_cksum_dbg(ifp, m, frame_header, 3345 protocol_family); 3346 3347 /* 3348 * For partial checksum offload, we expect the driver to 3349 * set the start offset indicating the start of the span 3350 * that is covered by the hardware-computed checksum; 3351 * adjust this start offset accordingly because the data 3352 * pointer has been advanced beyond the link-layer header. 3353 * 3354 * Don't adjust if the interface is a bridge member, as 3355 * the adjustment will occur from the context of the 3356 * bridge interface during input. 
3357 */ 3358 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags & 3359 (CSUM_DATA_VALID | CSUM_PARTIAL)) == 3360 (CSUM_DATA_VALID | CSUM_PARTIAL)) { 3361 int adj; 3362 3363 if (frame_header == NULL || 3364 frame_header < (char *)mbuf_datastart(m) || 3365 frame_header > (char *)m->m_data || 3366 (adj = (m->m_data - frame_header)) > 3367 m->m_pkthdr.csum_rx_start) { 3368 m->m_pkthdr.csum_data = 0; 3369 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; 3370 hwcksum_in_invalidated++; 3371 } else { 3372 m->m_pkthdr.csum_rx_start -= adj; 3373 } 3374 } 3375 3376 pktap_input(ifp, protocol_family, m, frame_header); 3377 3378 if (m->m_flags & (M_BCAST|M_MCAST)) 3379 atomic_add_64(&ifp->if_imcasts, 1); 3380 3381 /* run interface filters, exclude VLAN packets PR-3586856 */ 3382 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { 3383 error = dlil_interface_filters_input(ifp, &m, 3384 &frame_header, protocol_family); 3385 if (error != 0) { 3386 if (error != EJUSTRETURN) 3387 m_freem(m); 3388 goto next; 3389 } 3390 } 3391 if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) { 3392 m_freem(m); 3393 goto next; 3394 } 3395 3396 /* Lookup the protocol attachment to this interface */ 3397 if (protocol_family == 0) { 3398 ifproto = NULL; 3399 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp && 3400 (last_ifproto->protocol_family == protocol_family)) { 3401 VERIFY(ifproto == NULL); 3402 ifproto = last_ifproto; 3403 if_proto_ref(last_ifproto); 3404 } else { 3405 VERIFY(ifproto == NULL); 3406 ifnet_lock_shared(ifp); 3407 /* callee holds a proto refcnt upon success */ 3408 ifproto = find_attached_proto(ifp, protocol_family); 3409 ifnet_lock_done(ifp); 3410 } 3411 if (ifproto == NULL) { 3412 /* no protocol for this packet, discard */ 3413 m_freem(m); 3414 goto next; 3415 } 3416 if (ifproto != last_ifproto) { 3417 if (last_ifproto != NULL) { 3418 /* pass up the list for the previous protocol */ 3419 dlil_ifproto_input(last_ifproto, pkt_first); 3420 pkt_first = NULL; 3421 
if_proto_free(last_ifproto); 3422 } 3423 last_ifproto = ifproto; 3424 if_proto_ref(ifproto); 3425 } 3426 /* extend the list */ 3427 m->m_pkthdr.pkt_hdr = frame_header; 3428 if (pkt_first == NULL) { 3429 pkt_first = m; 3430 } else { 3431 *pkt_next = m; 3432 } 3433 pkt_next = &m->m_nextpkt; 3434 3435next: 3436 if (next_packet == NULL && last_ifproto != NULL) { 3437 /* pass up the last list of packets */ 3438 dlil_ifproto_input(last_ifproto, pkt_first); 3439 if_proto_free(last_ifproto); 3440 last_ifproto = NULL; 3441 } 3442 if (ifproto != NULL) { 3443 if_proto_free(ifproto); 3444 ifproto = NULL; 3445 } 3446 3447 m = next_packet; 3448 3449 /* update the driver's multicast filter, if needed */ 3450 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) 3451 ifp->if_updatemcasts = 0; 3452 if (iorefcnt == 1) 3453 ifnet_decr_iorefcnt(ifp); 3454 } 3455 3456 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0); 3457} 3458 3459errno_t 3460if_mcasts_update(struct ifnet *ifp) 3461{ 3462 errno_t err; 3463 3464 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL); 3465 if (err == EAFNOSUPPORT) 3466 err = 0; 3467 printf("%s: %s %d suspended link-layer multicast membership(s) " 3468 "(err=%d)\n", if_name(ifp), 3469 (err == 0 ? 
"successfully restored" : "failed to restore"), 3470 ifp->if_updatemcasts, err); 3471 3472 /* just return success */ 3473 return (0); 3474} 3475 3476static int 3477dlil_event_internal(struct ifnet *ifp, struct kev_msg *event) 3478{ 3479 struct ifnet_filter *filter; 3480 3481 /* Get an io ref count if the interface is attached */ 3482 if (!ifnet_is_attached(ifp, 1)) 3483 goto done; 3484 3485 /* 3486 * Pass the event to the interface filters 3487 */ 3488 lck_mtx_lock_spin(&ifp->if_flt_lock); 3489 /* prevent filter list from changing in case we drop the lock */ 3490 if_flt_monitor_busy(ifp); 3491 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { 3492 if (filter->filt_event != NULL) { 3493 lck_mtx_unlock(&ifp->if_flt_lock); 3494 3495 filter->filt_event(filter->filt_cookie, ifp, 3496 filter->filt_protocol, event); 3497 3498 lck_mtx_lock_spin(&ifp->if_flt_lock); 3499 } 3500 } 3501 /* we're done with the filter list */ 3502 if_flt_monitor_unbusy(ifp); 3503 lck_mtx_unlock(&ifp->if_flt_lock); 3504 3505 ifnet_lock_shared(ifp); 3506 if (ifp->if_proto_hash != NULL) { 3507 int i; 3508 3509 for (i = 0; i < PROTO_HASH_SLOTS; i++) { 3510 struct if_proto *proto; 3511 3512 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], 3513 next_hash) { 3514 proto_media_event eventp = 3515 (proto->proto_kpi == kProtoKPI_v1 ? 
3516 proto->kpi.v1.event : 3517 proto->kpi.v2.event); 3518 3519 if (eventp != NULL) { 3520 if_proto_ref(proto); 3521 ifnet_lock_done(ifp); 3522 3523 eventp(ifp, proto->protocol_family, 3524 event); 3525 3526 ifnet_lock_shared(ifp); 3527 if_proto_free(proto); 3528 } 3529 } 3530 } 3531 } 3532 ifnet_lock_done(ifp); 3533 3534 /* Pass the event to the interface */ 3535 if (ifp->if_event != NULL) 3536 ifp->if_event(ifp, event); 3537 3538 /* Release the io ref count */ 3539 ifnet_decr_iorefcnt(ifp); 3540 3541done: 3542 return (kev_post_msg(event)); 3543} 3544 3545errno_t 3546ifnet_event(ifnet_t ifp, struct kern_event_msg *event) 3547{ 3548 struct kev_msg kev_msg; 3549 int result = 0; 3550 3551 if (ifp == NULL || event == NULL) 3552 return (EINVAL); 3553 3554 bzero(&kev_msg, sizeof (kev_msg)); 3555 kev_msg.vendor_code = event->vendor_code; 3556 kev_msg.kev_class = event->kev_class; 3557 kev_msg.kev_subclass = event->kev_subclass; 3558 kev_msg.event_code = event->event_code; 3559 kev_msg.dv[0].data_ptr = &event->event_data[0]; 3560 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE; 3561 kev_msg.dv[1].data_length = 0; 3562 3563 result = dlil_event_internal(ifp, &kev_msg); 3564 3565 return (result); 3566} 3567 3568#if CONFIG_MACF_NET 3569#include <netinet/ip6.h> 3570#include <netinet/ip.h> 3571static int 3572dlil_get_socket_type(struct mbuf **mp, int family, int raw) 3573{ 3574 struct mbuf *m; 3575 struct ip *ip; 3576 struct ip6_hdr *ip6; 3577 int type = SOCK_RAW; 3578 3579 if (!raw) { 3580 switch (family) { 3581 case PF_INET: 3582 m = m_pullup(*mp, sizeof(struct ip)); 3583 if (m == NULL) 3584 break; 3585 *mp = m; 3586 ip = mtod(m, struct ip *); 3587 if (ip->ip_p == IPPROTO_TCP) 3588 type = SOCK_STREAM; 3589 else if (ip->ip_p == IPPROTO_UDP) 3590 type = SOCK_DGRAM; 3591 break; 3592 case PF_INET6: 3593 m = m_pullup(*mp, sizeof(struct ip6_hdr)); 3594 if (m == NULL) 3595 break; 3596 *mp = m; 3597 ip6 = mtod(m, struct ip6_hdr *); 3598 if (ip6->ip6_nxt == 
IPPROTO_TCP) 3599 type = SOCK_STREAM; 3600 else if (ip6->ip6_nxt == IPPROTO_UDP) 3601 type = SOCK_DGRAM; 3602 break; 3603 } 3604 } 3605 3606 return (type); 3607} 3608#endif 3609 3610/* 3611 * This is mostly called from the context of the DLIL input thread; 3612 * because of that there is no need for atomic operations. 3613 */ 3614static __inline void 3615ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m) 3616{ 3617 if (!(m->m_flags & M_PKTHDR)) 3618 return; 3619 3620 switch (m_get_traffic_class(m)) { 3621 case MBUF_TC_BE: 3622 ifp->if_tc.ifi_ibepackets++; 3623 ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len; 3624 break; 3625 case MBUF_TC_BK: 3626 ifp->if_tc.ifi_ibkpackets++; 3627 ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len; 3628 break; 3629 case MBUF_TC_VI: 3630 ifp->if_tc.ifi_ivipackets++; 3631 ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len; 3632 break; 3633 case MBUF_TC_VO: 3634 ifp->if_tc.ifi_ivopackets++; 3635 ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len; 3636 break; 3637 default: 3638 break; 3639 } 3640 3641 if (mbuf_is_traffic_class_privileged(m)) { 3642 ifp->if_tc.ifi_ipvpackets++; 3643 ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len; 3644 } 3645} 3646 3647/* 3648 * This is called from DLIL output, hence multiple threads could end 3649 * up modifying the statistics. We trade off acccuracy for performance 3650 * by not using atomic operations here. 
3651 */ 3652static __inline void 3653ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m) 3654{ 3655 if (!(m->m_flags & M_PKTHDR)) 3656 return; 3657 3658 switch (m_get_traffic_class(m)) { 3659 case MBUF_TC_BE: 3660 ifp->if_tc.ifi_obepackets++; 3661 ifp->if_tc.ifi_obebytes += m->m_pkthdr.len; 3662 break; 3663 case MBUF_TC_BK: 3664 ifp->if_tc.ifi_obkpackets++; 3665 ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len; 3666 break; 3667 case MBUF_TC_VI: 3668 ifp->if_tc.ifi_ovipackets++; 3669 ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len; 3670 break; 3671 case MBUF_TC_VO: 3672 ifp->if_tc.ifi_ovopackets++; 3673 ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len; 3674 break; 3675 default: 3676 break; 3677 } 3678 3679 if (mbuf_is_traffic_class_privileged(m)) { 3680 ifp->if_tc.ifi_opvpackets++; 3681 ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len; 3682 } 3683} 3684 3685/* 3686 * dlil_output 3687 * 3688 * Caller should have a lock on the protocol domain if the protocol 3689 * doesn't support finer grained locking. In most cases, the lock 3690 * will be held from the socket layer and won't be released until 3691 * we return back to the socket layer. 3692 * 3693 * This does mean that we must take a protocol lock before we take 3694 * an interface lock if we're going to take both. This makes sense 3695 * because a protocol is likely to interact with an ifp while it 3696 * is under the protocol lock. 3697 * 3698 * An advisory code will be returned if adv is not null. This 3699 * can be used to provide feedback about interface queues to the 3700 * application. 
3701 */ 3702errno_t 3703dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist, 3704 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv) 3705{ 3706 char *frame_type = NULL; 3707 char *dst_linkaddr = NULL; 3708 int retval = 0; 3709 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4]; 3710 char dst_linkaddr_buffer[MAX_LINKADDR * 4]; 3711 struct if_proto *proto = NULL; 3712 mbuf_t m; 3713 mbuf_t send_head = NULL; 3714 mbuf_t *send_tail = &send_head; 3715 int iorefcnt = 0; 3716 u_int32_t pre = 0, post = 0; 3717 u_int32_t fpkts = 0, fbytes = 0; 3718 int32_t flen = 0; 3719 3720 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); 3721 3722 /* Get an io refcnt if the interface is attached to prevent ifnet_detach 3723 * from happening while this operation is in progress */ 3724 if (!ifnet_is_attached(ifp, 1)) { 3725 retval = ENXIO; 3726 goto cleanup; 3727 } 3728 iorefcnt = 1; 3729 3730 /* update the driver's multicast filter, if needed */ 3731 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) 3732 ifp->if_updatemcasts = 0; 3733 3734 frame_type = frame_type_buffer; 3735 dst_linkaddr = dst_linkaddr_buffer; 3736 3737 if (raw == 0) { 3738 ifnet_lock_shared(ifp); 3739 /* callee holds a proto refcnt upon success */ 3740 proto = find_attached_proto(ifp, proto_family); 3741 if (proto == NULL) { 3742 ifnet_lock_done(ifp); 3743 retval = ENXIO; 3744 goto cleanup; 3745 } 3746 ifnet_lock_done(ifp); 3747 } 3748 3749preout_again: 3750 if (packetlist == NULL) 3751 goto cleanup; 3752 3753 m = packetlist; 3754 packetlist = packetlist->m_nextpkt; 3755 m->m_nextpkt = NULL; 3756 3757 if (raw == 0) { 3758 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ? 
3759 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output); 3760 retval = 0; 3761 if (preoutp != NULL) { 3762 retval = preoutp(ifp, proto_family, &m, dest, route, 3763 frame_type, dst_linkaddr); 3764 3765 if (retval != 0) { 3766 if (retval == EJUSTRETURN) 3767 goto preout_again; 3768 m_freem(m); 3769 goto cleanup; 3770 } 3771 } 3772 } 3773 3774#if CONFIG_MACF_NET 3775 retval = mac_ifnet_check_transmit(ifp, m, proto_family, 3776 dlil_get_socket_type(&m, proto_family, raw)); 3777 if (retval != 0) { 3778 m_freem(m); 3779 goto cleanup; 3780 } 3781#endif 3782 3783 do { 3784#if CONFIG_DTRACE 3785 if (!raw && proto_family == PF_INET) { 3786 struct ip *ip = mtod(m, struct ip*); 3787 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL, 3788 struct ip *, ip, struct ifnet *, ifp, 3789 struct ip *, ip, struct ip6_hdr *, NULL); 3790 3791 } else if (!raw && proto_family == PF_INET6) { 3792 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*); 3793 DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL, 3794 struct ip6_hdr *, ip6, struct ifnet*, ifp, 3795 struct ip*, NULL, struct ip6_hdr *, ip6); 3796 } 3797#endif /* CONFIG_DTRACE */ 3798 3799 if (raw == 0 && ifp->if_framer != NULL) { 3800 int rcvif_set = 0; 3801 3802 /* 3803 * If this is a broadcast packet that needs to be 3804 * looped back into the system, set the inbound ifp 3805 * to that of the outbound ifp. This will allow 3806 * us to determine that it is a legitimate packet 3807 * for the system. Only set the ifp if it's not 3808 * already set, just to be safe. 3809 */ 3810 if ((m->m_flags & (M_BCAST | M_LOOP)) && 3811 m->m_pkthdr.rcvif == NULL) { 3812 m->m_pkthdr.rcvif = ifp; 3813 rcvif_set = 1; 3814 } 3815 3816 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, 3817 frame_type, &pre, &post); 3818 if (retval != 0) { 3819 if (retval != EJUSTRETURN) 3820 m_freem(m); 3821 goto next; 3822 } 3823 3824 /* 3825 * For partial checksum offload, adjust the start 3826 * and stuff offsets based on the prepended header. 
3827 */ 3828 if ((m->m_pkthdr.csum_flags & 3829 (CSUM_DATA_VALID | CSUM_PARTIAL)) == 3830 (CSUM_DATA_VALID | CSUM_PARTIAL)) { 3831 m->m_pkthdr.csum_tx_stuff += pre; 3832 m->m_pkthdr.csum_tx_start += pre; 3833 } 3834 3835 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) 3836 dlil_output_cksum_dbg(ifp, m, pre, 3837 proto_family); 3838 3839 /* 3840 * Clear the ifp if it was set above, and to be 3841 * safe, only if it is still the same as the 3842 * outbound ifp we have in context. If it was 3843 * looped back, then a copy of it was sent to the 3844 * loopback interface with the rcvif set, and we 3845 * are clearing the one that will go down to the 3846 * layer below. 3847 */ 3848 if (rcvif_set && m->m_pkthdr.rcvif == ifp) 3849 m->m_pkthdr.rcvif = NULL; 3850 } 3851 3852 /* 3853 * Let interface filters (if any) do their thing ... 3854 */ 3855 /* Do not pass VLAN tagged packets to filters PR-3586856 */ 3856 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { 3857 retval = dlil_interface_filters_output(ifp, 3858 &m, proto_family); 3859 if (retval != 0) { 3860 if (retval != EJUSTRETURN) 3861 m_freem(m); 3862 goto next; 3863 } 3864 } 3865 /* 3866 * Strip away M_PROTO1 bit prior to sending packet 3867 * to the driver as this field may be used by the driver 3868 */ 3869 m->m_flags &= ~M_PROTO1; 3870 3871 /* 3872 * If the underlying interface is not capable of handling a 3873 * packet whose data portion spans across physically disjoint 3874 * pages, we need to "normalize" the packet so that we pass 3875 * down a chain of mbufs where each mbuf points to a span that 3876 * resides in the system page boundary. If the packet does 3877 * not cross page(s), the following is a no-op. 3878 */ 3879 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) { 3880 if ((m = m_normalize(m)) == NULL) 3881 goto next; 3882 } 3883 3884 /* 3885 * If this is a TSO packet, make sure the interface still 3886 * advertise TSO capability. 
3887 */ 3888 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) { 3889 retval = EMSGSIZE; 3890 m_freem(m); 3891 goto cleanup; 3892 } 3893 3894 /* 3895 * If the packet service class is not background, 3896 * update the timestamp to indicate recent activity 3897 * on a foreground socket. 3898 */ 3899 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) && 3900 (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) && 3901 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) 3902 ifp->if_fg_sendts = net_uptime(); 3903 3904 ifp_inc_traffic_class_out(ifp, m); 3905 pktap_output(ifp, proto_family, m, pre, post); 3906 3907 /* 3908 * Finally, call the driver. 3909 */ 3910 if (ifp->if_eflags & IFEF_SENDLIST) { 3911 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) { 3912 flen += (m_pktlen(m) - (pre + post)); 3913 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED; 3914 } 3915 *send_tail = m; 3916 send_tail = &m->m_nextpkt; 3917 } else { 3918 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) { 3919 flen = (m_pktlen(m) - (pre + post)); 3920 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED; 3921 } else { 3922 flen = 0; 3923 } 3924 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 3925 0, 0, 0, 0, 0); 3926 retval = (*ifp->if_output)(ifp, m); 3927 if (retval == EQFULL || retval == EQSUSPENDED) { 3928 if (adv != NULL && adv->code == FADV_SUCCESS) { 3929 adv->code = (retval == EQFULL ? 
3930 FADV_FLOW_CONTROLLED : 3931 FADV_SUSPENDED); 3932 } 3933 retval = 0; 3934 } 3935 if (retval == 0 && flen > 0) { 3936 fbytes += flen; 3937 fpkts++; 3938 } 3939 if (retval != 0 && dlil_verbose) { 3940 printf("%s: output error on %s retval = %d\n", 3941 __func__, if_name(ifp), 3942 retval); 3943 } 3944 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 3945 0, 0, 0, 0, 0); 3946 } 3947 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); 3948 3949next: 3950 m = packetlist; 3951 if (m != NULL) { 3952 packetlist = packetlist->m_nextpkt; 3953 m->m_nextpkt = NULL; 3954 } 3955 } while (m != NULL); 3956 3957 if (send_head != NULL) { 3958 VERIFY(ifp->if_eflags & IFEF_SENDLIST); 3959 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 3960 0, 0, 0, 0, 0); 3961 retval = (*ifp->if_output)(ifp, send_head); 3962 if (retval == EQFULL || retval == EQSUSPENDED) { 3963 if (adv != NULL) { 3964 adv->code = (retval == EQFULL ? 3965 FADV_FLOW_CONTROLLED : FADV_SUSPENDED); 3966 } 3967 retval = 0; 3968 } 3969 if (retval == 0 && flen > 0) { 3970 fbytes += flen; 3971 fpkts++; 3972 } 3973 if (retval != 0 && dlil_verbose) { 3974 printf("%s: output error on %s retval = %d\n", 3975 __func__, if_name(ifp), retval); 3976 } 3977 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); 3978 } 3979 3980 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0); 3981 3982cleanup: 3983 if (fbytes > 0) 3984 ifp->if_fbytes += fbytes; 3985 if (fpkts > 0) 3986 ifp->if_fpackets += fpkts; 3987 if (proto != NULL) 3988 if_proto_free(proto); 3989 if (packetlist) /* if any packets are left, clean up */ 3990 mbuf_freem_list(packetlist); 3991 if (retval == EJUSTRETURN) 3992 retval = 0; 3993 if (iorefcnt == 1) 3994 ifnet_decr_iorefcnt(ifp); 3995 3996 return (retval); 3997} 3998 3999errno_t 4000ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code, 4001 void *ioctl_arg) 4002{ 4003 struct ifnet_filter *filter; 4004 int retval = EOPNOTSUPP; 4005 int result = 0; 4006 4007 if 
	(ifp == NULL || ioctl_code == 0)
		return (EINVAL);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1))
		return (EOPNOTSUPP);

	/* Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			/*
			 * Drop the filter lock around the callback; the
			 * busy mark above keeps the list stable meanwhile.
			 */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL)
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP)
					result = EOPNOTSUPP;
				retval = result;
				if (retval && retval != EOPNOTSUPP)
					goto cleanup;
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl)
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means "handled, stop processing": report success */
	if (retval == EJUSTRETURN)
		retval = 0;

	ifnet_decr_iorefcnt(ifp);

	return (retval);
}

/*
 * Enable/disable the BPF tap callback on an interface by forwarding the
 * request to the driver's if_set_bpf_tap routine, holding an I/O
 * reference across the call.  Returns ENXIO if the interface is not
 * attached, and 0 (no-op) if the driver supplies no tap routine.
 */
__private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
{
	errno_t error = 0;


	if (ifp->if_set_bpf_tap) {
		/* Get an io reference on the interface if it is attached */
		if (!ifnet_is_attached(ifp, 1))
			return ENXIO;
		error = ifp->if_set_bpf_tap(ifp, mode, callback);
		ifnet_decr_iorefcnt(ifp);
	}
	return (error);
}

/*
 * Resolve a multicast protocol address into a link-layer address.
 * The protocol attached for proto_addr->sa_family gets the first chance
 * to translate; the interface's if_check_multi routine then verifies
 * either the translated link-layer address (when translation succeeded)
 * or the original protocol address (when it was unsupported).
 * Caller provides ll_addr storage of ll_len bytes, zeroed here.
 */
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_is_attached(ifp, 1))
		return result;

	bzero(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL)
			result = resolvep(ifp, proto_addr,
			    (struct sockaddr_dl*)(void *)ll_addr, ll_len);
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0)
			verify = ll_addr;
		else
			verify = proto_addr;
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return (result);
}

/*
 * Hand an ARP request/reply to the protocol attached for
 * target_proto->sa_family on ifp via its send_arp KPI callback.
 * Returns ENOTSUP when no such protocol (or callback) exists.
 */
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
    const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4180 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp); 4181 if (arpp == NULL) { 4182 result = ENOTSUP; 4183 } else { 4184 switch (arpop) { 4185 case ARPOP_REQUEST: 4186 arpstat.txrequests++; 4187 if (target_hw != NULL) 4188 arpstat.txurequests++; 4189 break; 4190 case ARPOP_REPLY: 4191 arpstat.txreplies++; 4192 break; 4193 } 4194 result = arpp(ifp, arpop, sender_hw, sender_proto, 4195 target_hw, target_proto); 4196 } 4197 if_proto_free(proto); 4198 } 4199 4200 return (result); 4201} 4202 4203struct net_thread_marks { }; 4204static const struct net_thread_marks net_thread_marks_base = { }; 4205 4206__private_extern__ const net_thread_marks_t net_thread_marks_none = 4207 &net_thread_marks_base; 4208 4209__private_extern__ net_thread_marks_t 4210net_thread_marks_push(u_int32_t push) 4211{ 4212 static const char *const base = (const void*)&net_thread_marks_base; 4213 u_int32_t pop = 0; 4214 4215 if (push != 0) { 4216 struct uthread *uth = get_bsdthread_info(current_thread()); 4217 4218 pop = push & ~uth->uu_network_marks; 4219 if (pop != 0) 4220 uth->uu_network_marks |= pop; 4221 } 4222 4223 return ((net_thread_marks_t)&base[pop]); 4224} 4225 4226__private_extern__ net_thread_marks_t 4227net_thread_unmarks_push(u_int32_t unpush) 4228{ 4229 static const char *const base = (const void*)&net_thread_marks_base; 4230 u_int32_t unpop = 0; 4231 4232 if (unpush != 0) { 4233 struct uthread *uth = get_bsdthread_info(current_thread()); 4234 4235 unpop = unpush & uth->uu_network_marks; 4236 if (unpop != 0) 4237 uth->uu_network_marks &= ~unpop; 4238 } 4239 4240 return ((net_thread_marks_t)&base[unpop]); 4241} 4242 4243__private_extern__ void 4244net_thread_marks_pop(net_thread_marks_t popx) 4245{ 4246 static const char *const base = (const void*)&net_thread_marks_base; 4247 ptrdiff_t pop = (caddr_t)popx - (caddr_t)base; 4248 4249 if (pop != 0) { 4250 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U; 4251 struct uthread *uth = get_bsdthread_info(current_thread()); 4252 4253 
VERIFY((pop & ones) == pop); 4254 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop); 4255 uth->uu_network_marks &= ~pop; 4256 } 4257} 4258 4259__private_extern__ void 4260net_thread_unmarks_pop(net_thread_marks_t unpopx) 4261{ 4262 static const char *const base = (const void*)&net_thread_marks_base; 4263 ptrdiff_t unpop = (caddr_t)unpopx - (caddr_t)base; 4264 4265 if (unpop != 0) { 4266 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U; 4267 struct uthread *uth = get_bsdthread_info(current_thread()); 4268 4269 VERIFY((unpop & ones) == unpop); 4270 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0); 4271 uth->uu_network_marks |= unpop; 4272 } 4273} 4274 4275__private_extern__ u_int32_t 4276net_thread_is_marked(u_int32_t check) 4277{ 4278 if (check != 0) { 4279 struct uthread *uth = get_bsdthread_info(current_thread()); 4280 return (uth->uu_network_marks & check); 4281 } 4282 else 4283 return (0); 4284} 4285 4286__private_extern__ u_int32_t 4287net_thread_is_unmarked(u_int32_t check) 4288{ 4289 if (check != 0) { 4290 struct uthread *uth = get_bsdthread_info(current_thread()); 4291 return (~uth->uu_network_marks & check); 4292 } 4293 else 4294 return (0); 4295} 4296 4297static __inline__ int 4298_is_announcement(const struct sockaddr_in * sender_sin, 4299 const struct sockaddr_in * target_sin) 4300{ 4301 if (sender_sin == NULL) { 4302 return (FALSE); 4303 } 4304 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr); 4305} 4306 4307__private_extern__ errno_t 4308dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw, 4309 const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw, 4310 const struct sockaddr* target_proto0, u_int32_t rtflags) 4311{ 4312 errno_t result = 0; 4313 const struct sockaddr_in * sender_sin; 4314 const struct sockaddr_in * target_sin; 4315 struct sockaddr_inarp target_proto_sinarp; 4316 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0; 4317 4318 if (target_proto 
== NULL || (sender_proto != NULL && 4319 sender_proto->sa_family != target_proto->sa_family)) 4320 return (EINVAL); 4321 4322 /* 4323 * If the target is a (default) router, provide that 4324 * information to the send_arp callback routine. 4325 */ 4326 if (rtflags & RTF_ROUTER) { 4327 bcopy(target_proto, &target_proto_sinarp, 4328 sizeof (struct sockaddr_in)); 4329 target_proto_sinarp.sin_other |= SIN_ROUTER; 4330 target_proto = (struct sockaddr *)&target_proto_sinarp; 4331 } 4332 4333 /* 4334 * If this is an ARP request and the target IP is IPv4LL, 4335 * send the request on all interfaces. The exception is 4336 * an announcement, which must only appear on the specific 4337 * interface. 4338 */ 4339 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto; 4340 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto; 4341 if (target_proto->sa_family == AF_INET && 4342 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) && 4343 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST && 4344 !_is_announcement(target_sin, sender_sin)) { 4345 ifnet_t *ifp_list; 4346 u_int32_t count; 4347 u_int32_t ifp_on; 4348 4349 result = ENOTSUP; 4350 4351 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) { 4352 for (ifp_on = 0; ifp_on < count; ifp_on++) { 4353 errno_t new_result; 4354 ifaddr_t source_hw = NULL; 4355 ifaddr_t source_ip = NULL; 4356 struct sockaddr_in source_ip_copy; 4357 struct ifnet *cur_ifp = ifp_list[ifp_on]; 4358 4359 /* 4360 * Only arp on interfaces marked for IPv4LL 4361 * ARPing. This may mean that we don't ARP on 4362 * the interface the subnet route points to. 
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
					continue;

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/*
				 * source_ip != NULL implies source_ip_copy was
				 * filled in before the loop's break above.
				 * Hold the link-layer ifaddr so it survives
				 * dropping the ifnet lock.
				 */
				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				/* keep the first non-ENOTSUP result */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return (result);
}

/*
 * Returns non-zero if ifp is on the global interface list.
 * Caller must hold ifnet head lock.
 */
static int
ifnet_lookup(struct ifnet *ifp)
{
	struct ifnet *_ifp;

	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
		if (_ifp == ifp)
			break;
	}
	return (_ifp != NULL);
}
/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 *
 * Returns true only when the interface is fully attached
 * (IFRF_ATTACHED set and IFRF_DETACHING clear).
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
	    IFRF_ATTACHED))) {
		if (refio > 0)
			ifp->if_refio++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return (ret);
}

/*
 * Release an I/O reference taken via ifnet_is_attached(ifp, 1).
 */
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refio > 0);
	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
	ifp->if_refio--;

	/* if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 &&
	    (ifp->if_refflags & IFRF_DETACHING) != 0) {
		wakeup(&(ifp->if_refio));
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}

/*
 * Record a refcount hold/release backtrace into the dlil_ifnet's debug
 * history ring.  Only legal when the DLIF_DEBUG flag is set.
 */
static void
dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
{
	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
	ctrace_t *tr;
	u_int32_t idx;
	u_int16_t *cnt;

	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
		/* NOTREACHED */
	}

	if (refhold) {
		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
		tr = dl_if_dbg->dldbg_if_refhold;
	} else {
		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
		tr = dl_if_dbg->dldbg_if_refrele;
	}

	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);
}

/*
 * Take a reference on the underlying dlil_ifnet.  Panics on refcount
 * wraparound; returns EINVAL for a NULL ifp.
 */
errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	++dl_if->dl_if_refcnt;
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, TRUE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}

/*
 * Drop a reference taken by dlil_if_ref().  Panics on underflow;
 * returns EINVAL for a NULL ifp.
 */
errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	if (dl_if == NULL)
		return (EINVAL);

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	--dl_if->dl_if_refcnt;
	if (dl_if->dl_if_trace != NULL)
		(*dl_if->dl_if_trace)(dl_if, FALSE);
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return (0);
}

/*
 * Common attach path shared by the v1/v2 protocol KPIs: rejects a
 * duplicate family (EEXIST), lets the family module refine the demux
 * descriptors, links the if_proto at the tail of its hash chain, takes
 * the attach refcnt, and posts KEV_DL_PROTO_ATTACHED.
 */
static errno_t
dlil_attach_protocol_internal(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	int retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		return (EEXIST);
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		return (retval);
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	if (prev_proto)
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	else
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof (struct kev_dl_proto_data));
	return (retval);
}

/*
 * Attach a v1-KPI protocol to an interface: allocates an if_proto from
 * the zone, copies the v1 callback set from proto_details, and defers
 * to dlil_attach_protocol_internal().  The allocation is freed here on
 * any failure.
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, dlif_proto_size);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v1 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}

/*
 * Attach a v2-KPI protocol to an interface; mirror of
 * ifnet_attach_protocol() for the v2 callback set.
 */
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc(dlif_proto_zone);
	if (ifproto == NULL) {
		retval = ENOMEM;
		goto end;
	}
	bzero(ifproto, sizeof(*ifproto));

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol_internal(ifproto,
	    proto_details->demux_list, proto_details->demux_count);

	if (dlil_verbose) {
		printf("%s: attached v2 protocol %d\n", if_name(ifp),
		    protocol);
	}

end:
	if (retval != 0 && retval != EEXIST && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    if_name(ifp), protocol, retval);
	}
	ifnet_head_done();
	if (retval != 0 && ifproto != NULL)
		zfree(dlif_proto_zone, ifproto);
	return (retval);
}

/*
 * Detach a protocol from an interface: notifies the family module via
 * if_del_proto, unhooks the if_proto from the hash, and swaps its
 * callbacks for the ifproto_media_* stubs so racing callers fail with
 * ENXIO rather than dereference stale pointers.  Two refcnts are
 * dropped: the attach reference and the lookup reference; final
 * teardown happens when the last reference goes away.
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto)
		ifp->if_del_proto(ifp, proto->protocol_family);

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output= ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp =
		    ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		printf("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return (retval);
}


/*
 * ifproto_media_* stubs: installed in place of a detached protocol's
 * callbacks (see ifnet_detach_protocol) so that any late caller gets a
 * harmless ENXIO (or a no-op, for events) instead of jumping through a
 * stale function pointer.
 */
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return (ENXIO);
}

static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return (ENXIO);

}

static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);

}

static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}

static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return (ENXIO);
}

static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return (ENXIO);
}
/* ENXIO stub for a detached protocol's send_arp callback */
static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return (ENXIO);
}

extern int if_next_index(void);

/*
 * Attach an ifnet to the system: insert it on the global interface
 * list, assign/verify its index, install the permanent link-layer
 * address, reset statistics, set up the transmit classq and, where
 * applicable, dedicated input/starter/poller threads, attach IGMP/MLD
 * state, then finally mark the interface IFRF_ATTACHED and post
 * KEV_DL_IF_ATTACHED.  Returns EINVAL, EEXIST, ENODEV or ENOBUFS on
 * the various failure paths; unrecoverable inconsistencies panic.
 */
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifnet *tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	u_int32_t sflags = 0;
	int err;

	if (ifp == NULL)
		return (EINVAL);

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return (EEXIST);
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (ifp->if_refflags & IFRF_ATTACHED) {
		panic_plain("%s: flags mismatch (attached set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	/*
	 * A caller-supplied link address must agree with any
	 * previously configured address length.
	 */
	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (EINVAL);
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENODEV);
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = zalloc(dlif_phash_zone);
	if (ifp->if_proto_hash == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}
	bzero(ifp->if_proto_hash, dlif_phash_size);

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
	TAILQ_INIT(&ifp->if_prefixhead);

	/* a recycled (DLIF_REUSE) ifnet keeps its multicast list */
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	/* Assign an interface index unless one is already set */
	if (ifp->if_index == 0) {
		int idx = if_next_index();

		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return (ENOBUFS);
		}
		ifp->if_index = idx;
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return (ENOBUFS);
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	IFA_ADDREF_LOCKED(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

#if CONFIG_MACF_NET
	mac_ifnet_label_associate(ifp);
#endif

	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof (ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED);

	/* By default, use SFB and enable flow advisory */
	sflags = PKTSCHEDF_QALG_SFB;
	if (if_flowadv)
		sflags |= PKTSCHEDF_QALG_FLOWCTL;

	if (if_delaybased_queue)
		sflags |= PKTSCHEDF_QALG_DELAYBASED;

	/* Initialize transmit queue(s) */
	err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
	if (err != 0) {
		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
		    "err=%d", __func__, ifp, err);
		/* NOTREACHED */
	}

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->stats, sizeof (dl_inp->stats));
	VERIFY(dl_inp->input_waiting == 0);
	VERIFY(dl_inp->wtot == 0);
	VERIFY(dl_inp->ifp == NULL);
	VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
	VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
	VERIFY(!dl_inp->net_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->input_thr == THREAD_NULL);
	VERIFY(dl_inp->wloop_thr == THREAD_NULL);
	VERIFY(dl_inp->poll_thr == THREAD_NULL);
	VERIFY(dl_inp->tag == 0);
	VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
	bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
	bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
	bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->input_mbuf_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	/*
	 * A specific DLIL input thread is created per Ethernet/cellular
	 * interface or for an interface which supports opportunistic
	 * input polling. Pseudo interfaces or other types of interfaces
	 * use the main input thread instead.
	 */
	if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
	    ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
		ifp->if_inp = dl_inp;
		err = dlil_create_input_thread(ifp, ifp->if_inp);
		if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}

	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);

		VERIFY(ifp->if_start != NULL);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp,
		    &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	} else {
		ifp->if_flowhash = 0;
	}

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 */
	if (ifp->if_eflags & IFEF_RXPOLL) {
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		VERIFY(ifp->if_poll_thread == THREAD_NULL);

		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_update = 0;
		ifp->if_poll_active = 0;
		ifp->if_poll_req = 0;
		if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	/* Count link-layer memberships suspended from a previous life */
	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC)
				ifp->if_updatemcasts++;
			IFMA_UNLOCK(ifma);
		}

		printf("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));
	ifp->if_fg_sendts = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	ifnet_lock_done(ifp);
	ifnet_head_done();

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that. This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
#if INET6
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}
#endif /* INET6 */

	VERIFY(ifp->if_data_threshold == 0);

	/*
	 * Finally, mark this ifnet as attached.
	 */
	lck_mtx_lock(rnh_lock);
	ifnet_lock_exclusive(ifp);
	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD :
	    IFNET_LQM_THRESH_UNKNOWN;
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = IFRF_ATTACHED;
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);

	}
	ifnet_lock_done(ifp);
	lck_mtx_unlock(rnh_lock);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);

	if (dlil_verbose) {
		printf("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return (0);
}

/*
 * Prepare the storage for the first/permanent link address, which must
 * must have the same lifetime as the ifnet itself.
Although the link
 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
 * its location in memory must never change as it may still be referred
 * to by some parts of the system afterwards (unfortunate implementation
 * artifacts inherited from BSD.)
 *
 * Caller must hold ifnet lock as writer.
 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ*2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	/*
	 * Compute the sizes of the AF_LINK sockaddr: name bytes go in
	 * sdl_data, followed by if_addrlen bytes of link-layer address;
	 * round the total up to a 32-bit boundary.
	 */
	namelen = snprintf(workbuf, sizeof (workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof(struct sockaddr_dl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			/* room for the ifaddr plus both sockaddr storages */
			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
			if (ifa == NULL)
				return (NULL);
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	/* Fill in the address sockaddr_dl (name + index + lladdr bytes) */
	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = socksize;
	asdl->sdl_family = AF_LINK;
	bcopy(workbuf, asdl->sdl_data, namelen);
	asdl->sdl_nlen = namelen;
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	/* Netmask covers only the interface-name portion (all-ones bytes) */
	ifa->ifa_netmask = (struct sockaddr*)msdl;
	msdl->sdl_len = masklen;
	while (namelen != 0)
		msdl->sdl_data[--namelen] = 0xff;
	IFA_UNLOCK(ifa);

	/* Drop the reference previously held on a replaced link address */
	if (oifa != NULL)
		IFA_REMREF(oifa);

	return (ifa);
}

/*
 * Ask the network layers (IPv4/IPv6) to remove all of their addresses
 * from this interface.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
#if INET6
	in6_purgeaddrs(ifp);
#endif /* INET6 */
}

/*
 * Begin detaching an interface from the system.  This removes the ifnet
 * from the global lookup structures and marks it IFRF_DETACHING; the
 * heavyweight teardown is handed off to the detacher worker thread
 * (see ifnet_detach_final()) to avoid reentrancy.
 *
 * Returns 0 on success, EINVAL if ifp is NULL or was never attached,
 * ENXIO if a detach is already in progress.
 */
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;

	if (ifp == NULL)
		return (EINVAL);

	/* Lock ordering: rnh_lock -> ifnet_head -> ifnet lock */
	lck_mtx_lock(rnh_lock);
	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (EINVAL);
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		lck_mtx_unlock(rnh_lock);
		return (ENXIO);
	}
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detaching\n", if_name(ifp));

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	ifindex2ifnet[ifp->if_index] = NULL;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof (ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));

	ifnet_lock_done(ifp);
	ifnet_head_done();
	lck_mtx_unlock(rnh_lock);

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL)
		ifnet_release(delegated_ifp);

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp)
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL)
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL)
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Mark the interface as DOWN */
	if_down(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifp->if_data_threshold = 0;
	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
#if INET6
	mld_domifdetach(ifp);
#endif /* INET6 */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return (0);
}

/*
 * Append ifp to the list of interfaces pending final detach and wake
 * the detacher thread.  Caller must hold the dlil ifnet lock.
 */
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}

/*
 * Remove and return the next interface pending final detach, or NULL
 * if the list is empty.  Caller must hold the dlil ifnet lock.
 */
static struct ifnet *
ifnet_detaching_dequeue(void)
{
	struct ifnet *ifp;

	dlil_if_lock_assert();

	ifp = TAILQ_FIRST(&ifnet_detaching_head);
	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
	if (ifp != NULL) {
		VERIFY(ifnet_detaching_cnt != 0);
		--ifnet_detaching_cnt;
		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
		ifp->if_detaching_link.tqe_next = NULL;
		ifp->if_detaching_link.tqe_prev = NULL;
	}
	return (ifp);
}

/*
 * Continuation routine for the detacher thread: loop forever, sleeping
 * (via continuation, so no kernel stack is held) until interfaces are
 * queued, then perform the final detach for each one.
 */
static int
ifnet_detacher_thread_cont(int err)
{
#pragma unused(err)
	struct ifnet *ifp;

	for (;;) {
		dlil_if_lock_assert();
		while (ifnet_detaching_cnt == 0) {
			(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
			    (PZERO - 1), "ifnet_detacher_cont", 0,
			    ifnet_detacher_thread_cont);
			/* NOTREACHED */
		}

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/* drop the lock across the (blocking) teardown */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}
	/* NOTREACHED */
	return (0);
}

/*
 * Entry point of the detacher thread; immediately parks in the
 * continuation above and never returns.
 */
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
	    (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
	/*
	 * msleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	dlil_if_unlock();
	VERIFY(0);
}

/*
 * Final phase of interface detach, run from the detacher thread: wait
 * for outstanding I/O references, tear down filters, protocols, the
 * permanent link address, starter/poller/input threads, then mark the
 * ifnet fully detached and drop the attach reference.
 */
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		printf("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Drain and destroy send queue */
	ifclassq_teardown(ifp);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	/* filter detach callbacks may block; drop the lock around each */
	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Uplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		/*
		 * proto_unplumb() may block, so drop/reacquire the ifnet
		 * lock and re-fetch the bucket head each iteration.
		 */
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);

	/* Prefix list should be empty by now */
	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags = 0;
		ifp->if_start_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);
	}

	/*
	 * Signal the poller thread to terminate itself.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->net_affinity) {
			struct thread *tp, *wtp, *ptp;

			lck_mtx_lock_spin(&inp->input_lck);
			wtp = inp->wloop_thr;
			inp->wloop_thr = THREAD_NULL;
			ptp = inp->poll_thr;
			inp->poll_thr = THREAD_NULL;
			tp = inp->input_thr;	/* don't nullify now */
			inp->tag = 0;
			inp->net_affinity = FALSE;
			lck_mtx_unlock(&inp->input_lck);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* tell the input thread to terminate (if not running now) */
		lck_mtx_lock_spin(&inp->input_lck);
		inp->input_waiting |= DLIL_INPUT_TERMINATE;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
			wakeup_one((caddr_t)&inp->input_waiting);
		}
		lck_mtx_unlock(&inp->input_lck);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);

	/* notify the driver; it may free its softc after this */
	if (if_free != NULL)
		if_free(ifp);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose)
		printf("%s: detached\n", if_name(ifp));

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}

/*
 * The ifp_if_* routines below are installed on a detached ifnet so that
 * late callers hit harmless stubs instead of unloaded driver code.
 */

/* Stub output handler: silently drop the packet */
static errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem(m);
	return (0);
}

/* Stub start handler: purge anything still queued on the interface */
static void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}

/* Stub poll handler: report no packets available */
static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL)
		*m_head = NULL;
	if (m_tail != NULL)
		*m_tail = NULL;
	if (cnt != NULL)
		*cnt = 0;
	if (len != NULL)
		*len = 0;
}

/* Stub control handler: operation not supported */
static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return (EOPNOTSUPP);
}

/* Stub demux handler: drop the packet */
static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return (EJUSTRETURN);
}

/* Stub add-protocol handler: always rejected */
static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return (EINVAL);
}

/* Stub delete-protocol handler: always rejected */
static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return (EINVAL);
}

/* Stub multicast-address check: not supported */
static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return (EOPNOTSUPP);
}
/* Stub legacy framer: delegate to the extended variant (drops the mbuf) */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
{
#pragma unused(ifp, m, sa, ll, t)
	return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
}

/* Stub extended framer: free the packet and report zero header lengths */
static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL)
		*pre = 0;
	if (post != NULL)
		*post = 0;

	return (EJUSTRETURN);
}

/* Stub ioctl handler for detached interfaces */
errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return (EOPNOTSUPP);
}

/* Stub BPF tap setter: accept and ignore */
static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return (0);
}

/* Stub free callback: nothing to release */
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}

/* Stub event callback: events are ignored */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}

/*
 * Obtain a dlil_ifnet for the given family/uniqueid, recycling a
 * previously-detached one with a matching uniqueid when possible,
 * otherwise allocating a fresh 64-bit-aligned instance from dlif_zone.
 *
 * Returns 0 with *ifp set on success; EBUSY if a matching interface is
 * still in use; ENOMEM on allocation failure.
 */
__private_extern__
int dlil_if_acquire(u_int32_t family, const void *uniqueid,
	size_t uniqueid_len, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	dlil_if_lock();
	/* Look for a recyclable interface of the same family */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family)
			continue;

		lck_mtx_lock(&dlifp1->dl_if_lock);
		/* same uniqueid and same len or no unique id specified */
		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
		    !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
			/* check for matching interface in use */
			if (dlifp1->dl_if_flags & DLIF_INUSE) {
				if (uniqueid_len) {
					ret = EBUSY;
					lck_mtx_unlock(&dlifp1->dl_if_lock);
					goto end;
				}
			} else {
				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				*ifp = ifp1;
				goto end;
			}
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* no interface found, allocate a new one */
	buf = zalloc(dlif_zone);
	if (buf == NULL) {
		ret = ENOMEM;
		goto end;
	}
	bzero(buf, dlif_bufsize);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
	    sizeof (u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
		    M_NKE, M_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			zfree(dlif_zone, dlifp1);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if CONFIG_MACF_NET
	mac_ifnet_label_init(ifp1);
#endif

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	/* initialize all per-ifnet locks */
	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
	    ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
#if INET6
	lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
#endif

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
	    ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
	    ifnet_lock_attr);

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;

end:
	dlil_if_unlock();

	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));

	return (ret);
}

/*
 * Return a dlil_ifnet to the recycle pool: clear DLIF_INUSE and reset
 * the interface name/xname to the embedded storage so a later
 * dlil_if_acquire() may reuse this instance.
 */
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	ifnet_lock_exclusive(ifp);
	lck_mtx_lock(&dlifp->dl_if_lock);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	lck_mtx_unlock(&dlifp->dl_if_lock);
#if CONFIG_MACF_NET
	/*
	 * We can either recycle the MAC label here or in dlil_if_acquire().
	 * It seems logical to do it here but this means that anything that
	 * still has a handle on ifp will now see it as unlabeled.
	 * Since the interface is "dead" that may be OK.  Revisit later.
	 */
	mac_ifnet_label_recycle(ifp);
#endif
	ifnet_lock_done(ifp);
}

/* Acquire the global dlil ifnet mutex */
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

/* Release the global dlil ifnet mutex */
__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

/* Assert that the global dlil ifnet mutex is held by this thread */
__private_extern__ void
dlil_if_lock_assert(void)
{
	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
#if INET6
	(void) proto_unplumb(PF_INET6, ifp);
#endif /* INET6 */
}

/* Copy the interface's cached IPv4 source route into *dst */
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	/* take as spin lock, then convert since copyout may block */
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

/*
 * Store *src as the interface's cached IPv4 source route; if caching is
 * disabled (interface detaching), release the route instead.
 */
static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

#if INET6
/* Copy the interface's cached IPv6 source route into *dst */
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof (*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}

/*
 * Store *src as the interface's cached IPv6 source route; if caching is
 * disabled (interface detaching), release the route instead.
 */
static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof (*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
#endif /* INET6 */

/*
 * Look up (and cache) a scoped route for src_ip on this interface,
 * reusing the cached entry when it is still usable and matches.
 * Returns a referenced rtentry, or NULL if no route is found.
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			dst->sin_len = sizeof (src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
			    0, 0, ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry	*rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}

#if INET6
/*
 * IPv6 flavor of ifnet_cached_rtlookup_inet(); additionally fills in
 * the destination's scope id before performing the scoped lookup.
 */
struct rtentry*
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof (src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
			    ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry	*rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return (src_rt.ro_rt);
}
#endif /* INET6 */

/*
 * Update the interface's Link Quality Metric, normalizing the raw value
 * to the nearest threshold edge, and post a kernel event if it changed.
 */
void
if_lqm_update(struct ifnet *ifp, int lqm)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_BAD)
		lqm = IFNET_LQM_THRESH_BAD;
	else if (lqm > IFNET_LQM_THRESH_BAD && lqm <= IFNET_LQM_THRESH_POOR)
		lqm = IFNET_LQM_THRESH_POOR;
	else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD)
		lqm = IFNET_LQM_THRESH_GOOD;

	ifnet_lock_exclusive(ifp);
	if (lqm == ifp->if_lqm) {
		ifnet_lock_done(ifp);
		return;		/* nothing to update */
	}
	ifp->if_lqm = lqm;
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof (ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
}

/* for uuid.c */
int
uuid_get_ethernet(u_int8_t *node)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	/* scan attached interfaces for the first Ethernet link address */
	ifnet_head_lock_shared();
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		ifnet_lock_shared(ifp);
		IFA_LOCK_SPIN(ifp->if_lladdr);
		sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
		if (sdl->sdl_type == IFT_ETHER) {
			memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
			IFA_UNLOCK(ifp->if_lladdr);
			ifnet_lock_done(ifp);
			ifnet_head_done();
			return (0);
		}
		IFA_UNLOCK(ifp->if_lladdr);
		ifnet_lock_done(ifp);
	}
	ifnet_head_done();

	return (-1);
}

/*
 * sysctl handler for net.link.generic.system rxpoll knob; rejects the
 * update with ENXIO when receive polling is globally disabled.
 */
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (net_rxpoll == 0)
		return (ENXIO);

	if_rxpoll = i;
	return (err);
}

static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;
	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	/* silently clamp to the supported minimum */
	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;

	if_rxpoll_mode_holdtime = q;

	return (err);
}

/* sysctl handler: set rxpoll sampling hold time (clamped to the minimum) */
static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_SAMPLETIME_MIN)
		q = IF_RXPOLL_SAMPLETIME_MIN;

	if_rxpoll_sample_holdtime = q;

	return (err);
}

/* sysctl handler: set rxpoll interval time (clamped to the minimum) */
static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (q < IF_RXPOLL_INTERVALTIME_MIN)
		q = IF_RXPOLL_INTERVALTIME_MIN;

	if_rxpoll_interval_time = q;

	return (err);
}

/*
 * sysctl handler: set rxpoll low watermark; must be non-zero and
 * strictly below the high watermark.
 */
static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i == 0 || i >= if_rxpoll_whiwat)
		return (EINVAL);

	if_rxpoll_wlowat = i;
	return (err);
}

/*
 * sysctl handler: set rxpoll high watermark; must be strictly above
 * the low watermark.
 */
static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i <= if_rxpoll_wlowat)
		return (EINVAL);

	if_rxpoll_whiwat = i;
	return (err);
}

/* sysctl handler: set send queue maximum length (clamped to the minimum) */
static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_SNDQ_MINLEN)
		i = IF_SNDQ_MINLEN;

	if_sndq_maxlen = i;
	return (err);
}

/* sysctl handler: set receive queue maximum length (clamped to the minimum) */
static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (i < IF_RCVQ_MINLEN)
		i = IF_RCVQ_MINLEN;

	if_rcvq_maxlen = i;
	return (err);
}

/*
 * Report that a node (identified by an AF_LINK or AF_INET6 address)
 * has appeared on the interface: notify ND6 and post a
 * KEV_DL_NODE_PRESENCE kernel event carrying the node's RSSI, link
 * quality metric, proximity metric and 48-byte service info blob.
 */
void
dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev;
	struct sockaddr_dl *sdl;
	struct sockaddr_in6 *sin6;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
	sin6 = &kev.sin6_node_address;
	sdl = &kev.sdl_node_address;
	/* split sa into its link-layer and IPv6 components */
	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));

	nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
	    &kev.link_data, sizeof (kev));
}

/*
 * Report that a node has disappeared from the interface: notify ND6
 * and post a KEV_DL_NODE_ABSENCE kernel event.
 */
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev;
	struct sockaddr_in6 *sin6;
	struct sockaddr_dl *sdl;

	VERIFY(ifp);
	VERIFY(sa);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	bzero(&kev, sizeof (kev));
sin6 = &kev.sin6_node_address; 6505 sdl = &kev.sdl_node_address; 6506 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6); 6507 6508 nd6_alt_node_absent(ifp, sin6); 6509 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE, 6510 &kev.link_data, sizeof (kev)); 6511} 6512 6513const void * 6514dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep, 6515 kauth_cred_t *credp) 6516{ 6517 const u_int8_t *bytes; 6518 size_t size; 6519 6520 bytes = CONST_LLADDR(sdl); 6521 size = sdl->sdl_alen; 6522 6523#if CONFIG_MACF 6524 if (dlil_lladdr_ckreq) { 6525 switch (sdl->sdl_type) { 6526 case IFT_ETHER: 6527 case IFT_IEEE1394: 6528 break; 6529 default: 6530 credp = NULL; 6531 break; 6532 }; 6533 6534 if (credp && mac_system_check_info(*credp, "net.link.addr")) { 6535 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = { 6536 [0] = 2 6537 }; 6538 6539 switch (sdl->sdl_type) { 6540 case IFT_ETHER: 6541 VERIFY(size == ETHER_ADDR_LEN); 6542 bytes = unspec; 6543 break; 6544 case IFT_IEEE1394: 6545 VERIFY(size == FIREWIRE_EUI64_LEN); 6546 bytes = unspec; 6547 break; 6548 default: 6549 VERIFY(FALSE); 6550 break; 6551 }; 6552 } 6553 } 6554#else 6555#pragma unused(credp) 6556#endif 6557 6558 if (sizep != NULL) *sizep = size; 6559 return (bytes); 6560} 6561 6562void 6563dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN], 6564 u_int8_t info[DLIL_MODARGLEN]) 6565{ 6566 struct kev_dl_issues kev; 6567 struct timeval tv; 6568 6569 VERIFY(ifp != NULL); 6570 VERIFY(modid != NULL); 6571 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN); 6572 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN); 6573 6574 bzero(&kev, sizeof (&kev)); 6575 6576 microtime(&tv); 6577 kev.timestamp = tv.tv_sec; 6578 bcopy(modid, &kev.modid, DLIL_MODIDLEN); 6579 if (info != NULL) 6580 bcopy(info, &kev.info, DLIL_MODARGLEN); 6581 6582 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES, 6583 &kev.link_data, sizeof (kev)); 6584} 6585 6586errno_t 6587ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq 
*ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0)
			return (result);

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC)
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		else if (ifr->ifr_opportunistic.ifo_flags == 0)
			level = IFNET_THROTTLE_OFF;
		else
			result = EINVAL;

		if (result == 0)
			result = ifnet_set_throttle(ifp, level);
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	/* the requested level was already in effect; not an error */
	if (result == EALREADY)
		result = 0;

	return (result);
}

/*
 * Return the interface's current transmit throttle level in *level.
 * Only meaningful for interfaces using the new transmit model
 * (IFEF_TXSTART); otherwise ENXIO.
 */
int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	*level = IFNET_THROTTLE_OFF;

	ifq = &ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_GET_THROTTLE(ifq, *level, err);
	IFCQ_UNLOCK(ifq);

	return (err);
}

/*
 * Set the interface's transmit throttle level; on success, turning
 * throttling off also kicks the transmit starter to resume output.
 */
int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART))
		return (ENXIO);

	ifq = &ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
#if PF_ALTQ
		/* Throttling works only for IFCQ, not ALTQ instances */
		if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq)))
			return (ENXIO);
#endif /* PF_ALTQ */
		break;
	default:
		return (EINVAL);
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq))
		IFCQ_SET_THROTTLE(ifq, level, err);
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		printf("%s: throttling level set to %d\n", if_name(ifp),
		    level);
		if (level == IFNET_THROTTLE_OFF)
			ifnet_start(ifp);
	}

	return (err);
}

/*
 * Handle SIOCSIFLOG/SIOCGIFLOG: set or get the interface's logging
 * level, facility flags, category and subcategory.  Setting requires
 * the PRIV_NET_INTERFACE_CONTROL privilege.
 */
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
PRIV_NET_INTERFACE_CONTROL, 0)) != 0) 6715 return (result); 6716 6717 level = ifr->ifr_log.ifl_level; 6718 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) 6719 result = EINVAL; 6720 6721 flags = ifr->ifr_log.ifl_flags; 6722 if ((flags &= IFNET_LOGF_MASK) == 0) 6723 result = EINVAL; 6724 6725 category = ifr->ifr_log.ifl_category; 6726 subcategory = ifr->ifr_log.ifl_subcategory; 6727 6728 if (result == 0) 6729 result = ifnet_set_log(ifp, level, flags, 6730 category, subcategory); 6731 } else { 6732 result = ifnet_get_log(ifp, &level, &flags, &category, 6733 &subcategory); 6734 if (result == 0) { 6735 ifr->ifr_log.ifl_level = level; 6736 ifr->ifr_log.ifl_flags = flags; 6737 ifr->ifr_log.ifl_category = category; 6738 ifr->ifr_log.ifl_subcategory = subcategory; 6739 } 6740 } 6741 6742 return (result); 6743} 6744 6745int 6746ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags, 6747 int32_t category, int32_t subcategory) 6748{ 6749 int err = 0; 6750 6751 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX); 6752 VERIFY(flags & IFNET_LOGF_MASK); 6753 6754 /* 6755 * The logging level applies to all facilities; make sure to 6756 * update them all with the most current level. 6757 */ 6758 flags |= ifp->if_log.flags; 6759 6760 if (ifp->if_output_ctl != NULL) { 6761 struct ifnet_log_params l; 6762 6763 bzero(&l, sizeof (l)); 6764 l.level = level; 6765 l.flags = flags; 6766 l.flags &= ~IFNET_LOGF_DLIL; 6767 l.category = category; 6768 l.subcategory = subcategory; 6769 6770 /* Send this request to lower layers */ 6771 if (l.flags != 0) { 6772 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG, 6773 sizeof (l), &l); 6774 } 6775 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) { 6776 /* 6777 * If targeted to the lower layers without an output 6778 * control callback registered on the interface, just 6779 * silently ignore facilities other than ours. 
6780 */ 6781 flags &= IFNET_LOGF_DLIL; 6782 if (flags == 0 && (!ifp->if_log.flags & IFNET_LOGF_DLIL)) 6783 level = 0; 6784 } 6785 6786 if (err == 0) { 6787 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) 6788 ifp->if_log.flags = 0; 6789 else 6790 ifp->if_log.flags |= flags; 6791 6792 log(LOG_INFO, "%s: logging level set to %d flags=%b " 6793 "arg=%b, category=%d subcategory=%d\n", if_name(ifp), 6794 ifp->if_log.level, ifp->if_log.flags, 6795 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS, 6796 category, subcategory); 6797 } 6798 6799 return (err); 6800} 6801 6802int 6803ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags, 6804 int32_t *category, int32_t *subcategory) 6805{ 6806 if (level != NULL) 6807 *level = ifp->if_log.level; 6808 if (flags != NULL) 6809 *flags = ifp->if_log.flags; 6810 if (category != NULL) 6811 *category = ifp->if_log.category; 6812 if (subcategory != NULL) 6813 *subcategory = ifp->if_log.subcategory; 6814 6815 return (0); 6816} 6817 6818int 6819ifnet_notify_address(struct ifnet *ifp, int af) 6820{ 6821 struct ifnet_notify_address_params na; 6822 6823#if PF 6824 (void) pf_ifaddr_hook(ifp); 6825#endif /* PF */ 6826 6827 if (ifp->if_output_ctl == NULL) 6828 return (EOPNOTSUPP); 6829 6830 bzero(&na, sizeof (na)); 6831 na.address_family = af; 6832 6833 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS, 6834 sizeof (na), &na)); 6835} 6836 6837errno_t 6838ifnet_flowid(struct ifnet *ifp, uint32_t *flowid) 6839{ 6840 if (ifp == NULL || flowid == NULL) { 6841 return (EINVAL); 6842 } else if (!(ifp->if_eflags & IFEF_TXSTART) || 6843 !(ifp->if_refflags & IFRF_ATTACHED)) { 6844 return (ENXIO); 6845 } 6846 6847 *flowid = ifp->if_flowhash; 6848 6849 return (0); 6850} 6851 6852errno_t 6853ifnet_disable_output(struct ifnet *ifp) 6854{ 6855 int err; 6856 6857 if (ifp == NULL) { 6858 return (EINVAL); 6859 } else if (!(ifp->if_eflags & IFEF_TXSTART) || 6860 !(ifp->if_refflags & IFRF_ATTACHED)) { 6861 return (ENXIO); 6862 } 6863 6864 if ((err = 
	ifnet_fc_add(ifp)) == 0) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return (err);
}

/*
 * Clear flow control on the interface and kick the transmit starter
 * so pending packets get sent.
 */
errno_t
ifnet_enable_output(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return (EINVAL);
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !(ifp->if_refflags & IFRF_ATTACHED)) {
		return (ENXIO);
	}

	ifnet_start_common(ifp, 1);
	return (0);
}

/*
 * Flow advisory: given a flow hash, find the flow-controlled
 * interface it belongs to and re-enable its output.
 */
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL)
		return;

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash)
			(void) ifnet_enable_output(ifp);
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}

/*
 * Function to compare ifnet_fc_entries in ifnet flow control tree
 */
static inline int
ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
{
	return (fc1->ifce_flowhash - fc2->ifce_flowhash);
}

/*
 * Insert the interface into the flow control tree, keyed by its flow
 * hash.  Returns 0 when inserted (or already present for this ifp),
 * EAGAIN on a flow-hash collision with a different interface, or
 * ENOMEM when zone allocation fails.
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (0);
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (EAGAIN);
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_noblock(ifnet_fc_zone);
	if (ifce == NULL) {
		/* memory allocation failed */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (ENOMEM);
	}
	bzero(ifce, ifnet_fc_zone_size);

	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return (0);
}

/*
 * Look up and remove the flow control entry for the given flow hash.
 * Returns the entry (caller owns it and frees it with
 * ifnet_fc_entry_free()), or NULL when there is no entry or its
 * interface is no longer attached.
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof (keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return (NULL);
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return (ifce);
}

/* Return a flow control entry to its zone. */
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}

/*
 * Compute a non-zero flow hash for the interface from its name, unit,
 * flags and two fresh random values; reseeds and retries when the
 * hash comes out zero, so that 0 can mean "no flow hash".
 */
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	if (ifnet_flowhash_seed == 0)
		ifnet_flowhash_seed = RandomULong();

	bzero(&fh, sizeof (fh));

	(void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return (flowhash);
}

/*
 * Debug hook on the output path: when HWCKSUM_DBG_FINALIZE_FORCED is
 * set, force software finalization of delayed checksums (skipping TSO
 * packets) and count header/data finalizations.
 */
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
		return;

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP)
			hwcksum_dbg_finalized_hdr++;
		if (did_sw & CSUM_DELAY_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#if INET6
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate that the callee should skip such a case by
		 * setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA)
			hwcksum_dbg_finalized_data++;
		break;
#endif /* INET6 */
	default:
		return;
	}
}

/*
 * Debug hook on the input path: validate and/or emulate partial
 * checksum offload for inbound PF_INET/PF_INET6 packets, driven by
 * the hwcksum_dbg_mode flags.
 */
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum;
	uint32_t hlen;

	/* sanity check the frame header pointer against the mbuf data */
	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		printf("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	hlen = (m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
#if INET6
	case PF_INET6:
#endif /* INET6 */
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len)
			return;

		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		m->m_pkthdr.csum_rx_start = (foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				printf("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					printf("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardwares that
		 * perform 16-bit 1's complement sum beginning at various
		 * start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
				return;

			sum = m_adj_sum16(m, rxoff, aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}

/*
 * sysctl handler: set the hardware checksum debug mode bits; requires
 * hwcksum_dbg enabled and rejects bits outside HWCKSUM_DBG_MASK.
 */
static int
sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_mode;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (hwcksum_dbg == 0)
		return (ENODEV);

	if ((i & ~HWCKSUM_DBG_MASK) != 0)
		return (EINVAL);

	hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);

	return (err);
}

/*
 * sysctl handler: set the forced partial-checksum receive offset;
 * only effective when HWCKSUM_DBG_PARTIAL_FORCED mode is enabled.
 */
static int
sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_forced;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
		return (ENODEV);
	hwcksum_dbg_partial_rxoff_forced = i;

	return (err);
}

/*
 * sysctl handler: set the adjusted partial-checksum receive offset;
 * only effective when HWCKSUM_DBG_PARTIAL_RXOFF_ADJ mode is enabled.
 */
static int
sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	u_int32_t i;
	int err;

	i = hwcksum_dbg_partial_rxoff_adj;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL)
		return (err);

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
		return (ENODEV);

	hwcksum_dbg_partial_rxoff_adj = i;

	return (err);
}

#if DEBUG
/* Blob for sum16 verification */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	int len;
	uint16_t sum;
} sumtbl[] = {
	{ 11, 0xcb6d },
	{ 20, 0x20dd },
	{ 27, 0xbabd },
	{ 32, 0xf3e8 },
	{ 37, 0x197d },
	{ 43, 0x9eae },
	{ 64, 0x4678 },
	{ 127, 0x9399 },
	{ 256, 0xd147 },
	{ 325, 0x0358 }
};
#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))

/*
 * Boot-time self-test for the 16-bit 1's complement sum routines:
 * check m_sum16() (and b_sum16() when INET) against precomputed sums
 * for every span length in sumtbl, at every byte alignment from 0 to
 * 7; panics on any mismatch.
 */
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);

	/* NOTE(review): return value unchecked; presumably M_WAITOK
	 * guarantees a non-NULL mbuf here -- confirm against m_getcl() */
	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
	buf = mtod(m, uint8_t *);		/* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof (uint64_t); i++) {
			uint16_t sum;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof (sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken m_sum16 for len=%d offset=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by alignment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumtbl[n].sum) {
				panic("%s: broken b_sum16 for len=%d align=%d "
				    "sum=0x%04x [expected=0x%04x]\n", __func__,
				    len, i, sum, sumtbl[n].sum);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	printf("DLIL: SUM16 self-tests PASSED\n");
}
#endif /* DEBUG */

#define	CASE_STRINGIFY(x)	case x: return #x

/*
 * Map a KEV_DL_* event code to its symbolic name; returns the empty
 * string for unknown codes.
 */
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return ("");
}