1/* 2 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 */

#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/locks.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <libkern/libkern.h>
#include <mach/mach_time.h>
#include <mach/task.h>
#include <mach/host_priv.h>
#include <mach/mach_host.h>
#include <pexpert/pexpert.h>
#include <sys/kern_event.h>
#include <sys/proc.h>
#include <sys/proc_info.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/wait.h>
#include <sys/tree.h>
#include <sys/priv.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#if CONFIG_FREEZE
#include <vm/vm_map.h>
#endif /* CONFIG_FREEZE */

#include <sys/kern_memorystatus.h>

#if CONFIG_JETSAM
/*
 * For logging clarity: human-readable kill-cause strings, indexed by the
 * kMemorystatusKilled* cause codes (index 0 is the "no cause" placeholder).
 */
static const char *jetsam_kill_cause_name[] = {
	"",
	"jettisoned",         /* kMemorystatusKilled */
	"highwater",          /* kMemorystatusKilledHiwat */
	"vnode-limit",        /* kMemorystatusKilledVnodes */
	"vm-pageshortage",    /* kMemorystatusKilledVMPageShortage */
	"vm-thrashing",       /* kMemorystatusKilledVMThrashing */
	"fc-thrashing",       /* kMemorystatusKilledFCThrashing */
	"per-process-limit",  /* kMemorystatusKilledPerProcessLimit */
	"diagnostic",         /* kMemorystatusKilledDiagnostic */
	"idle-exit",          /* kMemorystatusKilledIdleExit */
};

/*
 * Does cause indicate vm or fc thrashing?
 * Returns TRUE only for the two thrashing-related kill causes.
 */
static boolean_t
is_thrashing(unsigned cause)
{
	switch (cause) {
	case kMemorystatusKilledVMThrashing:
	case kMemorystatusKilledFCThrashing:
		return TRUE;
	default:
		return FALSE;
	}
}

/* Callback into vm_compressor.c to signal that thrashing has been mitigated.
 */
extern void vm_thrashing_jetsam_done(void);
#endif

/* These are very verbose printfs(), enable with
 * MEMORYSTATUS_DEBUG_LOG
 */
#if MEMORYSTATUS_DEBUG_LOG
#define MEMORYSTATUS_DEBUG(cond, format, ...)          \
do {                                                   \
	if (cond) { printf(format, ##__VA_ARGS__); }   \
} while(0)
#else
#define MEMORYSTATUS_DEBUG(cond, format, ...)
#endif

/*
 * General tunables.
 * Percentages of max_mem used to derive the jetsam thresholds below
 * (see memorystatus_init); overridable via PE_get_default().
 */

unsigned long delta_percentage = 5;
unsigned long critical_threshold_percentage = 5;
unsigned long idle_offset_percentage = 5;
unsigned long pressure_threshold_percentage = 15;
unsigned long freeze_threshold_percentage = 50;

/* General memorystatus stuff */

/* Knote list for memorystatus pressure notifications, guarded by the mutex below. */
struct klist memorystatus_klist;
static lck_mtx_t memorystatus_klist_mutex;

static void memorystatus_klist_lock(void);
static void memorystatus_klist_unlock(void);

/* Absolute-time delay before a clean/idle process is demoted to the idle band. */
static uint64_t memorystatus_idle_delay_time = 0;

/*
 * Memorystatus kevents
 */

static int filt_memorystatusattach(struct knote *kn);
static void filt_memorystatusdetach(struct knote *kn);
static int filt_memorystatus(struct knote *kn, long hint);

struct filterops memorystatus_filtops = {
	.f_attach = filt_memorystatusattach,
	.f_detach = filt_memorystatusdetach,
	.f_event = filt_memorystatus,
};

/* Hints passed to KNOTE() on the memorystatus klist. */
enum {
	kMemorystatusNoPressure = 0x1,
	kMemorystatusPressure = 0x2,
	kMemorystatusLowSwap = 0x4
};

/* Idle guard handling */

/* Count of processes currently sitting in the IDLE_DEFERRED band awaiting demotion. */
static int32_t memorystatus_scheduled_idle_demotions = 0;

static thread_call_t memorystatus_idle_demotion_call;

static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2);
static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state);
static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clean_state);
static void memorystatus_reschedule_idle_demotion_locked(void);

static void
memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert);

boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t);
void memorystatus_send_low_swap_note(void);

int memorystatus_wakeup = 0;

unsigned int memorystatus_level = 0;

/* Number of processes tracked across all priority buckets. */
static int memorystatus_list_count = 0;

#define MEMSTAT_BUCKET_COUNT (JETSAM_PRIORITY_MAX + 1)

/* One bucket per jetsam priority band: a proc list plus its length. */
typedef struct memstat_bucket {
	TAILQ_HEAD(, proc) list;
	int count;
} memstat_bucket_t;

memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT];

/* Deadline currently programmed into memorystatus_idle_demotion_call (0 = none). */
uint64_t memstat_idle_demotion_deadline = 0;

static unsigned int memorystatus_dirty_count = 0;


/*
 * Syscall handler: copy the current memorystatus_level out to the
 * user-supplied address. Returns EFAULT on copyout failure, else 0.
 */
int
memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret)
{
	user_addr_t level = 0;

	level = args->level;

	if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) {
		return EFAULT;
	}

	return 0;
}

static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search);
static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search);

static void memorystatus_thread(void *param __unused, wait_result_t wr __unused);

/* Jetsam */

#if CONFIG_JETSAM

int proc_get_memstat_priority(proc_t, boolean_t);

/* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */
#define LEGACY_HIWATER 1

static boolean_t memorystatus_idle_snapshot = 0;

static int memorystatus_highwater_enabled = 1;

unsigned int memorystatus_delta = 0;

static unsigned int memorystatus_available_pages_critical_base = 0;
//static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
static unsigned int memorystatus_available_pages_critical_idle_offset = 0;

#if DEVELOPMENT || DEBUG
/* Bitmask of kill causes that trigger a panic instead of a kill (debug aid). */
static unsigned int memorystatus_jetsam_panic_debug = 0;

static unsigned int memorystatus_jetsam_policy = kPolicyDefault;
static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0;
#endif

static unsigned int memorystatus_thread_wasted_wakeup = 0;

static uint32_t kill_under_pressure_cause = 0;

/* Snapshot buffer sized for maxproc entries; allocated once in memorystatus_init. */
static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot;
#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries

static unsigned int memorystatus_jetsam_snapshot_count = 0;
static unsigned int memorystatus_jetsam_snapshot_max = 0;

static void memorystatus_clear_errors(void);
static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages);
static uint32_t memorystatus_build_state(proc_t p);
static void memorystatus_update_levels_locked(boolean_t critical_only);
//static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured);

static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors);
#if LEGACY_HIWATER
static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors);
#endif

static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause);

#endif /* CONFIG_JETSAM */

/* VM pressure */

extern unsigned int vm_page_free_count;
extern unsigned int vm_page_active_count;
extern unsigned int vm_page_inactive_count;
extern unsigned int vm_page_throttled_count;
extern unsigned int vm_page_purgeable_count;
extern unsigned int vm_page_wire_count;

#if VM_PRESSURE_EVENTS

#include "vm_pressure.h"

extern boolean_t memorystatus_warn_process(pid_t pid, boolean_t critical);

vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal;

#if CONFIG_MEMORYSTATUS
unsigned int memorystatus_available_pages = (unsigned int)-1;
unsigned int memorystatus_available_pages_pressure = 0;
unsigned int memorystatus_available_pages_critical = 0;
unsigned int memorystatus_frozen_count = 0;
unsigned int memorystatus_suspended_count = 0;

/*
 * We use this flag to signal if we have any HWM offenders
 * on the system. This way we can reduce the number of wakeups
 * of the memorystatus_thread when the system is between the
 * "pressure" and "critical" threshold.
 *
 * The (re-)setting of this variable is done without any locks
 * or synchronization simply because it is not possible (currently)
 * to keep track of HWM offenders that drop down below their memory
 * limit and/or exit. So, we choose to burn a couple of wasted wakeups
 * by allowing the unguarded modification of this variable.
 */
boolean_t memorystatus_hwm_candidates = 0;

static int memorystatus_send_note(int event_code, void *data, size_t data_length);
#endif /* CONFIG_MEMORYSTATUS */

#endif /* VM_PRESSURE_EVENTS */

/* Freeze */

#if CONFIG_FREEZE

boolean_t memorystatus_freeze_enabled = FALSE;
int memorystatus_freeze_wakeup = 0;

static inline boolean_t memorystatus_can_freeze_processes(void);
static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);

static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);

/* Thresholds */
static unsigned int memorystatus_freeze_threshold = 0;

static unsigned int memorystatus_freeze_pages_min = 0;
static unsigned int memorystatus_freeze_pages_max = 0;

static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;

/* Stats */
static uint64_t memorystatus_freeze_count = 0;
static uint64_t memorystatus_freeze_pageouts = 0;

/* Throttling */
static throttle_interval_t throttle_intervals[] = {
	{      60, 8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */
	{ 24 * 60, 1, 0, 0, { 0, 0 }, FALSE }, /* 24 hour long interval, no burst */
};

static uint64_t memorystatus_freeze_throttle_count = 0;

static unsigned int memorystatus_suspended_footprint_total = 0;

#endif /* CONFIG_FREEZE */

/* Debug */

extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *);

#if DEVELOPMENT || DEBUG

#if CONFIG_JETSAM

/* Debug aid to aid determination of limit */

/*
 * sysctl handler: toggle enforcement of per-process high-water memory limits.
 * On enable, re-applies each tracked process's stored limit (background-limited
 * processes currently in the foreground get an unlimited (-1) footprint);
 * on disable, clears all limits to -1. Writes 0/1 only; EINVAL otherwise.
 */
static int
sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	proc_t p;
	unsigned int b = 0;
	int error, enable = 0;
	int32_t memlimit;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}

	error = SYSCTL_IN(req, &enable, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}

	if (!(enable == 0 || enable == 1)) {
		return EINVAL;
	}

	proc_list_lock();

	/* Walk every bucket, re-deriving the effective limit for each process. */
	p = memorystatus_get_first_proc_locked(&b, TRUE);
	while (p) {
		if (enable) {
			if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
				memlimit = -1;
			} else {
				memlimit = p->p_memstat_memlimit;
			}
		} else {
			memlimit = -1;
		}
		task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);

		/* An unlimited (-1) footprint is marked fatal; background-limited procs lose the fatal flag otherwise. */
		if (memlimit == -1) {
			p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
		} else {
			if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) {
				p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
			}
		}

		p = memorystatus_get_next_proc_locked(&b, p, TRUE);
	}

	memorystatus_highwater_enabled = enable;

	proc_list_unlock();

	return 0;
}

SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, "");

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, "");

/* Diagnostic code */

enum {
	kJetsamDiagnosticModeNone =              0,
	kJetsamDiagnosticModeAll =               1,
	kJetsamDiagnosticModeStopAtFirstActive = 2,
	kJetsamDiagnosticModeCount
} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;

static int jetsam_diagnostic_suspended_one_active_proc = 0;

/*
 * sysctl handler: select the jetsam diagnostic mode (none / all / stop at
 * first active). Validates the value, translates it into the corresponding
 * kPolicyDiagnose* policy bit under the proc list lock, refreshes the memory
 * levels, and logs the transition.
 */
static int
sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	const char *diagnosticStrings[] = {
		"jetsam: diagnostic mode: resetting critical level.",
		"jetsam: diagnostic mode: will examine all processes",
		"jetsam: diagnostic mode: will stop at first active process"
	};

	int error, val = jetsam_diagnostic_mode;
	boolean_t changed = FALSE;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if ((val < 0) || (val >= kJetsamDiagnosticModeCount)) {
		printf("jetsam: diagnostic mode: invalid value - %d\n", val);
		return EINVAL;
	}

	proc_list_lock();

	if ((unsigned int) val != jetsam_diagnostic_mode) {
		jetsam_diagnostic_mode = val;

		memorystatus_jetsam_policy &= ~kPolicyDiagnoseActive;

		switch (jetsam_diagnostic_mode) {
		case kJetsamDiagnosticModeNone:
			/* Already cleared */
			break;
		case kJetsamDiagnosticModeAll:
			memorystatus_jetsam_policy |= kPolicyDiagnoseAll;
			break;
		case kJetsamDiagnosticModeStopAtFirstActive:
			memorystatus_jetsam_policy |= kPolicyDiagnoseFirst;
			break;
		default:
			/* Already validated */
			break;
		}

		memorystatus_update_levels_locked(FALSE);
		changed = TRUE;
	}

	proc_list_unlock();

	if (changed) {
		printf("%s\n", diagnosticStrings[val]);
	}

	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY,
    &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, "");

#if VM_PRESSURE_EVENTS

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, "");


/*
 * This routine is used for targeted notifications
 * regardless of system memory pressure.
 * "memnote" is the current user.
 */

static int
sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error = 0, pid = 0;
	int ret = 0;
	struct knote *kn = NULL;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	/*
	 * We inspect 3 lists here for targeted notifications:
	 * - memorystatus_klist
	 * - vm_pressure_klist
	 * - vm_pressure_dormant_klist
	 *
	 * The vm_pressure_* lists are tied to the old VM_PRESSURE
	 * notification mechanism. We intend to stop using that
	 * mechanism and, in turn, get rid of the 2 lists and
	 * vm_dispatch_pressure_note_to_pid() too.
	 */

	memorystatus_klist_lock();
	kn = vm_find_knote_from_pid(pid, &memorystatus_klist);
	if (kn) {
		/*
		 * Forcibly send this pid a "warning" memory pressure notification.
		 */
		kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
		KNOTE(&memorystatus_klist, kMemorystatusPressure);
		ret = 0;
	} else {
		/* Not on the memorystatus list: fall back to the legacy VM_PRESSURE path. */
		ret = vm_dispatch_pressure_note_to_pid(pid, FALSE);
	}
	memorystatus_klist_unlock();

	return ret;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_vm_pressure_send, "I", "");

#endif /* VM_PRESSURE_EVENTS */

#endif /* CONFIG_JETSAM */

#if CONFIG_FREEZE

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");

SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_count, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_count, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");

boolean_t memorystatus_freeze_throttle_enabled = TRUE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");

/*
 * Manual trigger of freeze and thaw for dev / debug kernels only.
557 */ 558static int 559sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS 560{ 561#pragma unused(arg1, arg2) 562 563 int error, pid = 0; 564 proc_t p; 565 566 if (memorystatus_freeze_enabled == FALSE) { 567 return ENOTSUP; 568 } 569 570 error = sysctl_handle_int(oidp, &pid, 0, req); 571 if (error || !req->newptr) 572 return (error); 573 574 p = proc_find(pid); 575 if (p != NULL) { 576 uint32_t purgeable, wired, clean, dirty; 577 boolean_t shared; 578 uint32_t max_pages = 0; 579 580 if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { 581 max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); 582 } else { 583 max_pages = UINT32_MAX - 1; 584 } 585 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); 586 proc_rele(p); 587 588 if (error) 589 error = EIO; 590 return error; 591 } 592 return EINVAL; 593} 594 595SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, 596 0, 0, &sysctl_memorystatus_freeze, "I", ""); 597 598static int 599sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS 600{ 601#pragma unused(arg1, arg2) 602 603 int error, pid = 0; 604 proc_t p; 605 606 if (memorystatus_freeze_enabled == FALSE) { 607 return ENOTSUP; 608 } 609 610 error = sysctl_handle_int(oidp, &pid, 0, req); 611 if (error || !req->newptr) 612 return (error); 613 614 p = proc_find(pid); 615 if (p != NULL) { 616 error = task_thaw(p->task); 617 proc_rele(p); 618 619 if (error) 620 error = EIO; 621 return error; 622 } 623 624 return EINVAL; 625} 626 627SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, 628 0, 0, &sysctl_memorystatus_available_pages_thaw, "I", ""); 629 630#endif /* CONFIG_FREEZE */ 631 632#endif /* DEVELOPMENT || DEBUG */ 633 634extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation, 635 void *parameter, 636 integer_t priority, 637 thread_t *new_thread); 
#if CONFIG_JETSAM
/*
 * Sort processes by size for a single jetsam bucket.
 * Repeated selection: on each pass, find the largest remaining process
 * (by footprint page count) and move it just after the previously placed
 * one, yielding a descending-by-size list. Caller holds the proc list lock.
 */

static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index)
{
	proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL;
	uint32_t pages = 0, max_pages = 0;
	memstat_bucket_t *current_bucket;

	if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
		return;
	}

	current_bucket = &memstat_bucket[bucket_index];

	p = TAILQ_FIRST(&current_bucket->list);

	if (p) {
		/* Seed the search with the head's size; max_proc stays NULL if the head is already largest. */
		memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
		max_pages = pages;
		insert_after_proc = NULL;

		p = TAILQ_NEXT(p, p_memstat_list);

restart:
		while (p) {

			memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);

			if (pages > max_pages) {
				max_pages = pages;
				max_proc = p;
			}

			p = TAILQ_NEXT(p, p_memstat_list);
		}

		if (max_proc) {

			TAILQ_REMOVE(&current_bucket->list, max_proc, p_memstat_list);

			if (insert_after_proc == NULL) {
				TAILQ_INSERT_HEAD(&current_bucket->list, max_proc, p_memstat_list);
			} else {
				TAILQ_INSERT_AFTER(&current_bucket->list, insert_after_proc, max_proc, p_memstat_list);
			}

			insert_after_proc = max_proc;

			/* Reset parameters for the new search. */
			p = TAILQ_NEXT(max_proc, p_memstat_list);
			if (p) {
				memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
				max_pages = pages;
			}
			max_proc = NULL;

			goto restart;
		}
	}
}

#endif /* CONFIG_JETSAM */

/*
 * Return the first process in bucket *bucket_index; when 'search' is TRUE,
 * advance *bucket_index through subsequent buckets until a process is found.
 * Caller holds the proc list lock.
 */
static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) {
	memstat_bucket_t *current_bucket;
	proc_t next_p;

	if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) {
		return NULL;
	}

	current_bucket = &memstat_bucket[*bucket_index];
	next_p = TAILQ_FIRST(&current_bucket->list);
	if (!next_p && search) {
		while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
			current_bucket = &memstat_bucket[*bucket_index];
			next_p = TAILQ_FIRST(&current_bucket->list);
		}
	}

	return next_p;
}

/*
 * Return the process after p in its bucket; when 'search' is TRUE, continue
 * into later buckets (updating *bucket_index). Caller holds the proc list lock.
 */
static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) {
	memstat_bucket_t *current_bucket;
	proc_t next_p;

	if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) {
		return NULL;
	}

	next_p = TAILQ_NEXT(p, p_memstat_list);
	while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
		current_bucket = &memstat_bucket[*bucket_index];
		next_p = TAILQ_FIRST(&current_bucket->list);
	}

	return next_p;
}

/*
 * One-time subsystem initialization: set freeze page bounds, compute the
 * idle-demotion delay, init the priority buckets, apply boot-arg overrides,
 * derive the jetsam page thresholds from max_mem, allocate the snapshot
 * buffer, and start the memorystatus thread.
 */
__private_extern__ void
memorystatus_init(void)
{
	thread_t thread = THREAD_NULL;
	kern_return_t result;
	int i;

#if CONFIG_FREEZE
	memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
#endif

	nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);

	/* Init buckets */
	for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
		TAILQ_INIT(&memstat_bucket[i].list);
		memstat_bucket[i].count = 0;
	}

	memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL);

	/* Apply overrides */
	PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage));
	assert(delta_percentage < 100);
	PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage));
	assert(critical_threshold_percentage < 100);
	PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage));
	assert(idle_offset_percentage < 100);
	PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage));
	assert(pressure_threshold_percentage < 100);
	PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage));
	assert(freeze_threshold_percentage < 100);

#if CONFIG_JETSAM
	/* Convert the percentage tunables into page counts. */
	memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
	memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
	memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;

	memorystatus_jetsam_snapshot_max = maxproc;
	memorystatus_jetsam_snapshot =
		(memorystatus_jetsam_snapshot_t*)kalloc(sizeof(memorystatus_jetsam_snapshot_t) +
			sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max);
	if (!memorystatus_jetsam_snapshot) {
		panic("Could not allocate memorystatus_jetsam_snapshot");
	}

	/* No contention at this point */
	memorystatus_update_levels_locked(FALSE);
#endif

#if CONFIG_FREEZE
	memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta;
#endif

	result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread);
	if (result == KERN_SUCCESS) {
		thread_deallocate(thread);
	} else {
		panic("Could not create memorystatus_thread");
	}
}

/* Centralised for the purposes of allowing panic-on-jetsam */
extern void
vm_wake_compactor_swapper(void);

/*
 * The jetsam no frills kill call
 *	Return: 0 on success
 *		error code on failure (EINVAL...)
 */
static int
jetsam_do_kill(proc_t p, int jetsam_flags) {
	int error = 0;
	error = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags);
	return(error);
}

/*
 * Wrapper for processes exiting with memorystatus details
 * Maps the kill cause onto a P_JETSAM_* exit flag, performs the kill,
 * and (on debug builds) optionally panics instead when the cause bit is
 * set in memorystatus_jetsam_panic_debug. Returns TRUE on successful kill.
 */
static boolean_t
memorystatus_do_kill(proc_t p, uint32_t cause) {

	int error = 0;
	__unused pid_t victim_pid = p->p_pid;

	KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START,
		victim_pid, cause, vm_page_free_count, 0, 0);

#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
	if (memorystatus_jetsam_panic_debug & (1 << cause)) {
		panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause);
	}
#else
#pragma unused(cause)
#endif
	int jetsam_flags = P_LTERM_JETSAM;
	switch (cause) {
		case kMemorystatusKilledHiwat:           jetsam_flags |= P_JETSAM_HIWAT; break;
		case kMemorystatusKilledVnodes:          jetsam_flags |= P_JETSAM_VNODE; break;
		case kMemorystatusKilledVMPageShortage:  jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break;
		case kMemorystatusKilledVMThrashing:     jetsam_flags |= P_JETSAM_VMTHRASHING; break;
		case kMemorystatusKilledFCThrashing:     jetsam_flags |= P_JETSAM_FCTHRASHING; break;
		case kMemorystatusKilledPerProcessLimit: jetsam_flags |= P_JETSAM_PID; break;
		case kMemorystatusKilledIdleExit:        jetsam_flags |= P_JETSAM_IDLEEXIT; break;
	}
	error = jetsam_do_kill(p, jetsam_flags);

	KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END,
		victim_pid, cause, vm_page_free_count, error, 0);

	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
		vm_wake_compactor_swapper();
	}

	return (error == 0);
}

/*
 * Node manipulation
 */

static void
memorystatus_check_levels_locked(void) {
#if CONFIG_JETSAM
	/* Update levels */
	memorystatus_update_levels_locked(TRUE);
#endif
}

/*
 * Thread-call worker: demote every process in the IDLE_DEFERRED band whose
 * deadline has passed down to the idle band, then re-arm the thread call
 * for the next pending deadline.
 */
static void
memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2)
{
	proc_t p;
	uint64_t current_time;
	memstat_bucket_t *demotion_bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n");

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0);

	current_time = mach_absolute_time();

	proc_list_lock();

	demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
	p = TAILQ_FIRST(&demotion_bucket->list);

	while (p) {
		MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid);

		assert(p->p_memstat_idledeadline);
		assert(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS);
		assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED);

		if (current_time >= p->p_memstat_idledeadline) {
#if DEBUG || DEVELOPMENT
			if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) {
				printf("memorystatus_perform_idle_demotion: moving process %d [%s] to idle band, but never dirtied (0x%x)!\n",
					p->p_pid, (p->p_comm ? p->p_comm : "(unknown)"), p->p_memstat_dirty);
			}
#endif
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, false);

			// The prior process has moved out of the demotion bucket, so grab the new head and continue
			p = TAILQ_FIRST(&demotion_bucket->list);
			continue;
		}

		// No further candidates
		break;
	}

	memorystatus_reschedule_idle_demotion_locked();

	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

/*
 * Mark p for deferred idle demotion; when 'set_state' is TRUE also stamp the
 * deadline and the P_DIRTY_DEFER_IN_PROGRESS bit. Bumps the scheduled count
 * only if p is not already in the deferred bucket. Caller holds the proc
 * list lock (per the _locked convention followed by callers in this file).
 */
static void
memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state)
{
	boolean_t present_in_deferred_bucket = FALSE;

	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		present_in_deferred_bucket = TRUE;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n",
		p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions);

	assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED);

	if (set_state) {
		assert(p->p_memstat_idledeadline == 0);
		p->p_memstat_dirty |= P_DIRTY_DEFER_IN_PROGRESS;
		p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time;
	}

	assert(p->p_memstat_idledeadline);

	if (present_in_deferred_bucket == FALSE) {
		memorystatus_scheduled_idle_demotions++;
	}
}

/*
 * Undo a scheduled idle demotion for p; when 'clear_state' is TRUE also
 * clear the deadline and the defer-in-progress bit. Decrements the
 * scheduled count only if p was in the deferred bucket.
 */
static void
memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state)
{
	boolean_t present_in_deferred_bucket = FALSE;

	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		present_in_deferred_bucket = TRUE;
		assert(p->p_memstat_idledeadline);
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n",
		p->p_pid, clear_state, memorystatus_scheduled_idle_demotions);


	if (clear_state) {
		p->p_memstat_idledeadline = 0;
		p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
	}

	if (present_in_deferred_bucket == TRUE) {
		memorystatus_scheduled_idle_demotions--;
	}

	assert(memorystatus_scheduled_idle_demotions >= 0);
}

/*
 * Re-arm (or cancel) the idle-demotion thread call based on the deadline of
 * the head of the IDLE_DEFERRED bucket and the scheduled-demotion count.
 */
static void
memorystatus_reschedule_idle_demotion_locked(void) {
	if (0 == memorystatus_scheduled_idle_demotions) {
		if (memstat_idle_demotion_deadline) {
			/* Transitioned 1->0, so cancel next call */
			thread_call_cancel(memorystatus_idle_demotion_call);
			memstat_idle_demotion_deadline = 0;
		}
	} else {
		memstat_bucket_t *demotion_bucket;
		proc_t p;
		demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
		p = TAILQ_FIRST(&demotion_bucket->list);

		assert(p && p->p_memstat_idledeadline);

		if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){
			thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline);
			memstat_idle_demotion_deadline = p->p_memstat_idledeadline;
		}
	}
}

/*
 * List manipulation
 */

/*
 * Insert p at the tail of the bucket matching its effective priority.
 * Takes the proc list lock itself unless 'locked' is TRUE. Processes
 * marked P_MEMSTAT_INTERNAL are not tracked. Always returns 0.
 */
int
memorystatus_add(proc_t p, boolean_t locked)
{
	memstat_bucket_t *bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);

	if (!locked) {
		proc_list_lock();
	}

	/* Processes marked internal do not have priority tracked */
	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		goto exit;
	}

	bucket = &memstat_bucket[p->p_memstat_effectivepriority];

	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		assert(bucket->count == memorystatus_scheduled_idle_demotions);
	}

	TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list);
	bucket->count++;

	memorystatus_list_count++;

	memorystatus_check_levels_locked();

exit:
	if (!locked) {
		proc_list_unlock();
	}

	return 0;
}

/*
 * Move p from its current priority bucket to 'priority' (head or tail insert),
 * adjusting the task's physical footprint limit when a background-limited
 * process crosses the foreground boundary. Caller holds the proc list lock.
 */
static void
memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert)
{
	memstat_bucket_t *old_bucket, *new_bucket;

	assert(priority < MEMSTAT_BUCKET_COUNT);

	/* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */
	if ((p->p_listflag & P_LIST_EXITED) != 0) {
		return;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d, inserting at %s\n",
		p->p_pid, priority, head_insert ? "head" : "tail");

	old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		assert(old_bucket->count == (memorystatus_scheduled_idle_demotions + 1));
	}

	TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list);
	old_bucket->count--;

	new_bucket = &memstat_bucket[priority];
	if (head_insert)
		TAILQ_INSERT_HEAD(&new_bucket->list, p, p_memstat_list);
	else
		TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
	new_bucket->count++;

#if CONFIG_JETSAM
	if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) {

		/*
		 * Adjust memory limit based on if the task is going to/from foreground and background.
		 */

		if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) ||
		    ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) {
			int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit;
			task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);

			if (memlimit <= 0) {
				p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
			} else {
				p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
			}
		}
	}
#endif

	p->p_memstat_effectivepriority = priority;

	memorystatus_check_levels_locked();
}

int
memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit)
{
	int ret;
	boolean_t head_insert = false;

#if !CONFIG_JETSAM
#pragma unused(update_memlimit, memlimit, memlimit_background, is_fatal_limit)
#endif

	MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data);

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);

	if (priority == -1) {
		/* Use as shorthand for default priority */
		priority = JETSAM_PRIORITY_DEFAULT;
	} else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		/* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE.
*/ 1117 priority = JETSAM_PRIORITY_IDLE; 1118 } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) { 1119 /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */ 1120 priority = JETSAM_PRIORITY_IDLE; 1121 head_insert = true; 1122 } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) { 1123 /* Sanity check */ 1124 ret = EINVAL; 1125 goto out; 1126 } 1127 1128 proc_list_lock(); 1129 1130 assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); 1131 1132 if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) { 1133 ret = EALREADY; 1134 proc_list_unlock(); 1135 MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid); 1136 goto out; 1137 } 1138 1139 if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || ((p->p_listflag & P_LIST_EXITED) != 0)) { 1140 /* 1141 * This could happen when a process calling posix_spawn() is exiting on the jetsam thread. 1142 */ 1143 ret = EBUSY; 1144 proc_list_unlock(); 1145 goto out; 1146 } 1147 1148 p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED; 1149 p->p_memstat_userdata = user_data; 1150 p->p_memstat_requestedpriority = priority; 1151 1152#if CONFIG_JETSAM 1153 if (update_memlimit) { 1154 p->p_memstat_memlimit = memlimit; 1155 if (memlimit_background) { 1156 /* Will be set as priority is updated */ 1157 p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND; 1158 1159 /* Cannot have a background memory limit and be fatal. */ 1160 is_fatal_limit = FALSE; 1161 1162 } else { 1163 /* Otherwise, apply now */ 1164 if (memorystatus_highwater_enabled) { 1165 task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); 1166 } 1167 } 1168 1169 if (is_fatal_limit || memlimit <= 0) { 1170 p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; 1171 } else { 1172 p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; 1173 } 1174 } 1175#endif 1176 1177 /* 1178 * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here. 
1179 * But, we could be removing it from the bucket. 1180 * Check and take appropriate steps if so. 1181 */ 1182 1183 if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { 1184 1185 memorystatus_invalidate_idle_demotion_locked(p, TRUE); 1186 } 1187 1188 memorystatus_update_priority_locked(p, priority, head_insert); 1189 1190 proc_list_unlock(); 1191 ret = 0; 1192 1193out: 1194 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0); 1195 1196 return ret; 1197} 1198 1199int 1200memorystatus_remove(proc_t p, boolean_t locked) 1201{ 1202 int ret; 1203 memstat_bucket_t *bucket; 1204 1205 MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid); 1206 1207 if (!locked) { 1208 proc_list_lock(); 1209 } 1210 1211 assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); 1212 1213 bucket = &memstat_bucket[p->p_memstat_effectivepriority]; 1214 if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { 1215 assert(bucket->count == memorystatus_scheduled_idle_demotions); 1216 } 1217 1218 TAILQ_REMOVE(&bucket->list, p, p_memstat_list); 1219 bucket->count--; 1220 1221 memorystatus_list_count--; 1222 1223 /* If awaiting demotion to the idle band, clean up */ 1224 if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { 1225 memorystatus_invalidate_idle_demotion_locked(p, TRUE); 1226 memorystatus_reschedule_idle_demotion_locked(); 1227 } 1228 1229 memorystatus_check_levels_locked(); 1230 1231#if CONFIG_FREEZE 1232 if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) { 1233 memorystatus_frozen_count--; 1234 } 1235 1236 if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { 1237 memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint; 1238 memorystatus_suspended_count--; 1239 } 1240#endif 1241 1242 if (!locked) { 1243 proc_list_unlock(); 1244 } 1245 1246 if (p) { 1247 ret = 0; 1248 } else { 1249 ret = ESRCH; 1250 } 1251 1252 return ret; 1253} 1254 1255static boolean_t 
1256memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) { 1257 /* See that the process isn't marked for termination */ 1258 if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) { 1259 return FALSE; 1260 } 1261 1262 /* Idle exit requires that process be tracked */ 1263 if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) && 1264 !(pcontrol & PROC_DIRTY_TRACK)) { 1265 return FALSE; 1266 } 1267 1268 /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. */ 1269 if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) && 1270 !(pcontrol & PROC_DIRTY_TRACK)) { 1271 return FALSE; 1272 } 1273 1274 /* Deferral is only relevant if idle exit is specified */ 1275 if ((pcontrol & PROC_DIRTY_DEFER) && 1276 !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) { 1277 return FALSE; 1278 } 1279 1280 return TRUE; 1281} 1282 1283static void 1284memorystatus_update_idle_priority_locked(proc_t p) { 1285 int32_t priority; 1286 1287 MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty); 1288 1289 if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) { 1290 priority = (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) ? JETSAM_PRIORITY_IDLE_DEFERRED : JETSAM_PRIORITY_IDLE; 1291 } else { 1292 priority = p->p_memstat_requestedpriority; 1293 } 1294 1295 if (priority != p->p_memstat_effectivepriority) { 1296 memorystatus_update_priority_locked(p, priority, false); 1297 } 1298} 1299 1300/* 1301 * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle 1302 * (clean). They may also indicate that they support termination when idle, with the result that they are promoted 1303 * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low 1304 * priority idle band when clean (and killed earlier, protecting higher priority procesess). 
1305 * 1306 * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by 1307 * memorystatus_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band 1308 * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to 1309 * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle 1310 * band. The deferral can be cleared early by clearing the appropriate flag. 1311 * 1312 * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process 1313 * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be 1314 * re-enabled or the guard state cleared, depending on whether the guard deadline has passed. 1315 */ 1316 1317int 1318memorystatus_dirty_track(proc_t p, uint32_t pcontrol) { 1319 unsigned int old_dirty; 1320 boolean_t reschedule = FALSE; 1321 boolean_t already_deferred = FALSE; 1322 boolean_t defer_now = FALSE; 1323 int ret; 1324 1325 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK), 1326 p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0); 1327 1328 proc_list_lock(); 1329 1330 if ((p->p_listflag & P_LIST_EXITED) != 0) { 1331 /* 1332 * Process is on its way out. 
1333 */ 1334 ret = EBUSY; 1335 goto exit; 1336 } 1337 1338 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { 1339 ret = EPERM; 1340 goto exit; 1341 } 1342 1343 if (!memorystatus_validate_track_flags(p, pcontrol)) { 1344 ret = EINVAL; 1345 goto exit; 1346 } 1347 1348 old_dirty = p->p_memstat_dirty; 1349 1350 /* These bits are cumulative, as per <rdar://problem/11159924> */ 1351 if (pcontrol & PROC_DIRTY_TRACK) { 1352 p->p_memstat_dirty |= P_DIRTY_TRACK; 1353 } 1354 1355 if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) { 1356 p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT; 1357 } 1358 1359 if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { 1360 p->p_memstat_dirty |= P_DIRTY_LAUNCH_IN_PROGRESS; 1361 } 1362 1363 if (old_dirty & P_DIRTY_DEFER_IN_PROGRESS) { 1364 already_deferred = TRUE; 1365 } 1366 1367 /* This can be set and cleared exactly once. */ 1368 if (pcontrol & PROC_DIRTY_DEFER) { 1369 1370 if ( !(old_dirty & P_DIRTY_DEFER)) { 1371 p->p_memstat_dirty |= P_DIRTY_DEFER; 1372 } 1373 1374 defer_now = TRUE; 1375 } 1376 1377 MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for process %d\n", 1378 ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N", 1379 defer_now ? "Y" : "N", 1380 p->p_memstat_dirty & P_DIRTY ? "Y" : "N", 1381 p->p_pid); 1382 1383 /* Kick off or invalidate the idle exit deferment if there's a state transition. */ 1384 if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) { 1385 if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) && 1386 defer_now && !already_deferred) { 1387 1388 /* 1389 * Request to defer a clean process that's idle-exit enabled 1390 * and not already in the jetsam deferred band. 
1391 */ 1392 memorystatus_schedule_idle_demotion_locked(p, TRUE); 1393 reschedule = TRUE; 1394 1395 } else if (!defer_now && already_deferred) { 1396 1397 /* 1398 * Either the process is no longer idle-exit enabled OR 1399 * there's a request to cancel a currently active deferral. 1400 */ 1401 memorystatus_invalidate_idle_demotion_locked(p, TRUE); 1402 reschedule = TRUE; 1403 } 1404 } else { 1405 1406 /* 1407 * We are trying to operate on a dirty process. Dirty processes have to 1408 * be removed from the deferred band. The question is do we reset the 1409 * deferred state or not? 1410 * 1411 * This could be a legal request like: 1412 * - this process had opted into the JETSAM_DEFERRED band 1413 * - but it's now dirty and requests to opt out. 1414 * In this case, we remove the process from the band and reset its 1415 * state too. It'll opt back in properly when needed. 1416 * 1417 * OR, this request could be a user-space bug. E.g.: 1418 * - this process had opted into the JETSAM_DEFERRED band when clean 1419 * - and, then issues another request to again put it into the band except 1420 * this time the process is dirty. 1421 * The process going dirty, as a transition in memorystatus_dirty_set(), will pull the process out of 1422 * the deferred band with its state intact. So our request below is no-op. 1423 * But we do it here anyways for coverage. 1424 * 1425 * memorystatus_update_idle_priority_locked() 1426 * single-mindedly treats a dirty process as "cannot be in the deferred band". 
1427 */ 1428 1429 if (!defer_now && already_deferred) { 1430 memorystatus_invalidate_idle_demotion_locked(p, TRUE); 1431 reschedule = TRUE; 1432 } else { 1433 memorystatus_invalidate_idle_demotion_locked(p, FALSE); 1434 reschedule = TRUE; 1435 } 1436 } 1437 1438 memorystatus_update_idle_priority_locked(p); 1439 1440 if (reschedule) { 1441 memorystatus_reschedule_idle_demotion_locked(); 1442 } 1443 1444 ret = 0; 1445 1446exit: 1447 proc_list_unlock(); 1448 1449 return ret; 1450} 1451 1452int 1453memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) { 1454 int ret; 1455 boolean_t kill = false; 1456 boolean_t reschedule = FALSE; 1457 boolean_t was_dirty = FALSE; 1458 boolean_t now_dirty = FALSE; 1459 1460 MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty); 1461 1462 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_SET), p->p_pid, self, pcontrol, 0, 0); 1463 1464 proc_list_lock(); 1465 1466 if ((p->p_listflag & P_LIST_EXITED) != 0) { 1467 /* 1468 * Process is on its way out. 1469 */ 1470 ret = EBUSY; 1471 goto exit; 1472 } 1473 1474 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { 1475 ret = EPERM; 1476 goto exit; 1477 } 1478 1479 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) 1480 was_dirty = TRUE; 1481 1482 if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { 1483 /* Dirty tracking not enabled */ 1484 ret = EINVAL; 1485 } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { 1486 /* 1487 * Process is set to be terminated and we're attempting to mark it dirty. 1488 * Set for termination and marking as clean is OK - see <rdar://problem/10594349>. 1489 */ 1490 ret = EBUSY; 1491 } else { 1492 int flag = (self == TRUE) ? 
P_DIRTY : P_DIRTY_SHUTDOWN; 1493 if (pcontrol && !(p->p_memstat_dirty & flag)) { 1494 /* Mark the process as having been dirtied at some point */ 1495 p->p_memstat_dirty |= (flag | P_DIRTY_MARKED); 1496 memorystatus_dirty_count++; 1497 ret = 0; 1498 } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) { 1499 if ((flag == P_DIRTY_SHUTDOWN) && (!p->p_memstat_dirty & P_DIRTY)) { 1500 /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */ 1501 p->p_memstat_dirty |= P_DIRTY_TERMINATED; 1502 kill = true; 1503 } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { 1504 /* Kill previously terminated processes if set clean */ 1505 kill = true; 1506 } 1507 p->p_memstat_dirty &= ~flag; 1508 memorystatus_dirty_count--; 1509 ret = 0; 1510 } else { 1511 /* Already set */ 1512 ret = EALREADY; 1513 } 1514 } 1515 1516 if (ret != 0) { 1517 goto exit; 1518 } 1519 1520 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) 1521 now_dirty = TRUE; 1522 1523 if ((was_dirty == TRUE && now_dirty == FALSE) || 1524 (was_dirty == FALSE && now_dirty == TRUE)) { 1525 1526 /* Manage idle exit deferral, if applied */ 1527 if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == 1528 (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) { 1529 1530 /* 1531 * P_DIRTY_DEFER_IN_PROGRESS means the process is in the deferred band OR it might be heading back 1532 * there once it's clean again and has some protection window left. 1533 */ 1534 1535 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { 1536 /* 1537 * New dirty process i.e. "was_dirty == FALSE && now_dirty == TRUE" 1538 * 1539 * The process will move from the deferred band to its higher requested 1540 * jetsam band. But we don't clear its state i.e. we want to remember that 1541 * this process was part of the "deferred" band and will return to it. 1542 * 1543 * This way, we don't let it age beyond the protection 1544 * window when it returns to "clean". 
All the while giving 1545 * it a chance to perform its work while "dirty". 1546 * 1547 */ 1548 memorystatus_invalidate_idle_demotion_locked(p, FALSE); 1549 reschedule = TRUE; 1550 } else { 1551 1552 /* 1553 * Process is back from "dirty" to "clean". 1554 * 1555 * Is its timer up OR does it still have some protection 1556 * window left? 1557 */ 1558 1559 if (mach_absolute_time() >= p->p_memstat_idledeadline) { 1560 /* 1561 * The process' deadline has expired. It currently 1562 * does not reside in the DEFERRED bucket. 1563 * 1564 * It's on its way to the JETSAM_PRIORITY_IDLE 1565 * bucket via memorystatus_update_idle_priority_locked() 1566 * below. 1567 1568 * So all we need to do is reset all the state on the 1569 * process that's related to the DEFERRED bucket i.e. 1570 * the DIRTY_DEFER_IN_PROGRESS flag and the timer deadline. 1571 * 1572 */ 1573 1574 memorystatus_invalidate_idle_demotion_locked(p, TRUE); 1575 reschedule = TRUE; 1576 } else { 1577 /* 1578 * It still has some protection window left and so 1579 * we just re-arm the timer without modifying any 1580 * state on the process. 
1581 */ 1582 memorystatus_schedule_idle_demotion_locked(p, FALSE); 1583 reschedule = TRUE; 1584 } 1585 } 1586 } 1587 1588 memorystatus_update_idle_priority_locked(p); 1589 1590 /* If the deferral state changed, reschedule the demotion timer */ 1591 if (reschedule) { 1592 memorystatus_reschedule_idle_demotion_locked(); 1593 } 1594 } 1595 1596 if (kill) { 1597 psignal(p, SIGKILL); 1598 } 1599 1600exit: 1601 proc_list_unlock(); 1602 1603 return ret; 1604} 1605 1606int 1607memorystatus_dirty_clear(proc_t p, uint32_t pcontrol) { 1608 1609 int ret = 0; 1610 1611 MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_clear(): %d 0x%x 0x%x\n", p->p_pid, pcontrol, p->p_memstat_dirty); 1612 1613 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_CLEAR), p->p_pid, pcontrol, 0, 0, 0); 1614 1615 proc_list_lock(); 1616 1617 if ((p->p_listflag & P_LIST_EXITED) != 0) { 1618 /* 1619 * Process is on its way out. 1620 */ 1621 ret = EBUSY; 1622 goto exit; 1623 } 1624 1625 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { 1626 ret = EPERM; 1627 goto exit; 1628 } 1629 1630 if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { 1631 /* Dirty tracking not enabled */ 1632 ret = EINVAL; 1633 goto exit; 1634 } 1635 1636 if (!pcontrol || (pcontrol & (PROC_DIRTY_LAUNCH_IN_PROGRESS | PROC_DIRTY_DEFER)) == 0) { 1637 ret = EINVAL; 1638 goto exit; 1639 } 1640 1641 if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { 1642 p->p_memstat_dirty &= ~P_DIRTY_LAUNCH_IN_PROGRESS; 1643 } 1644 1645 /* This can be set and cleared exactly once. 
*/ 1646 if (pcontrol & PROC_DIRTY_DEFER) { 1647 1648 if (p->p_memstat_dirty & P_DIRTY_DEFER) { 1649 1650 p->p_memstat_dirty &= ~P_DIRTY_DEFER; 1651 1652 memorystatus_invalidate_idle_demotion_locked(p, TRUE); 1653 memorystatus_update_idle_priority_locked(p); 1654 memorystatus_reschedule_idle_demotion_locked(); 1655 } 1656 } 1657 1658 ret = 0; 1659exit: 1660 proc_list_unlock(); 1661 1662 return ret; 1663} 1664 1665int 1666memorystatus_dirty_get(proc_t p) { 1667 int ret = 0; 1668 1669 proc_list_lock(); 1670 1671 if (p->p_memstat_dirty & P_DIRTY_TRACK) { 1672 ret |= PROC_DIRTY_TRACKED; 1673 if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) { 1674 ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT; 1675 } 1676 if (p->p_memstat_dirty & P_DIRTY) { 1677 ret |= PROC_DIRTY_IS_DIRTY; 1678 } 1679 if (p->p_memstat_dirty & P_DIRTY_LAUNCH_IN_PROGRESS) { 1680 ret |= PROC_DIRTY_LAUNCH_IS_IN_PROGRESS; 1681 } 1682 } 1683 1684 proc_list_unlock(); 1685 1686 return ret; 1687} 1688 1689int 1690memorystatus_on_terminate(proc_t p) { 1691 int sig; 1692 1693 proc_list_lock(); 1694 1695 p->p_memstat_dirty |= P_DIRTY_TERMINATED; 1696 1697 if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) { 1698 /* Clean; mark as terminated and issue SIGKILL */ 1699 sig = SIGKILL; 1700 } else { 1701 /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */ 1702 sig = SIGTERM; 1703 } 1704 1705 proc_list_unlock(); 1706 1707 return sig; 1708} 1709 1710void 1711memorystatus_on_suspend(proc_t p) 1712{ 1713#if CONFIG_FREEZE 1714 uint32_t pages; 1715 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); 1716#endif 1717 proc_list_lock(); 1718#if CONFIG_FREEZE 1719 p->p_memstat_suspendedfootprint = pages; 1720 memorystatus_suspended_footprint_total += pages; 1721 memorystatus_suspended_count++; 1722#endif 1723 p->p_memstat_state |= P_MEMSTAT_SUSPENDED; 1724 proc_list_unlock(); 1725} 1726 1727void 1728memorystatus_on_resume(proc_t p) 1729{ 1730#if CONFIG_FREEZE 
1731 boolean_t frozen; 1732 pid_t pid; 1733#endif 1734 1735 proc_list_lock(); 1736 1737#if CONFIG_FREEZE 1738 frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN); 1739 if (frozen) { 1740 memorystatus_frozen_count--; 1741 p->p_memstat_state |= P_MEMSTAT_PRIOR_THAW; 1742 } 1743 1744 memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint; 1745 memorystatus_suspended_count--; 1746 1747 pid = p->p_pid; 1748#endif 1749 1750 p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN); 1751 1752 proc_list_unlock(); 1753 1754#if CONFIG_FREEZE 1755 if (frozen) { 1756 memorystatus_freeze_entry_t data = { pid, FALSE, 0 }; 1757 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); 1758 } 1759#endif 1760} 1761 1762void 1763memorystatus_on_inactivity(proc_t p) 1764{ 1765#pragma unused(p) 1766#if CONFIG_FREEZE 1767 /* Wake the freeze thread */ 1768 thread_wakeup((event_t)&memorystatus_freeze_wakeup); 1769#endif 1770} 1771 1772static uint32_t 1773memorystatus_build_state(proc_t p) { 1774 uint32_t snapshot_state = 0; 1775 1776 /* General */ 1777 if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { 1778 snapshot_state |= kMemorystatusSuspended; 1779 } 1780 if (p->p_memstat_state & P_MEMSTAT_FROZEN) { 1781 snapshot_state |= kMemorystatusFrozen; 1782 } 1783 if (p->p_memstat_state & P_MEMSTAT_PRIOR_THAW) { 1784 snapshot_state |= kMemorystatusWasThawed; 1785 } 1786 1787 /* Tracking */ 1788 if (p->p_memstat_dirty & P_DIRTY_TRACK) { 1789 snapshot_state |= kMemorystatusTracked; 1790 } 1791 if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) { 1792 snapshot_state |= kMemorystatusSupportsIdleExit; 1793 } 1794 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { 1795 snapshot_state |= kMemorystatusDirty; 1796 } 1797 1798 return snapshot_state; 1799} 1800 1801#if !CONFIG_JETSAM 1802 1803static boolean_t 1804kill_idle_exit_proc(void) 1805{ 1806 proc_t p, victim_p = PROC_NULL; 1807 uint64_t current_time; 1808 boolean_t killed = FALSE; 1809 
unsigned int i = 0; 1810 1811 /* Pick next idle exit victim. */ 1812 current_time = mach_absolute_time(); 1813 1814 proc_list_lock(); 1815 1816 p = memorystatus_get_first_proc_locked(&i, FALSE); 1817 while (p) { 1818 /* No need to look beyond the idle band */ 1819 if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) { 1820 break; 1821 } 1822 1823 if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) { 1824 if (current_time >= p->p_memstat_idledeadline) { 1825 p->p_memstat_dirty |= P_DIRTY_TERMINATED; 1826 victim_p = proc_ref_locked(p); 1827 break; 1828 } 1829 } 1830 1831 p = memorystatus_get_next_proc_locked(&i, p, FALSE); 1832 } 1833 1834 proc_list_unlock(); 1835 1836 if (victim_p) { 1837 printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_p->p_pid, (victim_p->p_comm ? victim_p->p_comm : "(unknown)")); 1838 killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit); 1839 proc_rele(victim_p); 1840 } 1841 1842 return killed; 1843} 1844#endif 1845 1846#if CONFIG_JETSAM 1847static void 1848memorystatus_thread_wake(void) { 1849 thread_wakeup((event_t)&memorystatus_wakeup); 1850} 1851#endif /* CONFIG_JETSAM */ 1852 1853extern void vm_pressure_response(void); 1854 1855static int 1856memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation) 1857{ 1858 if (interval_ms) { 1859 assert_wait_timeout(&memorystatus_wakeup, THREAD_UNINT, interval_ms, 1000 * NSEC_PER_USEC); 1860 } else { 1861 assert_wait(&memorystatus_wakeup, THREAD_UNINT); 1862 } 1863 1864 return thread_block(continuation); 1865} 1866 1867static void 1868memorystatus_thread(void *param __unused, wait_result_t wr __unused) 1869{ 1870 static boolean_t is_vm_privileged = FALSE; 1871#if CONFIG_JETSAM 1872 boolean_t post_snapshot = FALSE; 1873 uint32_t errors = 0; 1874 uint32_t hwm_kill = 0; 1875#endif 1876 1877 if (is_vm_privileged == FALSE) { 1878 /* 1879 * It's the first time the thread has run, so 
just mark the thread as privileged and block. 1880 * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>. 1881 */ 1882 thread_wire(host_priv_self(), current_thread(), TRUE); 1883 is_vm_privileged = TRUE; 1884 1885 memorystatus_thread_block(0, memorystatus_thread); 1886 } 1887 1888#if CONFIG_JETSAM 1889 1890 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START, 1891 memorystatus_available_pages, 0, 0, 0, 0); 1892 1893 /* 1894 * Jetsam aware version. 1895 * 1896 * The VM pressure notification thread is working it's way through clients in parallel. 1897 * 1898 * So, while the pressure notification thread is targeting processes in order of 1899 * increasing jetsam priority, we can hopefully reduce / stop it's work by killing 1900 * any processes that have exceeded their highwater mark. 1901 * 1902 * If we run out of HWM processes and our available pages drops below the critical threshold, then, 1903 * we target the least recently used process in order of increasing jetsam priority (exception: the FG band). 1904 */ 1905 while (is_thrashing(kill_under_pressure_cause) || 1906 memorystatus_available_pages <= memorystatus_available_pages_pressure) { 1907 boolean_t killed; 1908 int32_t priority; 1909 uint32_t cause; 1910 1911 if (kill_under_pressure_cause) { 1912 cause = kill_under_pressure_cause; 1913 } else { 1914 cause = kMemorystatusKilledVMPageShortage; 1915 } 1916 1917#if LEGACY_HIWATER 1918 /* Highwater */ 1919 killed = memorystatus_kill_hiwat_proc(&errors); 1920 if (killed) { 1921 hwm_kill++; 1922 post_snapshot = TRUE; 1923 goto done; 1924 } else { 1925 memorystatus_hwm_candidates = FALSE; 1926 } 1927 1928 /* No highwater processes to kill. Continue or stop for now? 
*/ 1929 if (!is_thrashing(kill_under_pressure_cause) && 1930 (memorystatus_available_pages > memorystatus_available_pages_critical)) { 1931 /* 1932 * We are _not_ out of pressure but we are above the critical threshold and there's: 1933 * - no compressor thrashing 1934 * - no more HWM processes left. 1935 * For now, don't kill any other processes. 1936 */ 1937 1938 if (hwm_kill == 0) { 1939 memorystatus_thread_wasted_wakeup++; 1940 } 1941 1942 break; 1943 } 1944#endif 1945 1946 /* LRU */ 1947 killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors); 1948 if (killed) { 1949 /* Don't generate logs for steady-state idle-exit kills (unless overridden for debug) */ 1950 if ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot) { 1951 post_snapshot = TRUE; 1952 } 1953 goto done; 1954 } 1955 1956 if (memorystatus_available_pages <= memorystatus_available_pages_critical) { 1957 /* Under pressure and unable to kill a process - panic */ 1958 panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages); 1959 } 1960 1961done: 1962 1963 /* 1964 * We do not want to over-kill when thrashing has been detected. 1965 * To avoid that, we reset the flag here and notify the 1966 * compressor. 1967 */ 1968 if (is_thrashing(kill_under_pressure_cause)) { 1969 kill_under_pressure_cause = 0; 1970 vm_thrashing_jetsam_done(); 1971 } 1972 } 1973 1974 kill_under_pressure_cause = 0; 1975 1976 if (errors) { 1977 memorystatus_clear_errors(); 1978 } 1979 1980#if VM_PRESSURE_EVENTS 1981 /* 1982 * LD: We used to target the foreground process first and foremost here. 1983 * Now, we target all processes, starting from the non-suspended, background 1984 * processes first. We will target foreground too. 
 *
 *	memorystatus_update_vm_pressure(TRUE);
 */
	//vm_pressure_response();
#endif

	/* A kill occurred this pass: notify userspace that a fresh snapshot is available */
	if (post_snapshot) {
		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
			sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
		memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
		memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END,
		memorystatus_available_pages, 0, 0, 0, 0);

#else /* CONFIG_JETSAM */

	/*
	 * Jetsam not enabled
	 */

#endif /* CONFIG_JETSAM */

	/* Park until woken for the next scan; memorystatus_thread is the continuation */
	memorystatus_thread_block(0, memorystatus_thread);
}

#if !CONFIG_JETSAM
/*
 * Returns TRUE:
 *	when an idle-exitable proc was killed
 * Returns FALSE:
 *	when there are no more idle-exitable procs found
 *	when the attempt to kill an idle-exitable proc failed
 */
boolean_t memorystatus_idle_exit_from_VM(void) {
	return(kill_idle_exit_proc());
}
#endif /* !CONFIG_JETSAM */

#if CONFIG_JETSAM

/*
 * Callback invoked when allowable physical memory footprint exceeded
 * (dirty pages + IOKit mappings)
 *
 * This is invoked for both advisory, non-fatal per-task high watermarks,
 * as well as the fatal task memory limits.
 */
void
memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb)
{
	proc_t p = current_proc();

	/* Only log the hard-limit case; the advisory case is handled via notification below */
	if (warning == FALSE) {
		printf("process %d (%s) exceeded physical memory footprint limit of %d MB\n",
			p->p_pid, p->p_comm, max_footprint_mb);
	}

#if VM_PRESSURE_EVENTS
	if (warning == TRUE) {
		/* Advisory watermark: warn the task rather than kill it */
		if (memorystatus_warn_process(p->p_pid, TRUE /* critical? */) != TRUE) {
			/* Print warning, since it's possible that task has not registered for pressure notifications */
			printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n");
		}
		return;
	}
#endif /* VM_PRESSURE_EVENTS */

	if ((p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT) == P_MEMSTAT_FATAL_MEMLIMIT) {
		/*
		 * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task
		 * has violated either the system-wide per-task memory limit OR its own task limit.
		 */
		if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) {
			printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n");
		}
	} else {
		/*
		 * HWM offender exists. Done without locks or synchronization.
		 * See comment near its declaration for more details.
		 */
		memorystatus_hwm_candidates = TRUE;
	}
}

/*
 * This is invoked when cpulimits have been exceeded while in fatal mode.
 * The jetsam_flags do not apply as those are for memory related kills.
 * We call this routine so that the offending process is killed with
 * a non-zero exit status.
 */
void
jetsam_on_ledger_cpulimit_exceeded(void)
{
	int retval = 0;
	int jetsam_flags = 0;  /* make it obvious */
	proc_t p = current_proc();

	printf("task_exceeded_cpulimit: killing pid %d [%s]\n",
		p->p_pid, (p->p_comm ? p->p_comm : "(unknown)"));

	retval = jetsam_do_kill(p, jetsam_flags);

	if (retval) {
		printf("task_exceeded_cpulimit: failed to kill current task (exiting?).\n");
	}
}

/*
 * Fetch page-count statistics for a task, all expressed in pages
 * (raw byte counts divided by PAGE_SIZE_64).
 *
 * footprint is mandatory; the remaining out-parameters are optional and
 * skipped when NULL.
 */
static void
memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages)
{
	assert(task);
	assert(footprint);

	*footprint = (uint32_t)(get_task_phys_footprint(task) / PAGE_SIZE_64);
	if (max_footprint) {
		*max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64);
	}
	if (max_footprint_lifetime) {
		*max_footprint_lifetime = (uint32_t)(get_task_resident_max(task) / PAGE_SIZE_64);
	}
	if (purgeable_pages) {
		*purgeable_pages = (uint32_t)(get_task_purgeable_size(task) / PAGE_SIZE_64);
	}
}

/*
 * Record the kill cause (and refresh stale priority-dependent fields) in the
 * existing snapshot entry for p, if one exists. No-op when p is not in the
 * snapshot. Caller holds the proc list lock (per the _locked suffix).
 */
static void
memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause)
{
	unsigned int i;

	for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) {
		if (memorystatus_jetsam_snapshot_list[i].pid == p->p_pid) {
			/* Update if the priority has changed since the snapshot was taken */
			if (memorystatus_jetsam_snapshot_list[i].priority != p->p_memstat_effectivepriority) {
				memorystatus_jetsam_snapshot_list[i].priority = p->p_memstat_effectivepriority;
				strlcpy(memorystatus_jetsam_snapshot_list[i].name, p->p_comm, MAXCOMLEN+1);
				memorystatus_jetsam_snapshot_list[i].state = memorystatus_build_state(p);
				memorystatus_jetsam_snapshot_list[i].user_data = p->p_memstat_userdata;
				memorystatus_jetsam_snapshot_list[i].fds = p->p_fd->fd_nfiles;
			}
			memorystatus_jetsam_snapshot_list[i].killed = kill_cause;
			return;
		}
	}
}

/*
 * Called by the pageout daemon with the current free-page count; updates
 * memorystatus_available_pages and wakes the jetsam/pressure machinery
 * as needed.
 */
void memorystatus_pages_update(unsigned int pages_avail)
{
	memorystatus_available_pages = pages_avail;

#if VM_PRESSURE_EVENTS
	/*
	 * Since memorystatus_available_pages
changes, we should 2141 * re-evaluate the pressure levels on the system and 2142 * check if we need to wake the pressure thread. 2143 * We also update memorystatus_level in that routine. 2144 */ 2145 vm_pressure_response(); 2146 2147 if (memorystatus_available_pages <= memorystatus_available_pages_pressure) { 2148 2149 if (memorystatus_hwm_candidates || (memorystatus_available_pages <= memorystatus_available_pages_critical)) { 2150 memorystatus_thread_wake(); 2151 } 2152 } 2153#else /* VM_PRESSURE_EVENTS */ 2154 2155 boolean_t critical, delta; 2156 2157 if (!memorystatus_delta) { 2158 return; 2159 } 2160 2161 critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE; 2162 delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta)) 2163 || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE; 2164 2165 if (critical || delta) { 2166 memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem); 2167 memorystatus_thread_wake(); 2168 } 2169#endif /* VM_PRESSURE_EVENTS */ 2170} 2171 2172static boolean_t 2173memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry) 2174{ 2175 clock_sec_t tv_sec; 2176 clock_usec_t tv_usec; 2177 2178 memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t)); 2179 2180 entry->pid = p->p_pid; 2181 strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1); 2182 entry->priority = p->p_memstat_effectivepriority; 2183 memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages, &entry->max_pages_lifetime, &entry->purgeable_pages); 2184 entry->state = memorystatus_build_state(p); 2185 entry->user_data = p->p_memstat_userdata; 2186 memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid)); 2187 entry->fds = p->p_fd->fd_nfiles; 2188 2189 absolutetime_to_microtime(get_task_cpu_time(p->task), &tv_sec, &tv_usec); 2190 entry->cpu_time.tv_sec = tv_sec; 2191 entry->cpu_time.tv_usec = tv_usec; 2192 2193 return 
TRUE; 2194} 2195 2196static void 2197memorystatus_jetsam_snapshot_procs_locked(void) 2198{ 2199 proc_t p, next_p; 2200 unsigned int b = 0, i = 0; 2201 kern_return_t kr = KERN_SUCCESS; 2202 2203 mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; 2204 vm_statistics64_data_t vm_stat; 2205 2206 if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count) != KERN_SUCCESS)) { 2207 printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr); 2208 memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats)); 2209 } else { 2210 memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count; 2211 memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count; 2212 memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count; 2213 memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count; 2214 memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count; 2215 memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count; 2216 2217 memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count; 2218 memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count; 2219 memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count; 2220 memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions; 2221 memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions; 2222 memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count; 2223 memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor; 2224 } 2225 2226 next_p = memorystatus_get_first_proc_locked(&b, TRUE); 2227 while (next_p) { 2228 p = next_p; 2229 next_p = memorystatus_get_next_proc_locked(&b, p, TRUE); 2230 2231 if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, 
&memorystatus_jetsam_snapshot_list[i])) { 2232 continue; 2233 } 2234 2235 MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", 2236 p->p_pid, 2237 p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7], 2238 p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]); 2239 2240 if (++i == memorystatus_jetsam_snapshot_max) { 2241 break; 2242 } 2243 } 2244 2245 memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time(); 2246 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i; 2247} 2248 2249#if DEVELOPMENT || DEBUG 2250 2251static int 2252memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) { 2253 int ret; 2254 memorystatus_jetsam_panic_options_t debug; 2255 2256 if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) { 2257 return EINVAL; 2258 } 2259 2260 ret = copyin(buffer, &debug, buffer_size); 2261 if (ret) { 2262 return ret; 2263 } 2264 2265 /* Panic bits match kMemorystatusKilled* enum */ 2266 memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask); 2267 2268 /* Copyout new value */ 2269 debug.data = memorystatus_jetsam_panic_debug; 2270 ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t)); 2271 2272 return ret; 2273} 2274 2275#endif 2276 2277/* 2278 * Jetsam a specific process. 2279 */ 2280static boolean_t 2281memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) { 2282 boolean_t killed; 2283 proc_t p; 2284 2285 /* TODO - add a victim queue and push this into the main jetsam thread */ 2286 2287 p = proc_find(victim_pid); 2288 if (!p) { 2289 return FALSE; 2290 } 2291 2292 printf("memorystatus: specifically killing pid %d [%s] (%s) - memorystatus_available_pages: %d\n", 2293 victim_pid, (p->p_comm ? 
 p->p_comm : "(unknown)"),
		jetsam_kill_cause_name[cause], memorystatus_available_pages);

	proc_list_lock();

	/* Make sure userspace has a snapshot describing this kill */
	if (memorystatus_jetsam_snapshot_count == 0) {
		memorystatus_jetsam_snapshot_procs_locked();
	}

	memorystatus_update_snapshot_locked(p, cause);
	proc_list_unlock();

	killed = memorystatus_do_kill(p, cause);
	proc_rele(p);

	return killed;
}

/*
 * Jetsam the first process in the queue.
 */
static boolean_t
memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors)
{
	pid_t aPid;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	boolean_t new_snapshot = FALSE, killed = FALSE;
	unsigned int i = 0;

#ifndef CONFIG_FREEZE
#pragma unused(any)
#endif

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
		memorystatus_available_pages, 0, 0, 0, 0);

	proc_list_lock();

	memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND);

	next_p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (next_p) {
#if DEVELOPMENT || DEBUG
		int activeProcess;
		int procSuspendedForDiagnosis;
#endif /* DEVELOPMENT || DEBUG */

		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

#if DEVELOPMENT || DEBUG
		activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND;
		procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED;
#endif /* DEVELOPMENT || DEBUG */

		aPid = p->p_pid;

		/* Skip processes already being torn down or marked failed */
		if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
			continue;
		}

#if DEVELOPMENT || DEBUG
		if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) {
			printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
			continue;
		}
#endif /* DEVELOPMENT || DEBUG */

		if (cause == kMemorystatusKilledVnodes)
		{
			/*
			 * If the system runs out of vnodes, we systematically jetsam
			 * processes in hopes of stumbling onto a vnode gain that helps
			 * the system recover. The process that happens to trigger
			 * this path has no known relationship to the vnode consumption.
			 * We attempt to safeguard that process e.g: do not jetsam it.
			 */

			if (p == current_proc()) {
				/* do not jetsam the current process */
				continue;
			}
		}

#if CONFIG_FREEZE
		/* Unless 'any' is set, spare procs that are locked or marked no-reclaim */
		boolean_t skip;
		boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM));
		if (any || reclaim_proc) {
			skip = FALSE;
		} else {
			skip = TRUE;
		}

		if (skip) {
			continue;
		} else
#endif
		{
			if (priority) {
				*priority = p->p_memstat_effectivepriority;
			}

			/*
			 * Capture a snapshot if none exists and:
			 * - priority was not requested (this is something other than an ambient kill)
			 * - the priority was requested *and* the targeted process is not at idle priority
			 */
			if ((memorystatus_jetsam_snapshot_count == 0) &&
				(memorystatus_idle_snapshot || ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE))))) {
				memorystatus_jetsam_snapshot_procs_locked();
				new_snapshot = TRUE;
			}

			/*
			 * Mark as terminated so that if exit1() indicates success, but the process (for example)
			 * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
			 * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
			 * acquisition of the proc lock.
			 */
			p->p_memstat_state |= P_MEMSTAT_TERMINATED;

#if DEVELOPMENT || DEBUG
			if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) {
				/* Diagnostic mode: suspend rather than kill the active process */
				MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
					aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level);
				memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
				p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
				if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) {
					jetsam_diagnostic_suspended_one_active_proc = 1;
					printf("jetsam: returning after suspending first active proc - %d\n", aPid);
				}

				p = proc_ref_locked(p);
				proc_list_unlock();
				if (p) {
					task_suspend(p->task);
					proc_rele(p);
					killed = TRUE;
				}

				goto exit;
			} else
#endif /* DEVELOPMENT || DEBUG */
			{
				/* Shift queue, update stats */
				memorystatus_update_snapshot_locked(p, cause);

				p = proc_ref_locked(p);
				proc_list_unlock();
				if (p) {
					printf("memorystatus: %s %d [%s] (%s) - memorystatus_available_pages: %d\n",
						((p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) ?
						"idle exiting pid" : "jetsam killing pid"),
						aPid, (p->p_comm ? p->p_comm : "(unknown)"),
						jetsam_kill_cause_name[cause], memorystatus_available_pages);
					killed = memorystatus_do_kill(p, cause);
				}

				/* Success? */
				if (killed) {
					proc_rele(p);
					goto exit;
				}

				/* Failure - unwind and restart. */
				/* NOTE(review): if proc_ref_locked() returned NULL above, p is NULL
				 * here and proc_rele_locked(p)/p->p_memstat_state would dereference
				 * NULL - confirm whether that path is reachable. */
				proc_list_lock();
				proc_rele_locked(p);
				p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
				p->p_memstat_state |= P_MEMSTAT_ERROR;
				*errors += 1;
				i = 0;
				next_p = memorystatus_get_first_proc_locked(&i, TRUE);
			}
		}
	}

	proc_list_unlock();

exit:
	/* Clear snapshot if freshly captured and no target was found */
	if (new_snapshot && !killed) {
		memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
		memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);

	return killed;
}

#if LEGACY_HIWATER

/*
 * Jetsam (or, in diagnostic mode, suspend) the first process found over its
 * high-watermark memory limit. Returns TRUE if a process was killed/suspended.
 */
static boolean_t
memorystatus_kill_hiwat_proc(uint32_t *errors)
{
	pid_t aPid = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	boolean_t new_snapshot = FALSE, killed = FALSE;
	unsigned int i = 0;

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
		memorystatus_available_pages, 0, 0, 0, 0);

	proc_list_lock();
	memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND);

	next_p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (next_p) {
		uint32_t footprint;
		boolean_t skip;

		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

		aPid = p->p_pid;

		if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
			continue;
		}

		/* skip if no limit set */
		if (p->p_memstat_memlimit <= 0) {
			continue;
		}

		/* skip if a currently inapplicable limit is encountered */
		if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
			continue;
		}

		/* Footprint in MB, compared against the per-process limit */
		footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024));
		skip = (((int32_t)footprint) <=
 p->p_memstat_memlimit);
#if DEVELOPMENT || DEBUG
		if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
			if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) {
				continue;
			}
		}
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE
		/* Never touch a process currently locked by the freezer */
		if (!skip) {
			if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
				skip = TRUE;
			} else {
				skip = FALSE;
			}
		}
#endif

		if (skip) {
			continue;
		} else {
			MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d Mb > 1 (%d Mb)\n",
				(memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit);

			if (memorystatus_jetsam_snapshot_count == 0) {
				memorystatus_jetsam_snapshot_procs_locked();
				new_snapshot = TRUE;
			}

			/* Mark terminated so the proc is skipped if encountered again */
			p->p_memstat_state |= P_MEMSTAT_TERMINATED;

#if DEVELOPMENT || DEBUG
			if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
				/* Diagnostic mode: suspend instead of kill */
				MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
				memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
				p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;

				p = proc_ref_locked(p);
				proc_list_unlock();
				if (p) {
					task_suspend(p->task);
					proc_rele(p);
					killed = TRUE;
				}

				goto exit;
			} else
#endif /* DEVELOPMENT || DEBUG */
			{
				memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat);

				p = proc_ref_locked(p);
				proc_list_unlock();
				if (p) {
					printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n",
						aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
					killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
				}

				/* Success? */
				if (killed) {
					proc_rele(p);
					goto exit;
				}

				/* Failure - unwind and restart. */
				/* NOTE(review): as in memorystatus_kill_top_process, p may be NULL
				 * here if proc_ref_locked() failed - confirm. */
				proc_list_lock();
				proc_rele_locked(p);
				p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
				p->p_memstat_state |= P_MEMSTAT_ERROR;
				*errors += 1;
				i = 0;
				next_p = memorystatus_get_first_proc_locked(&i, TRUE);
			}
		}
	}

	proc_list_unlock();

exit:
	/* Clear snapshot if freshly captured and no target was found */
	if (new_snapshot && !killed) {
		memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END,
		memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);

	return killed;
}

#endif /* LEGACY_HIWATER */

/*
 * Queue an asynchronous kill on the jetsam thread. Only supported for
 * victim_pid == -1 (kill first eligible) with a page-shortage/thrashing
 * cause; returns FALSE for anything else.
 */
static boolean_t
memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) {
	/* TODO: allow a general async path */
	if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage && cause != kMemorystatusKilledVMThrashing &&
		cause != kMemorystatusKilledFCThrashing)) {
		return FALSE;
	}

	kill_under_pressure_cause = cause;
	memorystatus_thread_wake();
	return TRUE;
}

/*
 * Synchronously kill either a specific pid or (victim_pid == -1) the first
 * eligible process; posts a snapshot notification on success.
 */
static boolean_t
memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) {
	boolean_t res;
	uint32_t errors = 0;

	if (victim_pid == -1) {
		/* No pid, so kill first process */
		res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors);
	} else {
		res = memorystatus_kill_specific_process(victim_pid, cause);
	}

	if (errors) {
		memorystatus_clear_errors();
	}

	if (res == TRUE) {
		/* Fire off snapshot notification */
		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
			sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
		memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
		memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
	}

	return res;
}

/* Convenience wrappers: kill first eligible process for a specific cause */

boolean_t
memorystatus_kill_on_VM_page_shortage(boolean_t async) {
	if (async) {
		return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage);
	} else {
		return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage);
	}
}

boolean_t
memorystatus_kill_on_VM_thrashing(boolean_t async) {
	if (async) {
		return memorystatus_kill_process_async(-1, kMemorystatusKilledVMThrashing);
	} else {
		return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMThrashing);
	}
}

boolean_t
memorystatus_kill_on_FC_thrashing(boolean_t async) {
	if (async) {
		return memorystatus_kill_process_async(-1, kMemorystatusKilledFCThrashing);
	} else {
		return memorystatus_kill_process_sync(-1, kMemorystatusKilledFCThrashing);
	}
}

boolean_t
memorystatus_kill_on_vnode_limit(void) {
	return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes);
}

#endif /* CONFIG_JETSAM */

#if CONFIG_FREEZE

/* Spin up the freezer thread; panics on failure since freeze cannot operate without it */
__private_extern__ void
memorystatus_freeze_init(void)
{
	kern_return_t result;
	thread_t thread;

	result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
	if (result == KERN_SUCCESS) {
		thread_deallocate(thread);
	} else {
		panic("Could not create memorystatus_freeze_thread");
	}
}

/*
 * Freeze the first eligible process found in the queue (at most one per call).
 * Returns the number of dirty pages reclaimed on success, -1 otherwise;
 * sets *memorystatus_freeze_swap_low when swap space is too low to freeze.
 */
static int
memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
{
	pid_t aPid = 0;
	int ret = -1;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	unsigned int i = 0;

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
		memorystatus_available_pages, 0, 0, 0, 0);

	proc_list_lock();

	next_p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (next_p) {
		kern_return_t kr;
		uint32_t purgeable, wired, clean, dirty;
		boolean_t shared;
		uint32_t pages;
		uint32_t max_pages = 0;
		uint32_t state;

		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

		aPid = p->p_pid;
		state = p->p_memstat_state;

		/* Ensure the process is eligible for freezing */
		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) {
			continue; // with lock held
		}

		/* Only freeze processes meeting our minimum resident page criteria */
		memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
		if (pages < memorystatus_freeze_pages_min) {
			continue; // with lock held
		}

		if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
			/* Ensure there's enough free space to freeze this process. */
			max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
			if (max_pages < memorystatus_freeze_pages_min) {
				*memorystatus_freeze_swap_low = TRUE;
				proc_list_unlock();
				goto exit;
			}
		} else {
			/* Swapless compressed pager: no practical page budget */
			max_pages = UINT32_MAX - 1;
		}

		/* Mark as locked temporarily to avoid kill */
		p->p_memstat_state |= P_MEMSTAT_LOCKED;

		p = proc_ref_locked(p);
		proc_list_unlock();
		if (!p) {
			goto exit;
		}

		/* Freeze with the proc list lock dropped; the LOCKED bit protects p from jetsam meanwhile */
		kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);

		MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
			"memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n",
			(kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"),
			memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());

		proc_list_lock();
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;

		/* Success? */
		if (KERN_SUCCESS == kr) {
			memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

			memorystatus_frozen_count++;

			p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0: P_MEMSTAT_NORECLAIM));

			/* Update stats */
			/* NOTE(review): this reuses the outer loop index 'i'; harmless only
			 * because the function exits after one freeze attempt - confirm. */
			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
				throttle_intervals[i].pageouts += dirty;
			}

			memorystatus_freeze_pageouts += dirty;
			memorystatus_freeze_count++;

			proc_list_unlock();

			memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

			/* Return the number of reclaimed pages */
			ret = dirty;

		} else {
			proc_list_unlock();
		}

		proc_rele(p);
		goto exit;
	}

	proc_list_unlock();

exit:
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
		memorystatus_available_pages, aPid, 0, 0, 0);

	return ret;
}

/*
 * Estimate whether enough suspended, not-yet-frozen processes exist to make
 * freezing worthwhile. Temporarily lowers the suspended-process threshold
 * when the estimate predicts no freeze would otherwise occur.
 */
static inline boolean_t
memorystatus_can_freeze_processes(void)
{
	boolean_t ret;

	proc_list_lock();

	if (memorystatus_suspended_count) {
		uint32_t average_resident_pages, estimated_processes;

		/* Estimate the number of suspended processes we can fit */
		average_resident_pages = memorystatus_suspended_footprint_total / memorystatus_suspended_count;
		estimated_processes = memorystatus_suspended_count +
			((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages);

		/* If it's predicted that no freeze will occur, lower the threshold temporarily */
		if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) {
			memorystatus_freeze_suspended_threshold =
 FREEZE_SUSPENDED_THRESHOLD_LOW;
		} else {
			memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
		}

		MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n",
			memorystatus_suspended_count, average_resident_pages, estimated_processes);

		if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
			ret = TRUE;
		} else {
			ret = FALSE;
		}
	} else {
		/* Nothing suspended - nothing to freeze */
		ret = FALSE;
	}

	proc_list_unlock();

	return ret;
}

/*
 * Gate for the freezer: TRUE only when memory is low enough, enough suspended
 * candidates exist, and swap space is not exhausted. Clears the caller's
 * swap-low latch once free swap has recovered above the minimum.
 */
static boolean_t
memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
{
	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
	   after boot, and is generally is a no-op once we've reached steady state. */
	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
		return FALSE;
	}

	/* Check minimum suspended process threshold. */
	if (!memorystatus_can_freeze_processes()) {
		return FALSE;
	}

	/* Is swap running low? */
	if (*memorystatus_freeze_swap_low) {
		/* If there's been no movement in free swap pages since we last attempted freeze, return. */
		if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
			return FALSE;
		}

		/* Pages have been freed - we can retry. */
		*memorystatus_freeze_swap_low = FALSE;
	}

	/* OK */
	return TRUE;
}

/*
 * Roll one throttle interval forward against the current timestamp ts:
 * when the interval has expired, compute/reset its pageout budget and
 * re-arm its deadline; otherwise enable throttling once the budget is spent.
 */
static void
memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
{
	if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
		if (!interval->max_pageouts) {
			/* First expiry: derive the budget from the daily cap, scaled to the interval length */
			interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)));
		} else {
			printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins);
		}
		interval->ts.tv_sec = interval->mins * 60;
		interval->ts.tv_nsec = 0;
		ADD_MACH_TIMESPEC(&interval->ts, ts);
		/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
		if (interval->pageouts > interval->max_pageouts) {
			interval->pageouts -= interval->max_pageouts;
		} else {
			interval->pageouts = 0;
		}
		interval->throttle = FALSE;
	} else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
		printf("memorystatus_freeze_update_throttle_interval: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
		interval->throttle = TRUE;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
		interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
		interval->throttle ?
 "on" : "off");
}

/*
 * Update all throttle intervals against the current time; returns TRUE if any
 * interval is throttling (i.e. freezing should be skipped this pass).
 */
static boolean_t
memorystatus_freeze_update_throttle(void)
{
	clock_sec_t sec;
	clock_nsec_t nsec;
	mach_timespec_t ts;
	uint32_t i;
	boolean_t throttled = FALSE;

#if DEVELOPMENT || DEBUG
	if (!memorystatus_freeze_throttle_enabled)
		return FALSE;
#endif

	clock_get_system_nanotime(&sec, &nsec);
	ts.tv_sec = sec;
	ts.tv_nsec = nsec;

	/* Check freeze pageouts over multiple intervals and throttle if we've exceeded our budget.
	 *
	 * This ensures that periods of inactivity can't be used as 'credit' towards freeze if the device has
	 * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in
	 * order to allow for bursts of activity.
	 */
	for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
		memorystatus_freeze_update_throttle_interval(&ts, &throttle_intervals[i]);
		if (throttle_intervals[i].throttle == TRUE)
			throttled = TRUE;
	}

	return throttled;
}

/*
 * Freezer thread body: attempt one freeze pass when enabled, eligible and not
 * throttled, then block until the next memorystatus_freeze_wakeup.
 */
static void
memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
{
	/* Latched across wakeups; set when swap runs low, cleared by memorystatus_can_freeze() */
	static boolean_t memorystatus_freeze_swap_low = FALSE;

	if (memorystatus_freeze_enabled) {
		if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
			/* Only freeze if we've not exceeded our pageout budgets or we're not backed by swap. */
			if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ||
			    !memorystatus_freeze_update_throttle()) {
				memorystatus_freeze_top_process(&memorystatus_freeze_swap_low);
			} else {
				printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n");
				memorystatus_freeze_throttle_count++; /* Throttled, update stats */
			}
		}
	}

	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
	thread_block((thread_continue_t) memorystatus_freeze_thread);
}

#endif /* CONFIG_FREEZE */

#if VM_PRESSURE_EVENTS

#if CONFIG_MEMORYSTATUS

/*
 * Post a memorystatus kernel event (KEV_MEMORYSTATUS_SUBCLASS) carrying the
 * given payload. Returns the kev_post_msg() result (0 on success).
 */
static int
memorystatus_send_note(int event_code, void *data, size_t data_length) {
	int ret;
	struct kev_msg ev_msg;

	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_SYSTEM_CLASS;
	ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS;

	ev_msg.event_code = event_code;

	ev_msg.dv[0].data_length = data_length;
	ev_msg.dv[0].data_ptr = data;
	ev_msg.dv[1].data_length = 0;

	ret = kev_post_msg(&ev_msg);
	if (ret) {
		printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
	}

	return ret;
}

/*
 * Warn pid that it is approaching/at its memory limit: deliver a pressure
 * knote if the process registered one, otherwise fall back to a dispatch
 * pressure note. Returns TRUE if a warning was delivered.
 */
boolean_t
memorystatus_warn_process(pid_t pid, boolean_t critical) {

	boolean_t ret = FALSE;
	struct knote *kn = NULL;

	/*
	 * See comment in sysctl_memorystatus_vm_pressure_send.
	 */

	memorystatus_klist_lock();
	kn = vm_find_knote_from_pid(pid, &memorystatus_klist);
	if (kn) {
		/*
		 * By setting the "fflags" here, we are forcing
		 * a process to deal with the case where it's
		 * bumping up into its memory limits. If we don't
		 * do this here, we will end up depending on the
		 * system pressure snapshot evaluation in
		 * filt_memorystatus().
3019 */ 3020 3021 if (critical) { 3022 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; 3023 } else { 3024 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; 3025 } 3026 KNOTE(&memorystatus_klist, kMemorystatusPressure); 3027 ret = TRUE; 3028 } else { 3029 if (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0) { 3030 ret = TRUE; 3031 } 3032 } 3033 memorystatus_klist_unlock(); 3034 3035 return ret; 3036} 3037 3038int 3039memorystatus_send_pressure_note(pid_t pid) { 3040 MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid); 3041 return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid)); 3042} 3043 3044void 3045memorystatus_send_low_swap_note(void) { 3046 3047 struct knote *kn = NULL; 3048 3049 memorystatus_klist_lock(); 3050 SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { 3051 if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) { 3052 KNOTE(&memorystatus_klist, kMemorystatusLowSwap); 3053 } 3054 } 3055 memorystatus_klist_unlock(); 3056} 3057 3058boolean_t 3059memorystatus_bg_pressure_eligible(proc_t p) { 3060 boolean_t eligible = FALSE; 3061 3062 proc_list_lock(); 3063 3064 MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state); 3065 3066 /* Foreground processes have already been dealt with at this point, so just test for eligibility */ 3067 if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) { 3068 eligible = TRUE; 3069 } 3070 3071 proc_list_unlock(); 3072 3073 return eligible; 3074} 3075 3076boolean_t 3077memorystatus_is_foreground_locked(proc_t p) { 3078 return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) || 3079 (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT)); 3080} 3081#endif /* CONFIG_MEMORYSTATUS */ 3082 3083/* 3084 * Trigger levels to test the mechanism. 3085 * Can be used via a sysctl. 
 */
/* Encodings for the manual-trigger sysctl: high 16 bits of the written value. */
#define TEST_LOW_MEMORY_TRIGGER_ONE		1
#define TEST_LOW_MEMORY_TRIGGER_ALL		2
#define TEST_PURGEABLE_TRIGGER_ONE		3
#define TEST_PURGEABLE_TRIGGER_ALL		4
#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE	5
#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL	6

/* Set while a manual (sysctl-driven) pressure test is in progress. */
boolean_t		memorystatus_manual_testing_on = FALSE;
vm_pressure_level_t	memorystatus_manual_testing_level = kVMPressureNormal;

extern struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *, int, boolean_t);

extern
kern_return_t vm_pressure_notification_without_levels(boolean_t);

extern void vm_pressure_klist_lock(void);
extern void vm_pressure_klist_unlock(void);

extern void vm_reset_active_list(void);

extern void delay(int);

#define INTER_NOTIFICATION_DELAY	(250000)	/* .25 second */

/* Hook called at the end of a pageout scan; intentionally a no-op here. */
void memorystatus_on_pageout_scan_end(void) {
	/* No-op */
}

/*
 * kn_max - knote
 *
 * knote_pressure_level - to check if the knote is registered for this notification level.
 *
 * task	- task whose bits we'll be modifying
 *
 * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again.
 *
 * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately.
 *
 */

/*
 * Returns TRUE (and updates the task's notified-level bits) iff the knote is
 * registered for knote_pressure_level. See the parameter description above.
 */
boolean_t
is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set)
{
	if (kn_max->kn_sfflags & knote_pressure_level) {

		if (task_has_been_notified(task, pressure_level_to_clear) == TRUE) {

			task_clear_has_been_notified(task, pressure_level_to_clear);
		}

		task_mark_has_been_notified(task, pressure_level_to_set);
		return TRUE;
	}

	return FALSE;
}

extern kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process);

#define VM_PRESSURE_DECREASED_SMOOTHING_PERIOD		5000	/* milliseconds */

/*
 * Main pressure-notification pump. Repeatedly:
 *   - snapshots the current pressure level (smoothing dips for 5s before
 *     believing a decrease),
 *   - picks the best candidate knote for that level, takes a proc ref,
 *   - marks the knote's fflags via is_knote_registered_modify_task_pressure_bits
 *     and fires it on a private dispatch klist (so only that one knote fires),
 *   - falls back to the non-level dispatch VM clients when no knote is left
 *     and pressure is non-normal.
 * Returns KERN_SUCCESS when the loop completes (manual-test break), or
 * KERN_FAILURE when there is nobody left to notify.
 */
kern_return_t
memorystatus_update_vm_pressure(boolean_t target_foreground_process)
{
	struct knote			*kn_max = NULL;
	pid_t				target_pid = -1;
	struct klist			dispatch_klist = { NULL };
	proc_t				target_proc = PROC_NULL;
	struct task			*task = NULL;
	boolean_t			found_candidate = FALSE;

	/* static: level history must persist across invocations for smoothing */
	static vm_pressure_level_t	level_snapshot = kVMPressureNormal;
	static vm_pressure_level_t	prev_level_snapshot = kVMPressureNormal;
	boolean_t			smoothing_window_started = FALSE;
	struct timeval			smoothing_window_start_tstamp = {0, 0};
	struct timeval			curr_tstamp = {0, 0};
	int				elapsed_msecs = 0;

#if !CONFIG_JETSAM
#define MAX_IDLE_KILLS 100	/* limit the number of idle kills allowed */

	int	idle_kill_counter = 0;

	/*
	 * On desktop we take this opportunity to free up memory pressure
	 * by immediately killing idle exitable processes. We use a delay
	 * to avoid overkill.  And we impose a max counter as a fail safe
	 * in case daemons re-launch too fast.
	 */
	while ((memorystatus_vm_pressure_level != kVMPressureNormal) && (idle_kill_counter < MAX_IDLE_KILLS)) {
		if (memorystatus_idle_exit_from_VM() == FALSE) {
			/* No idle exitable processes left to kill */
			break;
		}
		idle_kill_counter++;
		delay(1000000); /* 1 second */
	}
#endif /* !CONFIG_JETSAM */

	while (1) {

		/*
		 * There is a race window here. But it's not clear
		 * how much we benefit from having extra synchronization.
		 */
		level_snapshot = memorystatus_vm_pressure_level;

		if (prev_level_snapshot > level_snapshot) {
			/*
			 * Pressure decreased? Let's take a little breather
			 * and see if this condition stays.
			 */
			if (smoothing_window_started == FALSE) {

				smoothing_window_started = TRUE;
				microuptime(&smoothing_window_start_tstamp);
			}

			microuptime(&curr_tstamp);
			timevalsub(&curr_tstamp, &smoothing_window_start_tstamp);
			elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;

			if (elapsed_msecs < VM_PRESSURE_DECREASED_SMOOTHING_PERIOD) {

				delay(INTER_NOTIFICATION_DELAY);
				continue;
			}
		}

		prev_level_snapshot = level_snapshot;
		smoothing_window_started = FALSE;

		memorystatus_klist_lock();
		kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot, target_foreground_process);

		if (kn_max == NULL) {
			memorystatus_klist_unlock();

			/*
			 * No more level-based clients to notify.
			 * Try the non-level based notification clients.
			 *
			 * However, these non-level clients don't understand
			 * the "return-to-normal" notification.
			 *
			 * So don't consider them for those notifications. Just
			 * return instead.
			 *
			 */

			if (level_snapshot != kVMPressureNormal) {
				goto try_dispatch_vm_clients;
			} else {
				return KERN_FAILURE;
			}
		}

		target_proc = kn_max->kn_kq->kq_p;

		/* Take a ref on the target proc; if it is exiting, skip it. */
		proc_list_lock();
		if (target_proc != proc_ref_locked(target_proc)) {
			target_proc = PROC_NULL;
			proc_list_unlock();
			memorystatus_klist_unlock();
			continue;
		}
		proc_list_unlock();
		memorystatus_klist_unlock();

		target_pid = target_proc->p_pid;

		task = (struct task *)(target_proc->task);

		if (level_snapshot != kVMPressureNormal) {

			if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) {

				if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, kVMPressureCritical, kVMPressureWarning) == TRUE) {
					found_candidate = TRUE;
				}
			} else {
				if (level_snapshot == kVMPressureCritical) {

					if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, kVMPressureWarning, kVMPressureCritical) == TRUE) {
						found_candidate = TRUE;
					}
				}
			}
		} else {
			/* Return-to-normal: reset both notified bits so future
			 * warnings/criticals fire again. */
			if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {

				task_clear_has_been_notified(task, kVMPressureWarning);
				task_clear_has_been_notified(task, kVMPressureCritical);

				found_candidate = TRUE;
			}
		}

		if (found_candidate == FALSE) {
			continue;
		}

		/*
		 * Move the chosen knote onto a private klist so that KNOTE()
		 * fires only this one listener, then move it back.
		 */
		memorystatus_klist_lock();
		KNOTE_DETACH(&memorystatus_klist, kn_max);
		KNOTE_ATTACH(&dispatch_klist, kn_max);
		memorystatus_klist_unlock();

		KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure);

		memorystatus_klist_lock();
		KNOTE_DETACH(&dispatch_klist, kn_max);
		KNOTE_ATTACH(&memorystatus_klist, kn_max);
		memorystatus_klist_unlock();

		microuptime(&target_proc->vm_pressure_last_notify_tstamp);
		proc_rele(target_proc);

		if (memorystatus_manual_testing_on == TRUE && target_foreground_process == TRUE) {
			break;
		}

try_dispatch_vm_clients:
		if (kn_max == NULL && level_snapshot != kVMPressureNormal) {
			/*
			 * We will exit this loop when we are done with
			 * notification clients (level and non-level based).
			 */
			if ((vm_pressure_notify_dispatch_vm_clients(target_foreground_process) == KERN_FAILURE) && (kn_max == NULL)) {
				/*
				 * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications
				 * AND
				 * we have failed to find any eligible clients for the non-level based notifications too.
				 * So, we are done.
				 */

				return KERN_FAILURE;
			}
		}

		/*
		 * LD: This block of code below used to be invoked in the older memory notification scheme on embedded everytime
		 * a process was sent a memory pressure notification. The "memorystatus_klist" list was used to hold these
		 * privileged listeners. But now we have moved to the newer scheme and are trying to move away from the extra
		 * notifications. So the code is here in case we break compat. and need to send out notifications to the privileged
		 * apps.
		 */
#if 0
#endif /* 0 */

		if (memorystatus_manual_testing_on == TRUE) {
			/*
			 * Testing out the pressure notification scheme.
			 * No need for delays etc.
			 */
		} else {

			uint32_t sleep_interval = INTER_NOTIFICATION_DELAY;
#if CONFIG_JETSAM
			unsigned int page_delta = 0;
			unsigned int skip_delay_page_threshold = 0;

			assert(memorystatus_available_pages_pressure >= memorystatus_available_pages_critical_base);

			page_delta = (memorystatus_available_pages_pressure - memorystatus_available_pages_critical_base) / 2;
			skip_delay_page_threshold = memorystatus_available_pages_pressure - page_delta;

			if (memorystatus_available_pages <= skip_delay_page_threshold) {
				/*
				 * We are nearing the critcal mark fast and can't afford to wait between
				 * notifications.
				 */
				sleep_interval = 0;
			}
#endif /* CONFIG_JETSAM */

			if (sleep_interval) {
				delay(sleep_interval);
			}
		}
	}

	return KERN_SUCCESS;
}

vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);

/*
 * Map an internal kVMPressure* level to the user-visible
 * NOTE_MEMORYSTATUS_PRESSURE_* constant (Warning and Urgent both map to WARN).
 */
vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level)
{
	vm_pressure_level_t	dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;

	switch (internal_pressure_level) {

		case kVMPressureNormal:
		{
			dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
			break;
		}

		case kVMPressureWarning:
		case kVMPressureUrgent:
		{
			dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN;
			break;
		}

		case kVMPressureCritical:
		{
			dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
			break;
		}

		default:
			break;
	}

	return dispatch_level;
}

/* Read-only sysctl: report the current pressure level in user-space terms. */
static int
sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level);

	return SYSCTL_OUT(req, &dispatch_level,
	    sizeof(dispatch_level));
}

#if DEBUG || DEVELOPMENT

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
    0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");

#else /* DEBUG || DEVELOPMENT */

/* On release kernels the node is additionally MASKED (hidden from listings). */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");

#endif /* DEBUG || DEVELOPMENT */

extern int memorystatus_purge_on_warning;
extern int memorystatus_purge_on_critical;

/*
 * Write-only test sysctl. The written int encodes:
 *   high 16 bits: a TEST_* trigger request (see defines above)
 *   low 16 bits:  a NOTE_MEMORYSTATUS_PRESSURE_* level
 * Sets the manual-testing level, optionally purges purgeable memory, then
 * drives memorystatus_update_vm_pressure() for one or all candidates.
 */
static int
sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int level = 0;
	int error = 0;
	int pressure_level = 0;
	int trigger_request = 0;
	int force_purge;

	error = sysctl_handle_int(oidp, &level, 0, req);
	if (error || !req->newptr) {
		return (error);
	}

	memorystatus_manual_testing_on = TRUE;

	trigger_request = (level >> 16) & 0xFFFF;
	pressure_level = (level & 0xFFFF);

	if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE ||
	    trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) {
		return EINVAL;
	}
	switch (pressure_level) {
	case NOTE_MEMORYSTATUS_PRESSURE_NORMAL:
	case NOTE_MEMORYSTATUS_PRESSURE_WARN:
	case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL:
		break;
	default:
		return EINVAL;
	}

	/*
	 * The pressure level is being set from user-space.
	 * And user-space uses the constants in sys/event.h
	 * So we translate those events to our internal levels here.
	 */
	/* All three accepted levels are covered below, so force_purge is always set. */
	if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {

		memorystatus_manual_testing_level = kVMPressureNormal;
		force_purge = 0;

	} else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) {

		memorystatus_manual_testing_level = kVMPressureWarning;
		force_purge = memorystatus_purge_on_warning;

	} else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {

		memorystatus_manual_testing_level = kVMPressureCritical;
		force_purge = memorystatus_purge_on_critical;
	}

	memorystatus_vm_pressure_level = memorystatus_manual_testing_level;

	/* purge according to the new pressure level */
	switch (trigger_request) {
	case TEST_PURGEABLE_TRIGGER_ONE:
	case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE:
		if (force_purge == 0) {
			/* no purging requested */
			break;
		}
		vm_purgeable_object_purge_one_unlocked(force_purge);
		break;
	case TEST_PURGEABLE_TRIGGER_ALL:
	case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL:
		if (force_purge == 0) {
			/* no purging requested */
			break;
		}
		while (vm_purgeable_object_purge_one_unlocked(force_purge));
		break;
	}

	if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) ||
	    (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) {

		memorystatus_update_vm_pressure(TRUE);
	}

	if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) ||
	    (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) {

		while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) {
			continue;
		}
	}

	if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
		/* Back to normal: end the test and rearm all listeners. */
		memorystatus_manual_testing_on = FALSE;

		vm_pressure_klist_lock();
		vm_reset_active_list();
		vm_pressure_klist_unlock();
	} else {

		vm_pressure_klist_lock();
		vm_pressure_notification_without_levels(FALSE);
		vm_pressure_klist_unlock();
	}

return 0; 3538} 3539 3540SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, 3541 0, 0, &sysctl_memorypressure_manual_trigger, "I", ""); 3542 3543 3544extern int memorystatus_purge_on_warning; 3545extern int memorystatus_purge_on_urgent; 3546extern int memorystatus_purge_on_critical; 3547 3548SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_warning, 0, ""); 3549SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_urgent, 0, ""); 3550SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, ""); 3551 3552 3553#endif /* VM_PRESSURE_EVENTS */ 3554 3555/* Return both allocated and actual size, since there's a race between allocation and list compilation */ 3556static int 3557memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only) 3558{ 3559 uint32_t list_count, i = 0; 3560 memorystatus_priority_entry_t *list_entry; 3561 proc_t p; 3562 3563 list_count = memorystatus_list_count; 3564 *list_size = sizeof(memorystatus_priority_entry_t) * list_count; 3565 3566 /* Just a size check? 
*/ 3567 if (size_only) { 3568 return 0; 3569 } 3570 3571 /* Otherwise, validate the size of the buffer */ 3572 if (*buffer_size < *list_size) { 3573 return EINVAL; 3574 } 3575 3576 *list_ptr = (memorystatus_priority_entry_t*)kalloc(*list_size); 3577 if (!list_ptr) { 3578 return ENOMEM; 3579 } 3580 3581 memset(*list_ptr, 0, *list_size); 3582 3583 *buffer_size = *list_size; 3584 *list_size = 0; 3585 3586 list_entry = *list_ptr; 3587 3588 proc_list_lock(); 3589 3590 p = memorystatus_get_first_proc_locked(&i, TRUE); 3591 while (p && (*list_size < *buffer_size)) { 3592 list_entry->pid = p->p_pid; 3593 list_entry->priority = p->p_memstat_effectivepriority; 3594 list_entry->user_data = p->p_memstat_userdata; 3595#if LEGACY_HIWATER 3596 if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) || 3597 (p->p_memstat_memlimit <= 0)) { 3598 task_get_phys_footprint_limit(p->task, &list_entry->limit); 3599 } else { 3600 list_entry->limit = p->p_memstat_memlimit; 3601 } 3602#else 3603 task_get_phys_footprint_limit(p->task, &list_entry->limit); 3604#endif 3605 list_entry->state = memorystatus_build_state(p); 3606 list_entry++; 3607 3608 *list_size += sizeof(memorystatus_priority_entry_t); 3609 3610 p = memorystatus_get_next_proc_locked(&i, p, TRUE); 3611 } 3612 3613 proc_list_unlock(); 3614 3615 MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size); 3616 3617 return 0; 3618} 3619 3620static int 3621memorystatus_cmd_get_priority_list(user_addr_t buffer, size_t buffer_size, int32_t *retval) { 3622 int error = EINVAL; 3623 boolean_t size_only; 3624 memorystatus_priority_entry_t *list = NULL; 3625 size_t list_size; 3626 3627 size_only = ((buffer == USER_ADDR_NULL) ? 
	    TRUE: FALSE);

	error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only);
	if (error) {
		goto out;
	}

	if (!size_only) {
		error = copyout(list, buffer, list_size);
	}

	/* Report the number of bytes (list or would-be list) to the caller. */
	if (error == 0) {
		*retval = list_size;
	}
out:

	if (list) {
		kfree(list, buffer_size);
	}

	return error;
}

#if CONFIG_JETSAM

/* Clear the P_MEMSTAT_ERROR flag on every process in every jetsam bucket. */
static void
memorystatus_clear_errors(void)
{
	proc_t p;
	unsigned int i = 0;

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0);

	proc_list_lock();

	p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (p) {
		if (p->p_memstat_state & P_MEMSTAT_ERROR) {
			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
		}
		p = memorystatus_get_next_proc_locked(&i, p, TRUE);
	}

	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

/*
 * Recompute the critical (and, unless critical_only, the pressure)
 * available-page thresholds. Caller is expected to hold the proc list lock
 * (function is named "_locked") — confirm at call sites.
 */
static void
memorystatus_update_levels_locked(boolean_t critical_only) {

	memorystatus_available_pages_critical = memorystatus_available_pages_critical_base;

	/*
	 * If there's an entry in the first bucket, we have idle processes.
	 */
	memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
	if (first_bucket->count) {
		memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset;

		if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure ) {
			/*
			 * The critical threshold must never exceed the pressure threshold
			 */
			memorystatus_available_pages_critical = memorystatus_available_pages_pressure;
		}
	}

#if DEBUG || DEVELOPMENT
	if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
		memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic;

		if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure ) {
			/*
			 * The critical threshold must never exceed the pressure threshold
			 */
			memorystatus_available_pages_critical = memorystatus_available_pages_pressure;
		}
	}
#endif

	if (critical_only) {
		return;
	}

#if VM_PRESSURE_EVENTS
	memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
#if DEBUG || DEVELOPMENT
	if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
		memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic;
	}
#endif
#endif
}

/*
 * Return a pointer to the shared jetsam snapshot and its current size.
 * On entry *snapshot_size is the caller's buffer size; on exit it is the
 * snapshot's actual size. Returns 0, or EINVAL if the caller's buffer is
 * too small (non-size_only path).
 */
static int
memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
	size_t input_size = *snapshot_size;

	if (memorystatus_jetsam_snapshot_count > 0) {
		*snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count));
	} else {
		*snapshot_size = 0;
	}

	if (size_only) {
		return 0;
	}

	if (input_size < *snapshot_size) {
		return EINVAL;
	}

	*snapshot =
	    memorystatus_jetsam_snapshot;

	MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size);

	return 0;
}


/*
 * MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT handler: copy the snapshot to user
 * space (or just report its size) and, on a successful copyout, reset the
 * snapshot entry count so the next snapshot starts fresh.
 */
static int
memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
	int error = EINVAL;
	boolean_t size_only;
	memorystatus_jetsam_snapshot_t *snapshot;

	size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);

	error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only);
	if (error) {
		goto out;
	}

	/* Copy out and reset */
	if (!size_only) {
		if ((error = copyout(snapshot, buffer, buffer_size)) == 0) {
			snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
		}
	}

	if (error == 0) {
		*retval = buffer_size;
	}
out:
	return error;
}

/*
 * Routine:	memorystatus_cmd_grp_set_properties
 * Purpose:	Update properties for a group of processes.
 *
 * Supported Properties:
 * [priority]
 *	Move each process out of its effective priority
 *	band and into a new priority band.
 *	Maintains relative order from lowest to highest priority.
 *	In single band, maintains relative order from head to tail.
 *
 *	eg: before	[effectivepriority | pid]
 *			[18 | p101              ]
 *			[17 | p55, p67, p19     ]
 *			[12 | p103 p10          ]
 *			[ 7 | p25               ]
 *			[ 0 | p71, p82,         ]
 *
 *	    after	[ new band | pid]
 *			[ xxx | p71, p82, p25, p103, p10, p55, p67, p19, p101]
 *
 * Returns:  0 on success, else non-zero.
 *
 * Caveat:   We know there is a race window regarding recycled pids.
 *	     A process could be killed before the kernel can act on it here.
 *	     If a pid cannot be found in any of the jetsam priority bands,
 *	     then we simply ignore it.  No harm.
 *	     But, if the pid has been recycled then it could be an issue.
 *	     In that scenario, we might move an unsuspecting process to the new
 *	     priority band.  It's not clear how the kernel can safeguard
 *	     against this, but it would be an extremely rare case anyway.
 *	     The caller of this api might avoid such race conditions by
 *	     ensuring that the processes passed in the pid list are suspended.
 */


/* This internal structure can expand when we add support for more properties */
typedef	struct memorystatus_internal_properties
{
	proc_t proc;
	int32_t priority;	/* see memorytstatus_priority_entry_t : priority */
} memorystatus_internal_properties_t;


static int
memorystatus_cmd_grp_set_properties(int32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {

#pragma unused (flags)

	/*
	 * We only handle setting priority
	 * per process
	 */

	int error = 0;
	memorystatus_priority_entry_t *entries = NULL;
	uint32_t entry_count = 0;

	/* This will be the ordered proc list */
	memorystatus_internal_properties_t *table = NULL;
	size_t table_size = 0;
	uint32_t table_count = 0;

	uint32_t i = 0;
	uint32_t bucket_index = 0;
	boolean_t head_insert;
	int32_t new_priority;

	proc_t p;

	/* Verify inputs */
	if ((buffer == USER_ADDR_NULL) || (buffer_size == 0) || ((buffer_size % sizeof(memorystatus_priority_entry_t)) != 0)) {
		error = EINVAL;
		goto out;
	}

	entry_count = (buffer_size / sizeof(memorystatus_priority_entry_t));
	if ((entries = (memorystatus_priority_entry_t *)kalloc(buffer_size)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, entry_count, 0, 0, 0, 0);

	if ((error = copyin(buffer, entries, buffer_size)) != 0) {
		goto out;
	}

	/* Verify sanity of input priorities */
	for (i=0; i < entry_count; i++) {
		if (entries[i].priority == -1) {
			/* Use as shorthand for default priority */
			entries[i].priority = JETSAM_PRIORITY_DEFAULT;
		} else if (entries[i].priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
			/* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use;
			 * if requested, adjust to JETSAM_PRIORITY_IDLE. */
			entries[i].priority = JETSAM_PRIORITY_IDLE;
		} else if (entries[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
			/* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle
			 * queue */
			/* Deal with this later */
		} else if ((entries[i].priority < 0) || (entries[i].priority >= MEMSTAT_BUCKET_COUNT)) {
			/* Sanity check */
			error = EINVAL;
			goto out;
		}
	}

	table_size = sizeof(memorystatus_internal_properties_t) * entry_count;
	if ( (table = (memorystatus_internal_properties_t *)kalloc(table_size)) == NULL) {
		error = ENOMEM;
		goto out;
	}
	memset(table, 0, table_size);


	/*
	 * For each jetsam bucket entry, spin through the input property list.
	 * When a matching pid is found, populate an adjacent table with the
	 * appropriate proc pointer and new property values.
	 * This traversal automatically preserves order from lowest
	 * to highest priority.
	 */

	bucket_index=0;

	proc_list_lock();

	/* Create the ordered table */
	p = memorystatus_get_first_proc_locked(&bucket_index, TRUE);
	while (p && (table_count < entry_count)) {
		for (i=0; i < entry_count; i++ ) {
			if (p->p_pid == entries[i].pid) {
				/* Build the table data  */
				table[table_count].proc = p;
				table[table_count].priority = entries[i].priority;
				table_count++;
				break;
			}
		}
		p = memorystatus_get_next_proc_locked(&bucket_index, p, TRUE);
	}

	/* We now have ordered list of procs ready to move */
	for (i=0; i < table_count; i++) {
		p = table[i].proc;
		assert(p != NULL);

		/* Allow head inserts -- but relative order is now */
		if (table[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
			new_priority = JETSAM_PRIORITY_IDLE;
			head_insert = true;
		} else {
			new_priority = table[i].priority;
			head_insert = false;
		}

		/* Not allowed */
		if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
			continue;
		}

		/*
		 * Take appropriate steps if moving proc out of the
		 * JETSAM_PRIORITY_IDLE_DEFERRED band.
		 */
		if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
		}

		memorystatus_update_priority_locked(p, new_priority, head_insert);
	}

	proc_list_unlock();

	/*
	 * if (table_count != entry_count)
	 * then some pids were not found in a jetsam band.
	 * harmless but interesting...
	 */
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, entry_count, table_count, 0, 0, 0);

out:
	if (entries)
		kfree(entries, buffer_size);
	if (table)
		kfree(table, table_size);

	return (error);
}


/*
 * This routine is meant solely for the purpose of adjusting jetsam priorities and bands.
3969 * It is _not_ meant to be used for the setting of memory limits, especially, since we can't 3970 * tell if the memory limit being set is fatal or not. 3971 * 3972 * So the the last 5 args to the memorystatus_update() call below, related to memory limits, are all 0 or FALSE. 3973 */ 3974 3975static int 3976memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { 3977 const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */ 3978 3979 int error; 3980 uint32_t i; 3981 uint32_t entry_count; 3982 memorystatus_priority_properties_t *entries; 3983 3984 /* Validate inputs */ 3985 if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) { 3986 return EINVAL; 3987 } 3988 3989 /* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */ 3990 entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t)); 3991 if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) { 3992 return EINVAL; 3993 } 3994 3995 entries = (memorystatus_priority_properties_t *)kalloc(buffer_size); 3996 3997 error = copyin(buffer, entries, buffer_size); 3998 3999 for (i = 0; i < entry_count; i++) { 4000 proc_t p; 4001 4002 if (error) { 4003 break; 4004 } 4005 4006 p = proc_find(pid); 4007 if (!p) { 4008 error = ESRCH; 4009 break; 4010 } 4011 4012 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { 4013 error = EPERM; 4014 proc_rele(p); 4015 break; 4016 } 4017 4018 error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0, FALSE); 4019 proc_rele(p); 4020 } 4021 4022 kfree(entries, buffer_size); 4023 4024 return error; 4025} 4026 4027static int 4028memorystatus_cmd_get_pressure_status(int32_t *retval) { 4029 int error; 4030 4031 /* Need privilege for check */ 4032 error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0); 4033 if (error) { 4034 return (error); 4035 } 4036 4037 /* Inherently 
racy, so it's not worth taking a lock here */ 4038 *retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0; 4039 4040 return error; 4041} 4042 4043/* 4044 * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM. 4045 */ 4046 4047static int 4048memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit) { 4049 int error = 0; 4050 4051 proc_t p = proc_find(pid); 4052 if (!p) { 4053 return ESRCH; 4054 } 4055 4056 if (high_water_mark <= 0) { 4057 high_water_mark = -1; /* Disable */ 4058 } 4059 4060 proc_list_lock(); 4061 4062 p->p_memstat_memlimit = high_water_mark; 4063 if (memorystatus_highwater_enabled) { 4064 if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) { 4065 4066 memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority, false); 4067 4068 /* 4069 * The update priority call above takes care to set/reset the fatal memory limit state 4070 * IF the process is transitioning between foreground <-> background and has a background 4071 * memory limit. 4072 * Here, however, the process won't be doing any such transitions and so we explicitly tackle 4073 * the fatal limit state. 4074 */ 4075 is_fatal_limit = FALSE; 4076 4077 } else { 4078 error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL; 4079 } 4080 } 4081 4082 if (error == 0) { 4083 if (is_fatal_limit == TRUE) { 4084 p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; 4085 } else { 4086 p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; 4087 } 4088 } 4089 4090 proc_list_unlock(); 4091 proc_rele(p); 4092 4093 return error; 4094} 4095 4096/* 4097 * Returns the jetsam priority (effective or requested) of the process 4098 * associated with this task. 
4099 */ 4100int 4101proc_get_memstat_priority(proc_t p, boolean_t effective_priority) 4102{ 4103 if (p) { 4104 if (effective_priority) { 4105 return p->p_memstat_effectivepriority; 4106 } else { 4107 return p->p_memstat_requestedpriority; 4108 } 4109 } 4110 return 0; 4111} 4112#endif /* CONFIG_JETSAM */ 4113 4114int 4115memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) { 4116 int error = EINVAL; 4117 4118#if !CONFIG_JETSAM 4119 #pragma unused(ret) 4120#endif 4121 4122 /* Root only for now */ 4123 if (!kauth_cred_issuser(kauth_cred_get())) { 4124 error = EPERM; 4125 goto out; 4126 } 4127 4128 /* Sanity check */ 4129 if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) { 4130 error = EINVAL; 4131 goto out; 4132 } 4133 4134 switch (args->command) { 4135 case MEMORYSTATUS_CMD_GET_PRIORITY_LIST: 4136 error = memorystatus_cmd_get_priority_list(args->buffer, args->buffersize, ret); 4137 break; 4138#if CONFIG_JETSAM 4139 case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES: 4140 error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret); 4141 break; 4142 case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES: 4143 error = memorystatus_cmd_grp_set_properties((int32_t)args->flags, args->buffer, args->buffersize, ret); 4144 break; 4145 case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT: 4146 error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret); 4147 break; 4148 case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS: 4149 error = memorystatus_cmd_get_pressure_status(ret); 4150 break; 4151 case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK: 4152 error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE); 4153 break; 4154 case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT: 4155 error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE); 4156 break; 4157 /* Test commands */ 4158#if DEVELOPMENT || DEBUG 4159 case MEMORYSTATUS_CMD_TEST_JETSAM: 4160 error 
= memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 0 : EINVAL; 4161 break; 4162 case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS: 4163 error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize); 4164 break; 4165#endif /* DEVELOPMENT || DEBUG */ 4166#endif /* CONFIG_JETSAM */ 4167 default: 4168 break; 4169 } 4170 4171out: 4172 return error; 4173} 4174 4175 4176static int 4177filt_memorystatusattach(struct knote *kn) 4178{ 4179 kn->kn_flags |= EV_CLEAR; 4180 return memorystatus_knote_register(kn); 4181} 4182 4183static void 4184filt_memorystatusdetach(struct knote *kn) 4185{ 4186 memorystatus_knote_unregister(kn); 4187} 4188 4189static int 4190filt_memorystatus(struct knote *kn __unused, long hint) 4191{ 4192 if (hint) { 4193 switch (hint) { 4194 case kMemorystatusNoPressure: 4195 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { 4196 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL; 4197 } 4198 break; 4199 case kMemorystatusPressure: 4200 if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) { 4201 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { 4202 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; 4203 } 4204 } else if (memorystatus_vm_pressure_level == kVMPressureCritical) { 4205 4206 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { 4207 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; 4208 } 4209 } 4210 break; 4211 case kMemorystatusLowSwap: 4212 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { 4213 kn->kn_fflags |= NOTE_MEMORYSTATUS_LOW_SWAP; 4214 } 4215 break; 4216 default: 4217 break; 4218 } 4219 } 4220 4221 return (kn->kn_fflags != 0); 4222} 4223 4224static void 4225memorystatus_klist_lock(void) { 4226 lck_mtx_lock(&memorystatus_klist_mutex); 4227} 4228 4229static void 4230memorystatus_klist_unlock(void) { 4231 lck_mtx_unlock(&memorystatus_klist_mutex); 4232} 4233 4234void 4235memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) { 4236 
lck_mtx_init(&memorystatus_klist_mutex, grp, attr); 4237 klist_init(&memorystatus_klist); 4238} 4239 4240int 4241memorystatus_knote_register(struct knote *kn) { 4242 int error = 0; 4243 4244 memorystatus_klist_lock(); 4245 4246 if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP)) { 4247 4248 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { 4249 error = suser(kauth_cred_get(), 0); 4250 } 4251 4252 if (error == 0) { 4253 KNOTE_ATTACH(&memorystatus_klist, kn); 4254 } 4255 } else { 4256 error = ENOTSUP; 4257 } 4258 4259 memorystatus_klist_unlock(); 4260 4261 return error; 4262} 4263 4264void 4265memorystatus_knote_unregister(struct knote *kn __unused) { 4266 memorystatus_klist_lock(); 4267 KNOTE_DETACH(&memorystatus_klist, kn); 4268 memorystatus_klist_unlock(); 4269} 4270 4271 4272#if 0 4273#if CONFIG_JETSAM && VM_PRESSURE_EVENTS 4274static boolean_t 4275memorystatus_issue_pressure_kevent(boolean_t pressured) { 4276 memorystatus_klist_lock(); 4277 KNOTE(&memorystatus_klist, pressured ? kMemorystatusPressure : kMemorystatusNoPressure); 4278 memorystatus_klist_unlock(); 4279 return TRUE; 4280} 4281#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ 4282#endif /* 0 */ 4283