1/* 2 * Copyright (c) 2009-2010 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <libkern/libkern.h>
#include <mach/mach_types.h>
#include <mach/task.h>
#include <sys/proc_internal.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <kern/locks.h>
#include <sys/queue.h>
#include <kern/vm_pressure.h>
#include <sys/malloc.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <vm/vm_pageout.h>

/* NOTE(review): duplicate include — <kern/task.h> is already included above. */
#include <kern/task.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif

/*
 * This value is the threshold that a process must meet to be considered for scavenging.
 */
#define VM_PRESSURE_MINIMUM_RSIZE		10	/* MB */
/* Minimum interval between two pressure notifications to the same process. */
#define VM_PRESSURE_NOTIFY_WAIT_PERIOD		10000	/* milliseconds */

void vm_pressure_klist_lock(void);
void vm_pressure_klist_unlock(void);

static void vm_dispatch_memory_pressure(void);
void vm_reset_active_list(void);

#if !(CONFIG_MEMORYSTATUS && CONFIG_JETSAM)
static kern_return_t vm_try_pressure_candidates(void);
#endif

/* Guards both klists below; taken via vm_pressure_klist_lock()/unlock(). */
static lck_mtx_t vm_pressure_klist_mutex;

/* Knotes eligible for the next pressure notification. */
struct klist vm_pressure_klist;
/* Knotes already notified; parked here until vm_reset_active_list() recharges. */
struct klist vm_pressure_klist_dormant;

#if DEBUG
#define VM_PRESSURE_DEBUG(cond, format, ...)      \
do {                                              \
	if (cond) { printf(format, ##__VA_ARGS__); } \
} while(0)
#else
#define VM_PRESSURE_DEBUG(cond, format, ...)
#endif

/*
 * One-time setup: initialize the mutex that guards the pressure klists.
 */
void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr) {
	lck_mtx_init(&vm_pressure_klist_mutex, grp, attr);
}

void vm_pressure_klist_lock(void) {
	lck_mtx_lock(&vm_pressure_klist_mutex);
}

void vm_pressure_klist_unlock(void) {
	lck_mtx_unlock(&vm_pressure_klist_mutex);
}

/*
 * Attach a knote to the active pressure list.
 *
 * Returns 0 on success, or ENOTSUP if the knote did not request
 * NOTE_VM_PRESSURE in its sfflags.
 */
int vm_knote_register(struct knote *kn) {
	int rv = 0;

	vm_pressure_klist_lock();

	if ((kn->kn_sfflags) & (NOTE_VM_PRESSURE)) {
		KNOTE_ATTACH(&vm_pressure_klist, kn);
	} else {
		rv = ENOTSUP;
	}

	vm_pressure_klist_unlock();

	return rv;
}

/*
 * Detach a knote from whichever pressure list it is on (active first,
 * then dormant).  A knote lives on at most one of the two lists, so the
 * function returns as soon as the first match is detached.
 */
void vm_knote_unregister(struct knote *kn) {
	struct knote *kn_temp;

	vm_pressure_klist_lock();

	VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);

	SLIST_FOREACH(kn_temp, &vm_pressure_klist, kn_selnext) {
		if (kn_temp == kn) {
			KNOTE_DETACH(&vm_pressure_klist, kn);
			vm_pressure_klist_unlock();
			return;
		}
	}

	SLIST_FOREACH(kn_temp, &vm_pressure_klist_dormant, kn_selnext) {
		if (kn_temp == kn) {
			KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
			vm_pressure_klist_unlock();
			return;
		}
	}

	vm_pressure_klist_unlock();
}

/*
 * Remove an exiting process's knote from the pressure lists.
 *
 * NOTE(review): returns after detaching the first knote whose kqueue
 * belongs to p — presumably a process registers at most one pressure
 * knote; if it could register several, later ones would be left behind.
 */
void vm_pressure_proc_cleanup(proc_t p)
{
	struct knote *kn = NULL;

	vm_pressure_klist_lock();

	VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d exiting pressure notification\n", p->p_pid);

	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
		if (kn->kn_kq->kq_p == p) {
			KNOTE_DETACH(&vm_pressure_klist, kn);
			vm_pressure_klist_unlock();
			return;
		}
	}

	SLIST_FOREACH(kn, &vm_pressure_klist_dormant, kn_selnext) {
		if (kn->kn_kq->kq_p == p) {
			KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
			vm_pressure_klist_unlock();
			return;
		}
	}

	vm_pressure_klist_unlock();
}

/*
 * Used by the vm_pressure_thread which is
 * signalled from within
vm_pageout_scan(). 167 */ 168void consider_vm_pressure_events(void) 169{ 170 vm_dispatch_memory_pressure(); 171} 172 173#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM 174 175static void vm_dispatch_memory_pressure(void) 176{ 177 /* Update the pressure level and target the foreground or next-largest process as appropriate */ 178 memorystatus_update_vm_pressure(FALSE); 179} 180 181/* Jetsam aware version. Called with lock held */ 182 183static struct knote *vm_find_knote_from_pid(pid_t pid, struct klist *list) { 184 struct knote *kn = NULL; 185 186 SLIST_FOREACH(kn, list, kn_selnext) { 187 struct proc *p; 188 pid_t current_pid; 189 190 p = kn->kn_kq->kq_p; 191 current_pid = p->p_pid; 192 193 if (current_pid == pid) { 194 break; 195 } 196 } 197 198 return kn; 199} 200 201int vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked) { 202 int ret = EINVAL; 203 struct knote *kn; 204 205 VM_PRESSURE_DEBUG(1, "vm_dispatch_pressure_note_to_pid(): pid %d\n", pid); 206 207 if (!locked) { 208 vm_pressure_klist_lock(); 209 } 210 211 /* 212 * Because we're specifically targeting a process here, we don't care 213 * if a warning has already been sent and it's moved to the dormant 214 * list; check that too. 215 */ 216 kn = vm_find_knote_from_pid(pid, &vm_pressure_klist); 217 if (kn) { 218 KNOTE(&vm_pressure_klist, pid); 219 ret = 0; 220 } else { 221 kn = vm_find_knote_from_pid(pid, &vm_pressure_klist_dormant); 222 if (!kn) { 223 KNOTE(&vm_pressure_klist_dormant, pid); 224 } 225 } 226 227 if (!locked) { 228 vm_pressure_klist_unlock(); 229 } 230 231 return ret; 232} 233 234void vm_find_pressure_foreground_candidates(void) 235{ 236 struct knote *kn, *kn_tmp; 237 struct klist dispatch_klist = { NULL }; 238 239 vm_pressure_klist_lock(); 240 proc_list_lock(); 241 242 /* Find the foreground processes. 
 */
	SLIST_FOREACH_SAFE(kn, &vm_pressure_klist, kn_selnext, kn_tmp) {
		proc_t p = kn->kn_kq->kq_p;

		if (memorystatus_is_foreground_locked(p)) {
			KNOTE_DETACH(&vm_pressure_klist, kn);
			KNOTE_ATTACH(&dispatch_klist, kn);
		}
	}

	SLIST_FOREACH_SAFE(kn, &vm_pressure_klist_dormant, kn_selnext, kn_tmp) {
		proc_t p = kn->kn_kq->kq_p;

		if (memorystatus_is_foreground_locked(p)) {
			KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
			KNOTE_ATTACH(&dispatch_klist, kn);
		}
	}

	proc_list_unlock();

	/* Dispatch pressure notifications accordingly */
	SLIST_FOREACH_SAFE(kn, &dispatch_klist, kn_selnext, kn_tmp) {
		proc_t p = kn->kn_kq->kq_p;

		proc_list_lock();
		/* If the proc is gone (ref fails), just park the knote dormant. */
		if (p != proc_ref_locked(p)) {
			proc_list_unlock();
			KNOTE_DETACH(&dispatch_klist, kn);
			KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
			continue;
		}
		proc_list_unlock();

		VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d\n", kn->kn_kq->kq_p->p_pid);
		KNOTE(&dispatch_klist, p->p_pid);
		KNOTE_DETACH(&dispatch_klist, kn);
		KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
		microuptime(&p->vm_pressure_last_notify_tstamp);
		memorystatus_send_pressure_note(p->p_pid);
		proc_rele(p);
	}

	vm_pressure_klist_unlock();
}

/*
 * Pick the largest eligible background process (by resident size, above
 * VM_PRESSURE_MINIMUM_RSIZE and past the notify wait period) from the
 * active list and send it a pressure notification.
 */
void vm_find_pressure_candidate(void)
{
	struct knote *kn = NULL, *kn_max = NULL;
	unsigned int resident_max = 0;
	pid_t target_pid = -1;
	struct klist dispatch_klist = { NULL };
	struct timeval curr_tstamp = {0, 0};
	int elapsed_msecs = 0;
	proc_t target_proc = PROC_NULL;
	kern_return_t kr = KERN_SUCCESS;

	microuptime(&curr_tstamp);

	vm_pressure_klist_lock();

	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
		struct mach_task_basic_info basic_info;
		mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT;
		unsigned int resident_size = 0;
		proc_t p = PROC_NULL;
		struct task* t = TASK_NULL;

		p = kn->kn_kq->kq_p;
		proc_list_lock();
		/* Take a ref; skip the knote if the proc is already gone. */
		if (p != proc_ref_locked(p)) {
			p = PROC_NULL;
			proc_list_unlock();
			continue;
		}
		proc_list_unlock();

		t = (struct task *)(p->task);

		/*
		 * NOTE(review): timevalsub() modifies curr_tstamp in place, so
		 * after the first iteration curr_tstamp no longer holds "now" —
		 * elapsed_msecs for subsequent candidates is computed from an
		 * already-reduced base.  Looks like it should subtract into a
		 * per-iteration copy; confirm against a fixed upstream version.
		 */
		timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp);
		elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;

		/* Rate-limit: don't renotify within the wait period. */
		if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) {
			proc_rele(p);
			continue;
		}

		if (!memorystatus_bg_pressure_eligible(p)) {
			VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
			proc_rele(p);
			continue;
		}

		if( ( kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS ) {
			VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed\n", p->p_pid);
			proc_rele(p);
			continue;
		}

		/*
		 * We don't want a small process to block large processes from
		 * being notified again. <rdar://problem/7955532>
		 */
		resident_size = (basic_info.resident_size)/(1024 * 1024);
		if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
			/* Track the largest candidate seen so far. */
			if (resident_size > resident_max) {
				resident_max = resident_size;
				kn_max = kn;
				target_pid = p->p_pid;
				target_proc = p;
			}
		} else {
			/* There was no candidate with enough resident memory to scavenge */
			VM_PRESSURE_DEBUG(1, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
		}
		proc_rele(p);
	}

	if (kn_max == NULL || target_pid == -1) {
		VM_PRESSURE_DEBUG(1, "[vm_pressure] - no target found!\n");
		goto exit;
	}

	VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
	VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);

	KNOTE_DETACH(&vm_pressure_klist, kn_max);

	/* Re-acquire a fresh ref on the chosen target before notifying. */
	target_proc = proc_find(target_pid);
	if (target_proc != PROC_NULL) {
		KNOTE_ATTACH(&dispatch_klist, kn_max);
		KNOTE(&dispatch_klist, target_pid);
		KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
		memorystatus_send_pressure_note(target_pid);
		microuptime(&target_proc->vm_pressure_last_notify_tstamp);
		proc_rele(target_proc);
	}

exit:
	vm_pressure_klist_unlock();
}

#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */

struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level);

kern_return_t vm_pressure_notification_without_levels(void);
kern_return_t vm_pressure_notify_dispatch_vm_clients(void);

/*
 * Non-jetsam dispatch path: recharge the active list from the dormant
 * list if needed, then try to notify one candidate.
 * Returns KERN_SUCCESS if a notification was delivered.
 */
kern_return_t
vm_pressure_notify_dispatch_vm_clients(void)
{
	vm_pressure_klist_lock();

	if (SLIST_EMPTY(&vm_pressure_klist)) {
		vm_reset_active_list();
	}

	if (!SLIST_EMPTY(&vm_pressure_klist)) {

		VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n");

		if (KERN_SUCCESS == vm_try_pressure_candidates()) {
			vm_pressure_klist_unlock();
			return KERN_SUCCESS;
		}
	}

	VM_PRESSURE_DEBUG(1, "[vm_pressure] could not find suitable event candidate\n");

	vm_pressure_klist_unlock();

	return KERN_FAILURE;
}

static void vm_dispatch_memory_pressure(void)
{
	memorystatus_update_vm_pressure(FALSE);
}

extern vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);

/*
 * Choose the best knote on `candidate_list` to notify for `level`.
 *
 * level > 0: warning/critical pressure; level == 0: back to normal;
 * level == -1: level-free selection purely by resident size.
 * Returns the chosen knote, or NULL when no candidate qualifies.
 */
struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level)
{
	struct knote *kn = NULL, *kn_max = NULL;
	unsigned int resident_max = 0;
	kern_return_t kr = KERN_SUCCESS;
	struct timeval curr_tstamp = {0, 0};
	int elapsed_msecs = 0;
	int selected_task_importance = 0;
	/* Remembers the level seen on the previous call (persistent). */
	static int pressure_snapshot = -1;
	boolean_t pressure_increase = FALSE;

	if (level != -1) {

		if (pressure_snapshot == -1) {
			/*
			 * Initial snapshot.
			 */
			pressure_snapshot = level;
			pressure_increase = TRUE;
		} else {

			/* Rising vs. falling pressure relative to the last call. */
			if (level >= pressure_snapshot) {
				pressure_increase = TRUE;
			} else {
				pressure_increase = FALSE;
			}

			pressure_snapshot = level;
		}
	}

	if ((level > 0) && (pressure_increase) == TRUE) {
		/*
		 * We'll start by considering the largest
		 * unimportant task in our list.
		 */
		selected_task_importance = INT_MAX;
	} else {
		/*
		 * We'll start by considering the largest
		 * important task in our list.
		 */
		selected_task_importance = 0;
	}

	microuptime(&curr_tstamp);

	SLIST_FOREACH(kn, candidate_list, kn_selnext) {

		struct mach_task_basic_info basic_info;
		mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT;
		unsigned int resident_size = 0;
		proc_t p = PROC_NULL;
		struct task* t = TASK_NULL;
		int curr_task_importance = 0;
		boolean_t consider_knote = FALSE;

		p = kn->kn_kq->kq_p;
		proc_list_lock();
		/* Take a ref; skip the knote if the proc is already gone. */
		if (p != proc_ref_locked(p)) {
			p = PROC_NULL;
			proc_list_unlock();
			continue;
		}
		proc_list_unlock();

		t = (struct task *)(p->task);

		/*
		 * NOTE(review): timevalsub() modifies curr_tstamp in place, so
		 * elapsed_msecs for every candidate after the first is computed
		 * from an already-reduced base rather than from "now" — likely
		 * the subtraction was meant to go into a per-iteration copy.
		 */
		timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp);
		elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;

		/* The wait-period rate limit only applies to level-free mode. */
		if ((level == -1) && (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD)) {
			proc_rele(p);
			continue;
		}

		if (level != -1) {
			/*
			 * For the level based notifications, check and see if this knote is
			 * registered for the current level.
			 */
			vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level);

			if ((kn->kn_sfflags & dispatch_level) == 0) {
				proc_rele(p);
				continue;
			}
		}

		if( ( kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS ) {
			VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr);
			proc_rele(p);
			continue;
		}

		curr_task_importance = task_importance_estimate(t);

		/*
		 * We don't want a small process to block large processes from
		 * being notified again. <rdar://problem/7955532>
		 */
		resident_size = (basic_info.resident_size)/(MB);

		if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {

			if (level > 0) {
				/*
				 * Warning or Critical Pressure.
				 */
				if (pressure_increase) {
					/* Rising pressure: prefer least-important, then largest, not yet told. */
					if ((curr_task_importance <= selected_task_importance) && (resident_size > resident_max)) {
						if (task_has_been_notified(t, level) == FALSE) {
							consider_knote = TRUE;
						}
					}
				} else {
					/* Falling pressure: prefer most-important, then largest, not yet told. */
					if ((curr_task_importance >= selected_task_importance) && (resident_size > resident_max)) {
						if (task_has_been_notified(t, level) == FALSE) {
							consider_knote = TRUE;
						}
					}
				}
			} else if (level == 0) {
				/*
				 * Pressure back to normal.
				 */
				if ((curr_task_importance >= selected_task_importance) && (resident_size > resident_max)) {

					/* Only tell tasks that previously heard a warning/critical. */
					if ((task_has_been_notified(t, kVMPressureWarning) == TRUE) || (task_has_been_notified(t, kVMPressureCritical) == TRUE)) {
						consider_knote = TRUE;
					}
				}
			} else if (level == -1) {

				/*
				 * Simple (importance and level)-free behavior based solely on RSIZE.
				 */
				if (resident_size > resident_max) {
					consider_knote = TRUE;
				}
			}


			if (consider_knote) {
				resident_max = resident_size;
				kn_max = kn;
				selected_task_importance = curr_task_importance;
				consider_knote = FALSE; /* reset for the next candidate */
			}
		} else {
			/* There was no candidate with enough resident memory to scavenge */
			VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
		}
		proc_rele(p);
	}

	if (kn_max) {
		VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
	}

	return kn_max;
}

/*
 * vm_pressure_klist_lock is held for this routine.
 *
 * Level-free notification: pick the optimal candidate by resident size,
 * fire its knote, and park it on the dormant list.
 * Returns KERN_FAILURE when no candidate qualifies.
 */
kern_return_t vm_pressure_notification_without_levels(void)
{
	struct knote *kn_max = NULL;
	pid_t target_pid = -1;
	struct klist dispatch_klist = { NULL };
	proc_t target_proc = PROC_NULL;

	kn_max = vm_pressure_select_optimal_candidate_to_notify(&vm_pressure_klist, -1);

	if (kn_max == NULL) {
		return KERN_FAILURE;
	}

	target_proc = kn_max->kn_kq->kq_p;

	KNOTE_DETACH(&vm_pressure_klist, kn_max);

	if (target_proc != PROC_NULL) {

		target_pid = target_proc->p_pid;

		memoryshot(VM_PRESSURE_EVENT, DBG_FUNC_NONE);

		KNOTE_ATTACH(&dispatch_klist, kn_max);
		KNOTE(&dispatch_klist, target_pid);
		KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);

		microuptime(&target_proc->vm_pressure_last_notify_tstamp);
	}

	return KERN_SUCCESS;
}

static kern_return_t vm_try_pressure_candidates(void)
{
	/*
	 * This takes care of candidates that use NOTE_VM_PRESSURE.
	 * It's a notification without indication of the level
	 * of memory pressure.
632 */ 633 return (vm_pressure_notification_without_levels()); 634} 635 636#endif /* !(CONFIG_MEMORYSTATUS && CONFIG_JETSAM) */ 637 638/* 639 * Remove all elements from the dormant list and place them on the active list. 640 * Called with klist lock held. 641 */ 642void vm_reset_active_list(void) { 643 /* Re-charge the main list from the dormant list if possible */ 644 if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) { 645 struct knote *kn; 646 647 VM_PRESSURE_DEBUG(1, "[vm_pressure] recharging main list from dormant list\n"); 648 649 while (!SLIST_EMPTY(&vm_pressure_klist_dormant)) { 650 kn = SLIST_FIRST(&vm_pressure_klist_dormant); 651 SLIST_REMOVE_HEAD(&vm_pressure_klist_dormant, kn_selnext); 652 SLIST_INSERT_HEAD(&vm_pressure_klist, kn, kn_selnext); 653 } 654 } 655} 656