1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <vm/vm_compressor.h> 30 31#if CONFIG_PHANTOM_CACHE 32#include <vm/vm_phantom_cache.h> 33#endif 34 35#include <vm/vm_map.h> 36#include <vm/vm_pageout.h> 37#include <vm/memory_object.h> 38#include <mach/mach_host.h> /* for host_info() */ 39#include <kern/ledger.h> 40 41#include <default_pager/default_pager_alerts.h> 42#include <default_pager/default_pager_object_server.h> 43 44#include <IOKit/IOHibernatePrivate.h> 45 46/* 47 * vm_compressor_mode has a heirarchy of control to set its value. 48 * boot-args are checked first, then device-tree, and finally 49 * the default value that is defined below. See vm_fault_init() for 50 * the boot-arg & device-tree code. 51 */ 52 53extern ipc_port_t min_pages_trigger_port; 54extern lck_mtx_t paging_segments_lock; 55#define PSL_LOCK() lck_mtx_lock(&paging_segments_lock) 56#define PSL_UNLOCK() lck_mtx_unlock(&paging_segments_lock) 57 58 59int vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP; 60int vm_scale = 16; 61 62 63int vm_compression_limit = 0; 64 65extern boolean_t vm_swap_up; 66extern void vm_pageout_io_throttle(void); 67 68#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA 69extern unsigned int hash_string(char *cp, int len); 70#endif 71 72 73struct c_slot { 74 uint64_t c_offset:C_SEG_OFFSET_BITS, 75 c_size:12, 76 c_packed_ptr:36; 77#if CHECKSUM_THE_DATA 78 unsigned int c_hash_data; 79#endif 80#if CHECKSUM_THE_COMPRESSED_DATA 81 unsigned int c_hash_compressed_data; 82#endif 83 84}; 85 86#define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size) 87#define PACK_C_SIZE(cs, size) (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size)) 88 89 90struct c_slot_mapping { 91 uint32_t s_cseg:22, /* segment number + 1 */ 92 s_cindx:10; /* index in the segment */ 93}; 94 95typedef struct c_slot_mapping *c_slot_mapping_t; 96 97 98union c_segu { 99 c_segment_t c_seg; 100 uint32_t c_segno; 101}; 102 103 104 105#define C_SLOT_PACK_PTR(ptr) (((uintptr_t)ptr - (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS) >> 2) 106#define C_SLOT_UNPACK_PTR(cslot) ((uintptr_t)(cslot->c_packed_ptr << 2) + (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS) 107 108 109uint32_t c_segment_count = 0; 110 111uint64_t c_generation_id = 0; 112uint64_t c_generation_id_flush_barrier; 113 114 115#define HIBERNATE_FLUSHING_SECS_TO_COMPLETE 120 116 117boolean_t hibernate_no_swapspace = FALSE; 118clock_sec_t hibernate_flushing_deadline = 0; 119 120 121#if TRACK_BAD_C_SEGMENTS 122queue_head_t c_bad_list_head; 123uint32_t c_bad_count = 0; 124#endif 125 126queue_head_t c_age_list_head; 127queue_head_t c_swapout_list_head; 128queue_head_t c_swappedin_list_head; 129queue_head_t c_swappedout_list_head; 130queue_head_t c_swappedout_sparse_list_head; 131 132uint32_t c_age_count = 0; 133uint32_t c_swapout_count = 0; 134uint32_t c_swappedin_count = 0; 135uint32_t c_swappedout_count = 0; 136uint32_t c_swappedout_sparse_count = 0; 137 138queue_head_t c_minor_list_head; 139uint32_t c_minor_count = 0; 140 141union c_segu *c_segments; 142caddr_t c_segments_next_page; 143boolean_t c_segments_busy; 144uint32_t c_segments_available; 145uint32_t c_segments_limit; 146uint32_t c_segments_nearing_limit; 147uint32_t c_segment_pages_compressed; 148uint32_t c_segment_pages_compressed_limit; 149uint32_t c_segment_pages_compressed_nearing_limit; 150uint32_t c_free_segno_head = (uint32_t)-1; 151 152uint32_t vm_compressor_minorcompact_threshold_divisor = 10; 153uint32_t vm_compressor_majorcompact_threshold_divisor = 10; 154uint32_t vm_compressor_unthrottle_threshold_divisor = 10; 155uint32_t vm_compressor_catchup_threshold_divisor = 10; 156 157#define C_SEGMENTS_PER_PAGE (PAGE_SIZE / sizeof(union c_segu)) 158 159 160lck_grp_attr_t vm_compressor_lck_grp_attr; 161lck_attr_t vm_compressor_lck_attr; 162lck_grp_t vm_compressor_lck_grp; 163 164 165#if __i386__ || __x86_64__ 166lck_mtx_t *c_list_lock; 167#else /* __i386__ || __x86_64__ */ 168lck_spin_t *c_list_lock; 169#endif /* __i386__ || __x86_64__ */ 170 171lck_rw_t c_master_lock; 172boolean_t decompressions_blocked = FALSE; 173 174zone_t compressor_segment_zone; 175int c_compressor_swap_trigger = 0; 176 177uint32_t compressor_cpus; 178char *compressor_scratch_bufs; 179 180 181clock_sec_t start_of_sample_period_sec = 0; 182clock_nsec_t start_of_sample_period_nsec = 0; 183clock_sec_t start_of_eval_period_sec = 0; 184clock_nsec_t start_of_eval_period_nsec = 0; 185uint32_t sample_period_decompression_count = 0; 186uint32_t sample_period_compression_count = 0; 187uint32_t last_eval_decompression_count = 0; 188uint32_t last_eval_compression_count = 0; 189 190#define DECOMPRESSION_SAMPLE_MAX_AGE (60 * 30) 191 192uint32_t swapout_target_age = 0; 193uint32_t age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE]; 194uint32_t overage_decompressions_during_sample_period = 0; 195 196void do_fastwake_warmup(void); 197boolean_t fastwake_warmup = FALSE; 198boolean_t fastwake_recording_in_progress = FALSE; 199clock_sec_t dont_trim_until_ts = 0; 200 201uint64_t c_segment_warmup_count; 202uint64_t first_c_segment_to_warm_generation_id = 0; 203uint64_t last_c_segment_to_warm_generation_id = 0; 204boolean_t hibernate_flushing = FALSE; 205 206int64_t c_segment_input_bytes __attribute__((aligned(8))) = 0; 207int64_t c_segment_compressed_bytes __attribute__((aligned(8))) = 0; 208int64_t compressor_bytes_used __attribute__((aligned(8))) = 0; 209uint64_t compressor_kvspace_used __attribute__((aligned(8))) = 0; 210uint64_t compressor_kvwaste_limit = 0; 211 212static boolean_t compressor_needs_to_swap(void); 213static void vm_compressor_swap_trigger_thread(void); 214static void vm_compressor_do_delayed_compactions(boolean_t); 215static void vm_compressor_compact_and_swap(boolean_t); 216static void vm_compressor_age_swapped_in_segments(boolean_t); 217 218boolean_t vm_compressor_low_on_space(void); 219 220void compute_swapout_target_age(void); 221 222boolean_t c_seg_major_compact(c_segment_t, c_segment_t); 223boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t); 224 225int c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t); 226int c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t); 227void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg); 228void c_seg_need_delayed_compaction(c_segment_t); 229 230void c_seg_move_to_sparse_list(c_segment_t); 231void c_seg_insert_into_q(queue_head_t *, c_segment_t); 232 233boolean_t c_seg_try_free(c_segment_t); 234void c_seg_free(c_segment_t); 235void c_seg_free_locked(c_segment_t); 236 237 238uint64_t vm_available_memory(void); 239uint64_t vm_compressor_pages_compressed(void); 240 241extern unsigned int dp_pages_free, dp_pages_reserve; 242 243uint64_t 244vm_available_memory(void) 245{ 246 return (((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64); 247} 248 249 250uint64_t 251vm_compressor_pages_compressed(void) 252{ 253 return (c_segment_pages_compressed * PAGE_SIZE_64); 254} 255 256 257boolean_t 258vm_compression_available(void) 259{ 260 if ( !(COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)) 261 return (FALSE); 262 263 if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) 264 return (FALSE); 265 266 return (TRUE); 267} 268 269 270boolean_t 271vm_compressor_low_on_space(void) 272{ 273 if ((c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) || 274 (c_segment_count > c_segments_nearing_limit)) 275 return (TRUE); 276 277 return (FALSE); 278} 279 280 281int 282vm_wants_task_throttled(task_t task) 283{ 284 if (task == kernel_task) 285 return (0); 286 287 if (vm_compressor_mode == COMPRESSED_PAGER_IS_ACTIVE || vm_compressor_mode == DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 288 if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) && 289 (unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4)) 290 return (1); 291 } else { 292 if (((dp_pages_free + dp_pages_reserve < 2000) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) && 293 get_task_resident_size(task) > (((AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE) / 5)) 294 return (1); 295 } 296 return (0); 297} 298 299 300void 301vm_compressor_init_locks(void) 302{ 303 lck_grp_attr_setdefault(&vm_compressor_lck_grp_attr); 304 lck_grp_init(&vm_compressor_lck_grp, "vm_compressor", &vm_compressor_lck_grp_attr); 305 lck_attr_setdefault(&vm_compressor_lck_attr); 306 307 lck_rw_init(&c_master_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); 308} 309 310 311void 312vm_decompressor_lock(void) 313{ 314 PAGE_REPLACEMENT_ALLOWED(TRUE); 315 316 decompressions_blocked = TRUE; 317 318 PAGE_REPLACEMENT_ALLOWED(FALSE); 319} 320 321void 322vm_decompressor_unlock(void) 323{ 324 PAGE_REPLACEMENT_ALLOWED(TRUE); 325 326 decompressions_blocked = FALSE; 327 328 PAGE_REPLACEMENT_ALLOWED(FALSE); 329 330 thread_wakeup((event_t)&decompressions_blocked); 331} 332 333 334 335void 336vm_compressor_init(void) 337{ 338 thread_t thread; 339 struct c_slot cs_dummy; 340 c_slot_t cs = &cs_dummy; 341 342 /* 343 * ensure that any pointer that gets created from 344 * the vm_page zone can be packed properly 345 */ 346 cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_min_address); 347 348 if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_min_address) 349 panic("C_SLOT_UNPACK_PTR failed on zone_map_min_address - %p", (void *)zone_map_min_address); 350 351 cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_max_address); 352 353 if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_max_address) 354 panic("C_SLOT_UNPACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address); 355 356 357 assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE); 358 359 PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof (vm_compression_limit)); 360 361 if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) { 362 vm_compressor_minorcompact_threshold_divisor = 11; 363 vm_compressor_majorcompact_threshold_divisor = 13; 364 vm_compressor_unthrottle_threshold_divisor = 20; 365 vm_compressor_catchup_threshold_divisor = 35; 366 } else { 367 vm_compressor_minorcompact_threshold_divisor = 20; 368 vm_compressor_majorcompact_threshold_divisor = 25; 369 vm_compressor_unthrottle_threshold_divisor = 35; 370 vm_compressor_catchup_threshold_divisor = 50; 371 } 372 /* 373 * vm_page_init_lck_grp is now responsible for calling vm_compressor_init_locks 374 * c_master_lock needs to be available early so that "vm_page_find_contiguous" can 375 * use PAGE_REPLACEMENT_ALLOWED to coordinate with the compressor. 376 */ 377 378#if __i386__ || __x86_64__ 379 c_list_lock = lck_mtx_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr); 380#else /* __i386__ || __x86_64__ */ 381 c_list_lock = lck_spin_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr); 382#endif /* __i386__ || __x86_64__ */ 383 384#if TRACK_BAD_C_SEGMENTS 385 queue_init(&c_bad_list_head); 386#endif 387 queue_init(&c_age_list_head); 388 queue_init(&c_minor_list_head); 389 queue_init(&c_swapout_list_head); 390 queue_init(&c_swappedin_list_head); 391 queue_init(&c_swappedout_list_head); 392 queue_init(&c_swappedout_sparse_list_head); 393 394 compressor_segment_zone = zinit(sizeof (struct c_segment), 395 128000 * sizeof (struct c_segment), 396 8192, "compressor_segment"); 397 zone_change(compressor_segment_zone, Z_CALLERACCT, FALSE); 398 zone_change(compressor_segment_zone, Z_NOENCRYPT, TRUE); 399 400 401 c_free_segno_head = -1; 402 c_segments_available = 0; 403 404 if (vm_compression_limit == 0) { 405 c_segment_pages_compressed_limit = (uint32_t)((max_mem / PAGE_SIZE)) * vm_scale; 406 407#define OLD_SWAP_LIMIT (1024 * 1024 * 16) 408#define MAX_SWAP_LIMIT (1024 * 1024 * 128) 409 410 if (c_segment_pages_compressed_limit > (OLD_SWAP_LIMIT)) 411 c_segment_pages_compressed_limit = OLD_SWAP_LIMIT; 412 413 if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE_64)) 414 c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE_64); 415 } else { 416 if (vm_compression_limit < MAX_SWAP_LIMIT) 417 c_segment_pages_compressed_limit = vm_compression_limit; 418 else 419 c_segment_pages_compressed_limit = MAX_SWAP_LIMIT; 420 } 421 if ((c_segments_limit = c_segment_pages_compressed_limit / (C_SEG_BUFSIZE / PAGE_SIZE)) > C_SEG_MAX_LIMIT) 422 c_segments_limit = C_SEG_MAX_LIMIT; 423 424 c_segment_pages_compressed_nearing_limit = (c_segment_pages_compressed_limit * 98) / 100; 425 c_segments_nearing_limit = (c_segments_limit * 98) / 100; 426 427 compressor_kvwaste_limit = (vm_map_max(kernel_map) - vm_map_min(kernel_map)) / 16; 428 429 c_segments_busy = FALSE; 430 431 if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY) != KERN_SUCCESS) 432 panic("vm_compressor_init: kernel_memory_allocate failed\n"); 433 434 c_segments_next_page = (caddr_t)c_segments; 435 436 { 437 host_basic_info_data_t hinfo; 438 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; 439 440#define BSD_HOST 1 441 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); 442 443 compressor_cpus = hinfo.max_cpus; 444 445 compressor_scratch_bufs = kalloc(compressor_cpus * WKdm_SCRATCH_BUF_SIZE); 446 } 447 448 if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL, 449 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { 450 panic("vm_compressor_swap_trigger_thread: create failed"); 451 } 452 thread->options |= TH_OPT_VMPRIV; 453 454 thread_deallocate(thread); 455 456 assert(default_pager_init_flag == 0); 457 458 if (vm_pageout_internal_start() != KERN_SUCCESS) { 459 panic("vm_compressor_init: Failed to start the internal pageout thread.\n"); 460 } 461 462 if ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) || 463 (vm_compressor_mode == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP)) { 464 vm_compressor_swap_init(); 465 } 466 467#if CONFIG_FREEZE 468 memorystatus_freeze_enabled = TRUE; 469#endif /* CONFIG_FREEZE */ 470 471 default_pager_init_flag = 1; 472 473 vm_page_reactivate_all_throttled(); 474} 475 476 477#if VALIDATE_C_SEGMENTS 478 479static void 480c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact) 481{ 482 int c_indx; 483 int32_t bytes_used; 484 int32_t bytes_unused; 485 uint32_t c_rounded_size; 486 uint32_t c_size; 487 c_slot_t cs; 488 489 if (c_seg->c_firstemptyslot < c_seg->c_nextslot) { 490 c_indx = c_seg->c_firstemptyslot; 491 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 492 493 if (cs == NULL) 494 panic("c_seg_validate: no slot backing c_firstemptyslot"); 495 496 if (cs->c_size) 497 panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)\n", cs->c_size); 498 } 499 bytes_used = 0; 500 bytes_unused = 0; 501 502 for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) { 503 504 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 505 506 c_size = UNPACK_C_SIZE(cs); 507 508 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 509 510 bytes_used += c_rounded_size; 511 512#if CHECKSUM_THE_COMPRESSED_DATA 513 if (c_size && cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size)) 514 panic("compressed data doesn't match original"); 515#endif 516 } 517 518 if (bytes_used != c_seg->c_bytes_used) 519 panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used); 520 521 if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) 522 panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n", 523 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used); 524 525 if (must_be_compact) { 526 if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) 527 panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n", 528 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used); 529 } 530} 531 532#endif 533 534 535void 536c_seg_need_delayed_compaction(c_segment_t c_seg) 537{ 538 boolean_t clear_busy = FALSE; 539 540 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { 541 C_SEG_BUSY(c_seg); 542 543 lck_mtx_unlock_always(&c_seg->c_lock); 544 lck_mtx_lock_spin_always(c_list_lock); 545 lck_mtx_lock_spin_always(&c_seg->c_lock); 546 547 clear_busy = TRUE; 548 } 549 if (!c_seg->c_on_minorcompact_q && !c_seg->c_ondisk && !c_seg->c_on_swapout_q) { 550 queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list); 551 c_seg->c_on_minorcompact_q = 1; 552 c_minor_count++; 553 } 554 lck_mtx_unlock_always(c_list_lock); 555 556 if (clear_busy == TRUE) 557 C_SEG_WAKEUP_DONE(c_seg); 558} 559 560 561unsigned int c_seg_moved_to_sparse_list = 0; 562 563void 564c_seg_move_to_sparse_list(c_segment_t c_seg) 565{ 566 boolean_t clear_busy = FALSE; 567 568 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { 569 C_SEG_BUSY(c_seg); 570 571 lck_mtx_unlock_always(&c_seg->c_lock); 572 lck_mtx_lock_spin_always(c_list_lock); 573 lck_mtx_lock_spin_always(&c_seg->c_lock); 574 575 clear_busy = TRUE; 576 } 577 assert(c_seg->c_ondisk); 578 assert(c_seg->c_on_swappedout_q); 579 assert(!c_seg->c_on_swappedout_sparse_q); 580 581 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 582 c_seg->c_on_swappedout_q = 0; 583 c_swappedout_count--; 584 585 c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg); 586 c_seg->c_on_swappedout_sparse_q = 1; 587 c_swappedout_sparse_count++; 588 589 c_seg_moved_to_sparse_list++; 590 591 lck_mtx_unlock_always(c_list_lock); 592 593 if (clear_busy == TRUE) 594 C_SEG_WAKEUP_DONE(c_seg); 595} 596 597 598void 599c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg) 600{ 601 c_segment_t c_seg_next; 602 603 if (queue_empty(qhead)) { 604 queue_enter(qhead, c_seg, c_segment_t, c_age_list); 605 } else { 606 c_seg_next = (c_segment_t)queue_first(qhead); 607 608 while (TRUE) { 609 610 if (c_seg->c_generation_id < c_seg_next->c_generation_id) { 611 queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list); 612 break; 613 } 614 c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list); 615 616 if (queue_end(qhead, (queue_entry_t) c_seg_next)) { 617 queue_enter(qhead, c_seg, c_segment_t, c_age_list); 618 break; 619 } 620 } 621 } 622} 623 624 625int try_minor_compaction_failed = 0; 626int try_minor_compaction_succeeded = 0; 627 628void 629c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg) 630{ 631 632 assert(c_seg->c_on_minorcompact_q); 633 /* 634 * c_seg is currently on the delayed minor compaction 635 * queue and we have c_seg locked... if we can get the 636 * c_list_lock w/o blocking (if we blocked we could deadlock 637 * because the lock order is c_list_lock then c_seg's lock) 638 * we'll pull it from the delayed list and free it directly 639 */ 640 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { 641 /* 642 * c_list_lock is held, we need to bail 643 */ 644 try_minor_compaction_failed++; 645 646 lck_mtx_unlock_always(&c_seg->c_lock); 647 } else { 648 try_minor_compaction_succeeded++; 649 650 C_SEG_BUSY(c_seg); 651 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE); 652 } 653} 654 655 656int 657c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement) 658{ 659 int c_seg_freed; 660 661 assert(c_seg->c_busy); 662 663 if (!c_seg->c_on_minorcompact_q) { 664 if (clear_busy == TRUE) 665 C_SEG_WAKEUP_DONE(c_seg); 666 667 lck_mtx_unlock_always(&c_seg->c_lock); 668 669 return (0); 670 } 671 queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list); 672 c_seg->c_on_minorcompact_q = 0; 673 c_minor_count--; 674 675 lck_mtx_unlock_always(c_list_lock); 676 677 if (disallow_page_replacement == TRUE) { 678 lck_mtx_unlock_always(&c_seg->c_lock); 679 680 PAGE_REPLACEMENT_DISALLOWED(TRUE); 681 682 lck_mtx_lock_spin_always(&c_seg->c_lock); 683 } 684 c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy); 685 686 if (disallow_page_replacement == TRUE) 687 PAGE_REPLACEMENT_DISALLOWED(FALSE); 688 689 if (need_list_lock == TRUE) 690 lck_mtx_lock_spin_always(c_list_lock); 691 692 return (c_seg_freed); 693} 694 695 696void 697c_seg_wait_on_busy(c_segment_t c_seg) 698{ 699 c_seg->c_wanted = 1; 700 assert_wait((event_t) (c_seg), THREAD_UNINT); 701 702 lck_mtx_unlock_always(&c_seg->c_lock); 703 thread_block(THREAD_CONTINUE_NULL); 704} 705 706 707 708int try_free_succeeded = 0; 709int try_free_failed = 0; 710 711boolean_t 712c_seg_try_free(c_segment_t c_seg) 713{ 714 /* 715 * c_seg is currently on the delayed minor compaction 716 * or the spapped out sparse queue and we have c_seg locked... 717 * if we can get the c_list_lock w/o blocking (if we blocked we 718 * could deadlock because the lock order is c_list_lock then c_seg's lock) 719 * we'll pull it from the appropriate queue and free it 720 */ 721 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { 722 /* 723 * c_list_lock is held, we need to bail 724 */ 725 try_free_failed++; 726 return (FALSE); 727 } 728 if (c_seg->c_on_minorcompact_q) { 729 queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list); 730 c_seg->c_on_minorcompact_q = 0; 731 c_minor_count--; 732 } else { 733 assert(c_seg->c_on_swappedout_sparse_q); 734 735 /* 736 * c_seg_free_locked will remove it from the swappedout sparse list 737 */ 738 } 739 if (!c_seg->c_busy_swapping) 740 C_SEG_BUSY(c_seg); 741 742 c_seg_free_locked(c_seg); 743 744 try_free_succeeded++; 745 746 return (TRUE); 747} 748 749 750void 751c_seg_free(c_segment_t c_seg) 752{ 753 assert(c_seg->c_busy); 754 755 lck_mtx_unlock_always(&c_seg->c_lock); 756 lck_mtx_lock_spin_always(c_list_lock); 757 lck_mtx_lock_spin_always(&c_seg->c_lock); 758 759 c_seg_free_locked(c_seg); 760} 761 762 763void 764c_seg_free_locked(c_segment_t c_seg) 765{ 766 int segno, i; 767 int pages_populated; 768 int32_t *c_buffer = NULL; 769 uint64_t c_swap_handle; 770 771 assert(!c_seg->c_on_minorcompact_q); 772 773 if (c_seg->c_on_age_q) { 774 queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list); 775 c_seg->c_on_age_q = 0; 776 c_age_count--; 777 } else if (c_seg->c_on_swappedin_q) { 778 queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 779 c_seg->c_on_swappedin_q = 0; 780 c_swappedin_count--; 781 } else if (c_seg->c_on_swapout_q) { 782 queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); 783 c_seg->c_on_swapout_q = 0; 784 c_swapout_count--; 785 thread_wakeup((event_t)&compaction_swapper_running); 786 } else if (c_seg->c_on_swappedout_q) { 787 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 788 c_seg->c_on_swappedout_q = 0; 789 c_swappedout_count--; 790 } else if (c_seg->c_on_swappedout_sparse_q) { 791 queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); 792 c_seg->c_on_swappedout_sparse_q = 0; 793 c_swappedout_sparse_count--; 794 } 795#if TRACK_BAD_C_SEGMENTS 796 else if (c_seg->c_on_bad_q) { 797 queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list); 798 c_seg->c_on_bad_q = 0; 799 c_bad_count--; 800 } 801#endif 802 segno = c_seg->c_mysegno; 803 c_segments[segno].c_segno = c_free_segno_head; 804 c_free_segno_head = segno; 805 c_segment_count--; 806 807 lck_mtx_unlock_always(c_list_lock); 808 809 if (c_seg->c_wanted) { 810 thread_wakeup((event_t) (c_seg)); 811 c_seg->c_wanted = 0; 812 } 813 if (c_seg->c_busy_swapping) { 814 c_seg->c_must_free = 1; 815 816 lck_mtx_unlock_always(&c_seg->c_lock); 817 return; 818 } 819 if (c_seg->c_ondisk == 0) { 820 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE; 821 822 c_buffer = c_seg->c_store.c_buffer; 823 c_seg->c_store.c_buffer = NULL; 824 } else { 825 /* 826 * Free swap space on disk. 827 */ 828 c_swap_handle = c_seg->c_store.c_swap_handle; 829 c_seg->c_store.c_swap_handle = (uint64_t)-1; 830 } 831 lck_mtx_unlock_always(&c_seg->c_lock); 832 833 if (c_buffer) { 834 if (pages_populated) 835 kernel_memory_depopulate(kernel_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR); 836 837 kmem_free(kernel_map, (vm_offset_t) c_buffer, C_SEG_ALLOCSIZE); 838 OSAddAtomic64(-C_SEG_ALLOCSIZE, &compressor_kvspace_used); 839 840 } else if (c_swap_handle) 841 vm_swap_free(c_swap_handle); 842 843 844#if __i386__ || __x86_64__ 845 lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp); 846#else /* __i386__ || __x86_64__ */ 847 lck_spin_destroy(&c_seg->c_lock, &vm_compressor_lck_grp); 848#endif /* __i386__ || __x86_64__ */ 849 850 for (i = 0; i < C_SEG_SLOT_ARRAYS; i++) { 851 if (c_seg->c_slots[i] == 0) 852 break; 853 854 kfree((char *)c_seg->c_slots[i], sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE); 855 } 856 zfree(compressor_segment_zone, c_seg); 857} 858 859 860int c_seg_trim_page_count = 0; 861 862void 863c_seg_trim_tail(c_segment_t c_seg) 864{ 865 c_slot_t cs; 866 uint32_t c_size; 867 uint32_t c_offset; 868 uint32_t c_rounded_size; 869 uint16_t current_nextslot; 870 uint32_t current_populated_offset; 871 872 if (c_seg->c_bytes_used == 0) 873 return; 874 current_nextslot = c_seg->c_nextslot; 875 current_populated_offset = c_seg->c_populated_offset; 876 877 while (c_seg->c_nextslot) { 878 879 cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1)); 880 881 c_size = UNPACK_C_SIZE(cs); 882 883 if (c_size) { 884 if (current_nextslot != c_seg->c_nextslot) { 885 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 886 c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size); 887 888 c_seg->c_nextoffset = c_offset; 889 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1); 890 891 if (c_seg->c_firstemptyslot > c_seg->c_nextslot) 892 c_seg->c_firstemptyslot = c_seg->c_nextslot; 893 894 c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) - 895 round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE); 896 } 897 break; 898 } 899 c_seg->c_nextslot--; 900 } 901 assert(c_seg->c_nextslot); 902} 903 904 905int 906c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy) 907{ 908 c_slot_mapping_t slot_ptr; 909 uint32_t c_offset = 0; 910 uint32_t old_populated_offset; 911 uint32_t c_rounded_size; 912 uint32_t c_size; 913 int c_indx = 0; 914 int i; 915 c_slot_t c_dst; 916 c_slot_t c_src; 917 boolean_t need_unlock = TRUE; 918 919 assert(c_seg->c_busy); 920 921#if VALIDATE_C_SEGMENTS 922 c_seg_validate(c_seg, FALSE); 923#endif 924 if (c_seg->c_bytes_used == 0) { 925 c_seg_free(c_seg); 926 return (1); 927 } 928 if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE) 929 goto done; 930 931#if VALIDATE_C_SEGMENTS 932 c_seg->c_was_minor_compacted++; 933#endif 934 c_indx = c_seg->c_firstemptyslot; 935 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 936 937 old_populated_offset = c_seg->c_populated_offset; 938 c_offset = c_dst->c_offset; 939 940 for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) { 941 942 c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i); 943 944 c_size = UNPACK_C_SIZE(c_src); 945 946 if (c_size == 0) 947 continue; 948 949 memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_size); 950 951#if CHECKSUM_THE_DATA 952 c_dst->c_hash_data = c_src->c_hash_data; 953#endif 954#if CHECKSUM_THE_COMPRESSED_DATA 955 c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data; 956#endif 957 c_dst->c_size = c_src->c_size; 958 c_dst->c_packed_ptr = c_src->c_packed_ptr; 959 c_dst->c_offset = c_offset; 960 961 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst); 962 slot_ptr->s_cindx = c_indx; 963 964 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 965 966 c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 967 PACK_C_SIZE(c_src, 0); 968 c_indx++; 969 970 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 971 } 972 c_seg->c_firstemptyslot = c_indx; 973 c_seg->c_nextslot = c_indx; 974 c_seg->c_nextoffset = c_offset; 975 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1); 976 c_seg->c_bytes_unused = 0; 977 978#if VALIDATE_C_SEGMENTS 979 c_seg_validate(c_seg, TRUE); 980#endif 981 982 if (old_populated_offset > c_seg->c_populated_offset) { 983 uint32_t gc_size; 984 int32_t *gc_ptr; 985 986 gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset); 987 gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset]; 988 989 lck_mtx_unlock_always(&c_seg->c_lock); 990 991 kernel_memory_depopulate(kernel_map, (vm_offset_t)gc_ptr, gc_size, KMA_COMPRESSOR); 992 993 if (clear_busy == TRUE) 994 lck_mtx_lock_spin_always(&c_seg->c_lock); 995 else 996 need_unlock = FALSE; 997 } 998done: 999 if (need_unlock == TRUE) { 1000 if (clear_busy == TRUE) 1001 C_SEG_WAKEUP_DONE(c_seg); 1002 1003 lck_mtx_unlock_always(&c_seg->c_lock); 1004 } 1005 return (0); 1006} 1007 1008 1009 1010struct { 1011 uint64_t asked_permission; 1012 uint64_t compactions; 1013 uint64_t moved_slots; 1014 uint64_t moved_bytes; 1015 uint64_t wasted_space_in_swapouts; 1016 uint64_t count_of_swapouts; 1017} c_seg_major_compact_stats; 1018 1019 1020#define C_MAJOR_COMPACTION_AGE_APPROPRIATE 30 1021#define C_MAJOR_COMPACTION_OLD_ENOUGH 300 1022#define C_MAJOR_COMPACTION_SIZE_APPROPRIATE ((C_SEG_BUFSIZE * 80) / 100) 1023 1024 1025boolean_t 1026c_seg_major_compact_ok( 1027 c_segment_t c_seg_dst, 1028 c_segment_t c_seg_src) 1029{ 1030 1031 c_seg_major_compact_stats.asked_permission++; 1032 1033 if (c_seg_src->c_filling) { 1034 /* 1035 * we're at or near the head... don't compact 1036 */ 1037 return (FALSE); 1038 } 1039 if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE && 1040 c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE) 1041 return (FALSE); 1042 1043 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) { 1044 /* 1045 * destination segment is full... can't compact 1046 */ 1047 return (FALSE); 1048 } 1049 1050 return (TRUE); 1051} 1052 1053 1054boolean_t 1055c_seg_major_compact( 1056 c_segment_t c_seg_dst, 1057 c_segment_t c_seg_src) 1058{ 1059 c_slot_mapping_t slot_ptr; 1060 uint32_t c_rounded_size; 1061 uint32_t c_size; 1062 uint16_t dst_slot; 1063 int i; 1064 c_slot_t c_dst; 1065 c_slot_t c_src; 1066 int slotarray; 1067 boolean_t keep_compacting = TRUE; 1068 1069 /* 1070 * segments are not locked but they are both marked c_busy 1071 * which keeps c_decompress from working on them... 1072 * we can safely allocate new pages, move compressed data 1073 * from c_seg_src to c_seg_dst and update both c_segment's 1074 * state w/o holding the master lock 1075 */ 1076 1077#if VALIDATE_C_SEGMENTS 1078 c_seg_dst->c_was_major_compacted++; 1079 c_seg_src->c_was_major_donor++; 1080#endif 1081 c_seg_major_compact_stats.compactions++; 1082 1083 dst_slot = c_seg_dst->c_nextslot; 1084 1085 for (i = 0; i < c_seg_src->c_nextslot; i++) { 1086 1087 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i); 1088 1089 c_size = UNPACK_C_SIZE(c_src); 1090 1091 if (c_size == 0) { 1092 /* BATCH: move what we have so far; */ 1093 continue; 1094 } 1095 1096 if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) { 1097 /* doesn't fit */ 1098 if ((C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) == C_SEG_BUFSIZE)) { 1099 /* can't fit */ 1100 keep_compacting = FALSE; 1101 break; 1102 } 1103 kernel_memory_populate(kernel_map, 1104 (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset], 1105 PAGE_SIZE, 1106 KMA_COMPRESSOR); 1107 1108 c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE); 1109 assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE); 1110 } 1111 1112 slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot); 1113 1114 if (c_seg_dst->c_slots[slotarray] == 0) { 1115 KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0); 1116 c_seg_dst->c_slots[slotarray] = (struct c_slot *) 1117 kalloc(sizeof(struct c_slot) * 1118 C_SEG_SLOT_ARRAY_SIZE); 1119 KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0); 1120 } 1121 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot); 1122 1123 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size); 1124 1125 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 1126 1127 c_seg_major_compact_stats.moved_slots++; 1128 c_seg_major_compact_stats.moved_bytes += c_size; 1129 1130#if CHECKSUM_THE_DATA 1131 c_dst->c_hash_data = c_src->c_hash_data; 1132#endif 1133#if CHECKSUM_THE_COMPRESSED_DATA 1134 c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data; 1135#endif 1136 c_dst->c_size = c_src->c_size; 1137 c_dst->c_packed_ptr = c_src->c_packed_ptr; 1138 c_dst->c_offset = c_seg_dst->c_nextoffset; 1139 1140 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) 1141 c_seg_dst->c_firstemptyslot++; 1142 c_seg_dst->c_nextslot++; 1143 c_seg_dst->c_bytes_used += c_rounded_size; 1144 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 1145 1146 PACK_C_SIZE(c_src, 0); 1147 1148 c_seg_src->c_bytes_used -= c_rounded_size; 1149 c_seg_src->c_bytes_unused += c_rounded_size; 1150 c_seg_src->c_firstemptyslot = 0; 1151 1152 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) { 1153 /* dest segment is now full */ 1154 keep_compacting = FALSE; 1155 break; 1156 } 1157 } 1158 if (dst_slot < c_seg_dst->c_nextslot) { 1159 1160 PAGE_REPLACEMENT_ALLOWED(TRUE); 1161 /* 1162 * we've now locked out c_decompress from 1163 * converting the slot passed into it into 1164 * a c_segment_t which allows us to use 1165 * the backptr to change which c_segment and 1166 * index the slot points to 1167 */ 1168 while (dst_slot < c_seg_dst->c_nextslot) { 1169 1170 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot); 1171 1172 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst); 1173 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */ 1174 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1; 1175 slot_ptr->s_cindx = dst_slot++; 1176 } 1177 PAGE_REPLACEMENT_ALLOWED(FALSE); 1178 } 1179 return (keep_compacting); 1180} 1181 1182 1183uint64_t 1184vm_compressor_compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec) 1185{ 1186 uint64_t end_msecs; 1187 uint64_t start_msecs; 1188 1189 end_msecs = (end_sec * 1000) + end_nsec / 1000000; 1190 start_msecs = (start_sec * 1000) + start_nsec / 1000000; 1191 1192 return (end_msecs - start_msecs); 1193} 1194 1195 1196 1197uint32_t compressor_eval_period_in_msecs = 250; 1198uint32_t compressor_sample_min_in_msecs = 500; 1199uint32_t compressor_sample_max_in_msecs = 10000; 1200uint32_t compressor_thrashing_threshold_per_10msecs = 50; 1201uint32_t compressor_thrashing_min_per_10msecs = 20; 1202 1203/* When true, reset sample data next chance we get. */ 1204static boolean_t compressor_need_sample_reset = FALSE; 1205 1206extern uint32_t vm_page_filecache_min; 1207 1208 1209void 1210compute_swapout_target_age(void) 1211{ 1212 clock_sec_t cur_ts_sec; 1213 clock_nsec_t cur_ts_nsec; 1214 uint32_t min_operations_needed_in_this_sample; 1215 uint64_t elapsed_msecs_in_eval; 1216 uint64_t elapsed_msecs_in_sample; 1217 boolean_t need_eval_reset = FALSE; 1218 1219 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); 1220 1221 elapsed_msecs_in_sample = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec); 1222 1223 if (compressor_need_sample_reset || 1224 elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) { 1225 compressor_need_sample_reset = TRUE; 1226 need_eval_reset = TRUE; 1227 goto done; 1228 } 1229 elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec); 1230 1231 if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs) 1232 goto done; 1233 need_eval_reset = TRUE; 1234 1235 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0); 1236 1237 min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10; 1238 1239 if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample || 1240 (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) { 1241 1242 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count, 1243 sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0); 1244 1245 swapout_target_age = 0; 1246 1247 compressor_need_sample_reset = TRUE; 1248 need_eval_reset = TRUE; 1249 goto done; 1250 } 1251 last_eval_compression_count = sample_period_compression_count; 1252 last_eval_decompression_count = sample_period_decompression_count; 1253 1254 if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) { 1255 1256 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0); 1257 goto done; 1258 } 1259 if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) { 1260 1261 uint64_t running_total; 1262 uint64_t working_target; 1263 uint64_t aging_target; 1264 uint32_t oldest_age_of_csegs_sampled = 0; 1265 uint64_t working_set_approximation = 0; 1266 1267 swapout_target_age = 0; 1268 1269 working_target = (sample_period_decompression_count / 100) * 95; /* 95 percent */ 1270 aging_target = (sample_period_decompression_count / 100) * 1; /* 1 percent */ 1271 running_total = 0; 1272 1273 for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) { 1274 1275 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 1276 1277 working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 1278 1279 if (running_total >= working_target) 1280 break; 1281 } 1282 if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) { 1283 1284 working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample; 1285 1286 if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) { 1287 1288 running_total = overage_decompressions_during_sample_period; 1289 1290 for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) { 1291 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 1292 1293 if (running_total >= aging_target) 1294 break; 1295 } 1296 swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled; 1297 1298 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0); 1299 } else { 1300 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0); 1301 } 1302 } else 1303 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0); 1304 1305 compressor_need_sample_reset = TRUE; 1306 need_eval_reset = TRUE; 1307 } else 1308 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0); 1309done: 1310 if (compressor_need_sample_reset == TRUE) { 1311 bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period)); 1312 overage_decompressions_during_sample_period = 0; 1313 1314 start_of_sample_period_sec = cur_ts_sec; 1315 start_of_sample_period_nsec = cur_ts_nsec; 1316 sample_period_decompression_count = 0; 1317 sample_period_compression_count = 0; 1318 last_eval_decompression_count = 0; 1319 last_eval_compression_count = 0; 1320 compressor_need_sample_reset = FALSE; 1321 } 1322 if (need_eval_reset == TRUE) { 1323 start_of_eval_period_sec = cur_ts_sec; 1324 start_of_eval_period_nsec = cur_ts_nsec; 1325 } 1326} 1327 1328 1329int compaction_swapper_inited = 0; 1330int compaction_swapper_init_now = 0; 1331int compaction_swapper_running = 0; 1332int compaction_swapper_abort = 0; 1333 1334 1335#if CONFIG_JETSAM 1336boolean_t memorystatus_kill_on_VM_thrashing(boolean_t); 1337boolean_t memorystatus_kill_on_FC_thrashing(boolean_t); 1338int compressor_thrashing_induced_jetsam = 0; 1339int filecache_thrashing_induced_jetsam = 0; 1340static boolean_t vm_compressor_thrashing_detected = FALSE; 1341#endif /* CONFIG_JETSAM */ 1342 1343static boolean_t 1344compressor_needs_to_swap(void) 1345{ 1346 boolean_t should_swap = FALSE; 1347 1348 if (vm_swap_up == TRUE) { 1349 if (COMPRESSOR_NEEDS_TO_SWAP()) { 1350 return (TRUE); 1351 } 1352 if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) { 1353 return (TRUE); 1354 } 1355 if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) 1356 return (TRUE); 1357 } 1358 compute_swapout_target_age(); 1359 1360 if (swapout_target_age) { 1361 c_segment_t c_seg; 1362 1363 lck_mtx_lock_spin_always(c_list_lock); 1364 1365 if (!queue_empty(&c_age_list_head)) { 1366 1367 c_seg = (c_segment_t) queue_first(&c_age_list_head); 1368 1369 if (c_seg->c_creation_ts > swapout_target_age) 1370 swapout_target_age = 0; 1371 } 1372 lck_mtx_unlock_always(c_list_lock); 1373 } 1374#if CONFIG_PHANTOM_CACHE 1375 if (vm_phantom_cache_check_pressure()) 1376 should_swap = TRUE; 1377#endif 1378 if (swapout_target_age) 1379 should_swap = TRUE; 1380 1381 if (vm_swap_up == FALSE) { 1382 1383 if (should_swap) { 1384#if CONFIG_JETSAM 1385 if (vm_compressor_thrashing_detected == FALSE) { 1386 vm_compressor_thrashing_detected = TRUE; 1387 1388 if (swapout_target_age) { 1389 memorystatus_kill_on_VM_thrashing(TRUE /* async */); 1390 compressor_thrashing_induced_jetsam++; 1391 } else { 1392 memorystatus_kill_on_FC_thrashing(TRUE /* async */); 1393 filecache_thrashing_induced_jetsam++; 1394 } 1395 /* 1396 * let the jetsam take precedence over 1397 * any major compactions we might have 1398 * been able to do... otherwise we run 1399 * the risk of doing major compactions 1400 * on segments we're about to free up 1401 * due to the jetsam activity. 1402 */ 1403 should_swap = FALSE; 1404 } 1405#endif /* CONFIG_JETSAM */ 1406 } else 1407 should_swap = COMPRESSOR_NEEDS_TO_MAJOR_COMPACT(); 1408 } 1409 1410 /* 1411 * returning TRUE when swap_supported == FALSE 1412 * will cause the major compaction engine to 1413 * run, but will not trigger any swapping... 1414 * segments that have been major compacted 1415 * will be moved to the swapped_out_q 1416 * but will not have the c_ondisk flag set 1417 */ 1418 return (should_swap); 1419} 1420 1421#if CONFIG_JETSAM 1422/* 1423 * This function is called from the jetsam thread after killing something to 1424 * mitigate thrashing. 1425 * 1426 * We need to restart our thrashing detection heuristics since memory pressure 1427 * has potentially changed significantly, and we don't want to detect on old 1428 * data from before the jetsam. 1429 */ 1430void 1431vm_thrashing_jetsam_done(void) 1432{ 1433 vm_compressor_thrashing_detected = FALSE; 1434 1435 /* Were we compressor-thrashing or filecache-thrashing? */ 1436 if (swapout_target_age) { 1437 swapout_target_age = 0; 1438 compressor_need_sample_reset = TRUE; 1439 } 1440#if CONFIG_PHANTOM_CACHE 1441 else { 1442 vm_phantom_cache_restart_sample(); 1443 } 1444#endif 1445} 1446#endif /* CONFIG_JETSAM */ 1447 1448uint32_t vm_wake_compactor_swapper_calls = 0; 1449 1450void 1451vm_wake_compactor_swapper(void) 1452{ 1453 boolean_t need_major_compaction = FALSE; 1454 1455 if (compaction_swapper_running) 1456 return; 1457 1458 if (c_minor_count == 0 && need_major_compaction == FALSE) 1459 return; 1460 1461 lck_mtx_lock_spin_always(c_list_lock); 1462 1463 fastwake_warmup = FALSE; 1464 1465 if (compaction_swapper_running == 0) { 1466 vm_wake_compactor_swapper_calls++; 1467 1468 thread_wakeup((event_t)&c_compressor_swap_trigger); 1469 1470 compaction_swapper_running = 1; 1471 } 1472 lck_mtx_unlock_always(c_list_lock); 1473} 1474 1475 1476void 1477vm_consider_waking_compactor_swapper(void) 1478{ 1479 boolean_t need_wakeup = FALSE; 1480 1481 if (compaction_swapper_running) 1482 return; 1483 1484 if (!compaction_swapper_inited && !compaction_swapper_init_now) { 1485 compaction_swapper_init_now = 1; 1486 need_wakeup = TRUE; 1487 } 1488 1489 if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) { 1490 1491 need_wakeup = TRUE; 1492 1493 } else if (compressor_needs_to_swap()) { 1494 1495 need_wakeup = TRUE; 1496 1497 } else if (c_minor_count) { 1498 uint64_t total_bytes; 1499 1500 total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64; 1501 1502 if ((total_bytes - compressor_bytes_used) > total_bytes / 10) 1503 need_wakeup = TRUE; 1504 } 1505 if (need_wakeup == TRUE) { 1506 1507 lck_mtx_lock_spin_always(c_list_lock); 1508 1509 fastwake_warmup = FALSE; 1510 1511 if (compaction_swapper_running == 0) { 1512 memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE); 1513 1514 thread_wakeup((event_t)&c_compressor_swap_trigger); 1515 1516 compaction_swapper_running = 1; 1517 } 1518 lck_mtx_unlock_always(c_list_lock); 1519 } 1520} 1521 1522 1523#define C_SWAPOUT_LIMIT 4 1524#define DELAYED_COMPACTIONS_PER_PASS 30 1525 1526void 1527vm_compressor_do_delayed_compactions(boolean_t flush_all) 1528{ 1529 c_segment_t c_seg; 1530 int number_compacted = 0; 1531 boolean_t needs_to_swap = FALSE; 1532 1533 1534 lck_mtx_assert(c_list_lock, LCK_MTX_ASSERT_OWNED); 1535 1536 while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) { 1537 1538 c_seg = (c_segment_t)queue_first(&c_minor_list_head); 1539 1540 lck_mtx_lock_spin_always(&c_seg->c_lock); 1541 1542 if (c_seg->c_busy) { 1543 1544 lck_mtx_unlock_always(c_list_lock); 1545 c_seg_wait_on_busy(c_seg); 1546 lck_mtx_lock_spin_always(c_list_lock); 1547 1548 continue; 1549 } 1550 C_SEG_BUSY(c_seg); 1551 1552 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE); 1553 1554 if (vm_swap_up == TRUE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) { 1555 1556 if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT) 1557 needs_to_swap = TRUE; 1558 1559 number_compacted = 0; 1560 } 1561 lck_mtx_lock_spin_always(c_list_lock); 1562 } 1563} 1564 1565 1566#define C_SEGMENT_SWAPPEDIN_AGE_LIMIT 10 1567 1568static void 1569vm_compressor_age_swapped_in_segments(boolean_t flush_all) 1570{ 1571 c_segment_t c_seg; 1572 clock_sec_t now; 1573 clock_nsec_t nsec; 1574 1575 clock_get_system_nanotime(&now, &nsec); 1576 1577 while (!queue_empty(&c_swappedin_list_head)) { 1578 1579 c_seg = (c_segment_t)queue_first(&c_swappedin_list_head); 1580 1581 if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT) 1582 break; 1583 1584 lck_mtx_lock_spin_always(&c_seg->c_lock); 1585 1586 queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 1587 c_seg->c_on_swappedin_q = 0; 1588 c_swappedin_count--; 1589 1590 c_seg_insert_into_q(&c_age_list_head, c_seg); 1591 c_seg->c_on_age_q = 1; 1592 c_age_count++; 1593 1594 lck_mtx_unlock_always(&c_seg->c_lock); 1595 } 1596} 1597 1598 1599void 1600vm_compressor_flush(void) 1601{ 1602 uint64_t vm_swap_put_failures_at_start; 1603 wait_result_t wait_result = 0; 1604 AbsoluteTime startTime, endTime; 1605 clock_sec_t now_sec; 1606 clock_nsec_t now_nsec; 1607 uint64_t nsec; 1608 1609 HIBLOG("vm_compressor_flush - starting\n"); 1610 1611 clock_get_uptime(&startTime); 1612 1613 lck_mtx_lock_spin_always(c_list_lock); 1614 1615 fastwake_warmup = FALSE; 1616 compaction_swapper_abort = 1; 1617 1618 while (compaction_swapper_running) { 1619 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT); 1620 1621 lck_mtx_unlock_always(c_list_lock); 1622 1623 thread_block(THREAD_CONTINUE_NULL); 1624 1625 lck_mtx_lock_spin_always(c_list_lock); 1626 } 1627 compaction_swapper_abort = 0; 1628 compaction_swapper_running = 1; 1629 1630 hibernate_flushing = TRUE; 1631 hibernate_no_swapspace = FALSE; 1632 c_generation_id_flush_barrier = c_generation_id + 1000; 1633 1634 clock_get_system_nanotime(&now_sec, &now_nsec); 1635 hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE; 1636 1637 vm_swap_put_failures_at_start = vm_swap_put_failures; 1638 1639 vm_compressor_compact_and_swap(TRUE); 1640 1641 while (!queue_empty(&c_swapout_list_head)) { 1642 1643 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC); 1644 1645 lck_mtx_unlock_always(c_list_lock); 1646 1647 wait_result = thread_block(THREAD_CONTINUE_NULL); 1648 1649 lck_mtx_lock_spin_always(c_list_lock); 1650 1651 if (wait_result == THREAD_TIMED_OUT) 1652 break; 1653 } 1654 hibernate_flushing = FALSE; 1655 compaction_swapper_running = 0; 1656 1657 if (vm_swap_put_failures > vm_swap_put_failures_at_start) 1658 HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n", 1659 vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT); 1660 1661 lck_mtx_unlock_always(c_list_lock); 1662 1663 clock_get_uptime(&endTime); 1664 SUB_ABSOLUTETIME(&endTime, &startTime); 1665 absolutetime_to_nanoseconds(endTime, &nsec); 1666 1667 HIBLOG("vm_compressor_flush completed - took %qd msecs\n", nsec / 1000000ULL); 1668} 1669 1670 1671extern void vm_swap_file_set_tuneables(void); 1672int compaction_swap_trigger_thread_awakened = 0; 1673 1674 1675static void 1676vm_compressor_swap_trigger_thread(void) 1677{ 1678 /* 1679 * compaction_swapper_init_now is set when the first call to 1680 * vm_consider_waking_compactor_swapper is made from 1681 * vm_pageout_scan... since this function is called upon 1682 * thread creation, we want to make sure to delay adjusting 1683 * the tuneables until we are awakened via vm_pageout_scan 1684 * so that we are at a point where the vm_swapfile_open will 1685 * be operating on the correct directory (in case the default 1686 * of /var/vm/ is overridden by the dymanic_pager 1687 */ 1688 if (compaction_swapper_init_now && !compaction_swapper_inited) { 1689 if (vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) 1690 vm_swap_file_set_tuneables(); 1691 1692 compaction_swapper_inited = 1; 1693 } 1694 lck_mtx_lock_spin_always(c_list_lock); 1695 1696 compaction_swap_trigger_thread_awakened++; 1697 1698 vm_compressor_compact_and_swap(FALSE); 1699 1700 assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT); 1701 1702 compaction_swapper_running = 0; 1703 thread_wakeup((event_t)&compaction_swapper_running); 1704 1705 lck_mtx_unlock_always(c_list_lock); 1706 1707 thread_block((thread_continue_t)vm_compressor_swap_trigger_thread); 1708 1709 /* NOTREACHED */ 1710} 1711 1712 1713void 1714vm_compressor_record_warmup_start(void) 1715{ 1716 c_segment_t c_seg; 1717 1718 lck_mtx_lock_spin_always(c_list_lock); 1719 1720 if (first_c_segment_to_warm_generation_id == 0) { 1721 if (!queue_empty(&c_age_list_head)) { 1722 1723 c_seg = (c_segment_t)queue_last(&c_age_list_head); 1724 1725 first_c_segment_to_warm_generation_id = c_seg->c_generation_id; 1726 } else 1727 first_c_segment_to_warm_generation_id = 0; 1728 1729 fastwake_recording_in_progress = TRUE; 1730 } 1731 lck_mtx_unlock_always(c_list_lock); 1732} 1733 1734 1735void 1736vm_compressor_record_warmup_end(void) 1737{ 1738 c_segment_t c_seg; 1739 1740 lck_mtx_lock_spin_always(c_list_lock); 1741 1742 if (fastwake_recording_in_progress == TRUE) { 1743 1744 if (!queue_empty(&c_age_list_head)) { 1745 1746 c_seg = (c_segment_t)queue_last(&c_age_list_head); 1747 1748 last_c_segment_to_warm_generation_id = c_seg->c_generation_id; 1749 } else 1750 last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id; 1751 1752 fastwake_recording_in_progress = FALSE; 1753 1754 HIBLOG("vm_compressor_record_warmup (%qd - %qd)\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id); 1755 } 1756 lck_mtx_unlock_always(c_list_lock); 1757} 1758 1759 1760#define DELAY_TRIM_ON_WAKE_SECS 4 1761 1762void 1763vm_compressor_delay_trim(void) 1764{ 1765 clock_sec_t sec; 1766 clock_nsec_t nsec; 1767 1768 clock_get_system_nanotime(&sec, &nsec); 1769 dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS; 1770} 1771 1772 1773void 1774vm_compressor_do_warmup(void) 1775{ 1776 lck_mtx_lock_spin_always(c_list_lock); 1777 1778 if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) { 1779 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0; 1780 1781 lck_mtx_unlock_always(c_list_lock); 1782 return; 1783 } 1784 1785 if (compaction_swapper_running == 0) { 1786 1787 fastwake_warmup = TRUE; 1788 compaction_swapper_running = 1; 1789 thread_wakeup((event_t)&c_compressor_swap_trigger); 1790 } 1791 lck_mtx_unlock_always(c_list_lock); 1792} 1793 1794 1795void 1796do_fastwake_warmup(void) 1797{ 1798 uint64_t my_thread_id; 1799 c_segment_t c_seg = NULL; 1800 AbsoluteTime startTime, endTime; 1801 uint64_t nsec; 1802 1803 1804 HIBLOG("vm_compressor_fastwake_warmup (%qd - %qd) - starting\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id); 1805 1806 clock_get_uptime(&startTime); 1807 1808 lck_mtx_unlock_always(c_list_lock); 1809 1810 my_thread_id = current_thread()->thread_id; 1811 proc_set_task_policy_thread(kernel_task, my_thread_id, 1812 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); 1813 1814 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1815 1816 lck_mtx_lock_spin_always(c_list_lock); 1817 1818 while (!queue_empty(&c_swappedout_list_head) && fastwake_warmup == TRUE) { 1819 1820 c_seg = (c_segment_t) queue_first(&c_swappedout_list_head); 1821 1822 if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id || 1823 c_seg->c_generation_id > last_c_segment_to_warm_generation_id) 1824 break; 1825 1826 lck_mtx_lock_spin_always(&c_seg->c_lock); 1827 lck_mtx_unlock_always(c_list_lock); 1828 1829 if (c_seg->c_busy) { 1830 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1831 c_seg_wait_on_busy(c_seg); 1832 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1833 } else { 1834 c_seg_swapin(c_seg, TRUE); 1835 1836 lck_mtx_unlock_always(&c_seg->c_lock); 1837 c_segment_warmup_count++; 1838 1839 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1840 vm_pageout_io_throttle(); 1841 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1842 } 1843 lck_mtx_lock_spin_always(c_list_lock); 1844 } 1845 lck_mtx_unlock_always(c_list_lock); 1846 1847 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1848 1849 proc_set_task_policy_thread(kernel_task, my_thread_id, 1850 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0); 1851 1852 clock_get_uptime(&endTime); 1853 SUB_ABSOLUTETIME(&endTime, &startTime); 1854 absolutetime_to_nanoseconds(endTime, &nsec); 1855 1856 HIBLOG("vm_compressor_fastwake_warmup completed - took %qd msecs\n", nsec / 1000000ULL); 1857 1858 lck_mtx_lock_spin_always(c_list_lock); 1859 1860 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0; 1861} 1862 1863 1864void 1865vm_compressor_compact_and_swap(boolean_t flush_all) 1866{ 1867 c_segment_t c_seg, c_seg_next; 1868 boolean_t keep_compacting; 1869 1870 1871 if (fastwake_warmup == TRUE) { 1872 uint64_t starting_warmup_count; 1873 1874 starting_warmup_count = c_segment_warmup_count; 1875 1876 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count, 1877 first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0); 1878 do_fastwake_warmup(); 1879 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0); 1880 1881 fastwake_warmup = FALSE; 1882 } 1883 1884 /* 1885 * it's possible for the c_age_list_head to be empty if we 1886 * hit our limits for growing the compressor pool and we subsequently 1887 * hibernated... on the next hibernation we could see the queue as 1888 * empty and not proceeed even though we have a bunch of segments on 1889 * the swapped in queue that need to be dealt with. 1890 */ 1891 vm_compressor_do_delayed_compactions(flush_all); 1892 1893 vm_compressor_age_swapped_in_segments(flush_all); 1894 1895 1896 while (!queue_empty(&c_age_list_head) && compaction_swapper_abort == 0) { 1897 1898 if (hibernate_flushing == TRUE) { 1899 clock_sec_t sec; 1900 clock_nsec_t nsec; 1901 1902 if (hibernate_should_abort()) { 1903 HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n"); 1904 break; 1905 } 1906 if (hibernate_no_swapspace == TRUE) { 1907 HIBLOG("vm_compressor_flush - out of swap space\n"); 1908 break; 1909 } 1910 clock_get_system_nanotime(&sec, &nsec); 1911 1912 if (sec > hibernate_flushing_deadline) { 1913 HIBLOG("vm_compressor_flush - failed to finish before deadline\n"); 1914 break; 1915 } 1916 } 1917 if (c_swapout_count >= C_SWAPOUT_LIMIT) { 1918 1919 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000*NSEC_PER_USEC); 1920 1921 lck_mtx_unlock_always(c_list_lock); 1922 1923 thread_block(THREAD_CONTINUE_NULL); 1924 1925 lck_mtx_lock_spin_always(c_list_lock); 1926 } 1927 /* 1928 * Minor compactions 1929 */ 1930 vm_compressor_do_delayed_compactions(flush_all); 1931 1932 vm_compressor_age_swapped_in_segments(flush_all); 1933 1934 if (c_swapout_count >= C_SWAPOUT_LIMIT) { 1935 /* 1936 * we timed out on the above thread_block 1937 * let's loop around and try again 1938 * the timeout allows us to continue 1939 * to do minor compactions to make 1940 * more memory available 1941 */ 1942 continue; 1943 } 1944 1945 /* 1946 * Swap out segments? 1947 */ 1948 if (flush_all == FALSE) { 1949 boolean_t needs_to_swap; 1950 1951 lck_mtx_unlock_always(c_list_lock); 1952 1953 needs_to_swap = compressor_needs_to_swap(); 1954 1955 lck_mtx_lock_spin_always(c_list_lock); 1956 1957 if (needs_to_swap == FALSE) 1958 break; 1959 } 1960 if (queue_empty(&c_age_list_head)) 1961 break; 1962 c_seg = (c_segment_t) queue_first(&c_age_list_head); 1963 1964 if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier) 1965 break; 1966 1967 if (c_seg->c_filling) { 1968 /* 1969 * we're at or near the head... no more work to do 1970 */ 1971 break; 1972 } 1973 lck_mtx_lock_spin_always(&c_seg->c_lock); 1974 1975 if (c_seg->c_busy) { 1976 1977 lck_mtx_unlock_always(c_list_lock); 1978 c_seg_wait_on_busy(c_seg); 1979 lck_mtx_lock_spin_always(c_list_lock); 1980 1981 continue; 1982 } 1983 C_SEG_BUSY(c_seg); 1984 1985 if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) { 1986 /* 1987 * found an empty c_segment and freed it 1988 * so go grab the next guy in the queue 1989 */ 1990 continue; 1991 } 1992 /* 1993 * Major compaction 1994 */ 1995 keep_compacting = TRUE; 1996 1997 while (keep_compacting == TRUE) { 1998 1999 assert(c_seg->c_busy); 2000 2001 /* look for another segment to consolidate */ 2002 2003 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list); 2004 2005 if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next)) 2006 break; 2007 2008 if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE) 2009 break; 2010 2011 lck_mtx_lock_spin_always(&c_seg_next->c_lock); 2012 2013 if (c_seg_next->c_busy) { 2014 2015 lck_mtx_unlock_always(c_list_lock); 2016 c_seg_wait_on_busy(c_seg_next); 2017 lck_mtx_lock_spin_always(c_list_lock); 2018 2019 continue; 2020 } 2021 /* grab that segment */ 2022 C_SEG_BUSY(c_seg_next); 2023 2024 if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) { 2025 /* 2026 * found an empty c_segment and freed it 2027 * so we can't continue to use c_seg_next 2028 */ 2029 continue; 2030 } 2031 2032 /* unlock the list ... */ 2033 lck_mtx_unlock_always(c_list_lock); 2034 2035 /* do the major compaction */ 2036 2037 keep_compacting = c_seg_major_compact(c_seg, c_seg_next); 2038 2039 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2040 2041 lck_mtx_lock_spin_always(&c_seg_next->c_lock); 2042 /* 2043 * run a minor compaction on the donor segment 2044 * since we pulled at least some of it's 2045 * data into our target... if we've emptied 2046 * it, now is a good time to free it which 2047 * c_seg_minor_compaction_and_unlock also takes care of 2048 * 2049 * by passing TRUE, we ask for c_busy to be cleared 2050 * and c_wanted to be taken care of 2051 */ 2052 c_seg_minor_compaction_and_unlock(c_seg_next, TRUE); 2053 2054 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2055 2056 /* relock the list */ 2057 lck_mtx_lock_spin_always(c_list_lock); 2058 2059 } /* major compaction */ 2060 2061 c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used; 2062 c_seg_major_compact_stats.count_of_swapouts++; 2063 2064 lck_mtx_lock_spin_always(&c_seg->c_lock); 2065 2066 assert(c_seg->c_busy); 2067 assert(c_seg->c_on_age_q); 2068 assert(!c_seg->c_on_minorcompact_q); 2069 2070 queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list); 2071 c_seg->c_on_age_q = 0; 2072 c_age_count--; 2073 2074 if (vm_swap_up == TRUE) { 2075 queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); 2076 c_seg->c_on_swapout_q = 1; 2077 c_swapout_count++; 2078 } else { 2079 queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 2080 c_seg->c_on_swappedout_q = 1; 2081 c_swappedout_count++; 2082 } 2083 C_SEG_WAKEUP_DONE(c_seg); 2084 2085 lck_mtx_unlock_always(&c_seg->c_lock); 2086 2087 if (c_swapout_count) { 2088 lck_mtx_unlock_always(c_list_lock); 2089 2090 thread_wakeup((event_t)&c_swapout_list_head); 2091 2092 lck_mtx_lock_spin_always(c_list_lock); 2093 } 2094 } 2095} 2096 2097 2098static uint32_t no_paging_space_action_in_progress = 0; 2099extern void memorystatus_send_low_swap_note(void); 2100 2101 2102static c_segment_t 2103c_seg_allocate(c_segment_t *current_chead) 2104{ 2105 clock_sec_t sec; 2106 clock_nsec_t nsec; 2107 c_segment_t c_seg; 2108 int slotarray; 2109 2110 if ( (c_seg = *current_chead) == NULL ) { 2111 uint32_t c_segno; 2112 2113 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) { 2114 2115 if (no_paging_space_action_in_progress == 0) { 2116 2117 if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) { 2118 2119 if (no_paging_space_action()) { 2120 memorystatus_send_low_swap_note(); 2121 } 2122 2123 no_paging_space_action_in_progress = 0; 2124 } 2125 } 2126 } 2127 KERNEL_DEBUG(0xe0400004 | DBG_FUNC_START, 0, 0, 0, 0, 0); 2128 2129 lck_mtx_lock_spin_always(c_list_lock); 2130 2131 while (c_segments_busy == TRUE) { 2132 assert_wait((event_t) (&c_segments_busy), THREAD_UNINT); 2133 2134 lck_mtx_unlock_always(c_list_lock); 2135 2136 thread_block(THREAD_CONTINUE_NULL); 2137 2138 lck_mtx_lock_spin_always(c_list_lock); 2139 } 2140 if (c_free_segno_head == (uint32_t)-1) { 2141 2142 if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) { 2143 lck_mtx_unlock_always(c_list_lock); 2144 2145 KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 1, 0); 2146 return (NULL); 2147 } 2148 c_segments_busy = TRUE; 2149 lck_mtx_unlock_always(c_list_lock); 2150 2151 kernel_memory_populate(kernel_map, (vm_offset_t)c_segments_next_page, PAGE_SIZE, KMA_KOBJECT); 2152 c_segments_next_page += PAGE_SIZE; 2153 2154 for (c_segno = c_segments_available + 1; c_segno < (c_segments_available + C_SEGMENTS_PER_PAGE); c_segno++) 2155 c_segments[c_segno - 1].c_segno = c_segno; 2156 2157 lck_mtx_lock_spin_always(c_list_lock); 2158 2159 c_segments[c_segno - 1].c_segno = c_free_segno_head; 2160 c_free_segno_head = c_segments_available; 2161 c_segments_available += C_SEGMENTS_PER_PAGE; 2162 2163 c_segments_busy = FALSE; 2164 thread_wakeup((event_t) (&c_segments_busy)); 2165 } 2166 c_segno = c_free_segno_head; 2167 c_free_segno_head = c_segments[c_segno].c_segno; 2168 2169 lck_mtx_unlock_always(c_list_lock); 2170 2171 c_seg = (c_segment_t)zalloc(compressor_segment_zone); 2172 bzero((char *)c_seg, sizeof(struct c_segment)); 2173 2174 if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_seg->c_store.c_buffer), C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) { 2175 zfree(compressor_segment_zone, c_seg); 2176 2177 lck_mtx_lock_spin_always(c_list_lock); 2178 2179 c_segments[c_segno].c_segno = c_free_segno_head; 2180 c_free_segno_head = c_segno; 2181 2182 lck_mtx_unlock_always(c_list_lock); 2183 2184 KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 2, 0); 2185 2186 return (NULL); 2187 } 2188 OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used); 2189 2190#if __i386__ || __x86_64__ 2191 lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); 2192#else /* __i386__ || __x86_64__ */ 2193 lck_spin_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); 2194#endif /* __i386__ || __x86_64__ */ 2195 2196 kernel_memory_populate(kernel_map, (vm_offset_t)(c_seg->c_store.c_buffer), 3 * PAGE_SIZE, KMA_COMPRESSOR); 2197 2198 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(3 * PAGE_SIZE); 2199 c_seg->c_firstemptyslot = C_SLOT_MAX; 2200 c_seg->c_mysegno = c_segno; 2201 c_seg->c_filling = 1; 2202 2203 lck_mtx_lock_spin_always(c_list_lock); 2204 2205 c_segment_count++; 2206 c_segments[c_segno].c_seg = c_seg; 2207 2208 c_seg->c_generation_id = c_generation_id++; 2209 2210 queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list); 2211 c_seg->c_on_age_q = 1; 2212 c_age_count++; 2213 2214 lck_mtx_unlock_always(c_list_lock); 2215 2216 clock_get_system_nanotime(&sec, &nsec); 2217 c_seg->c_creation_ts = (uint32_t)sec; 2218 2219 *current_chead = c_seg; 2220 2221 KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, c_seg, 0, 0, 3, 0); 2222 } 2223 slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg, c_seg->c_nextslot); 2224 2225 if (c_seg->c_slots[slotarray] == 0) { 2226 KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0); 2227 2228 c_seg->c_slots[slotarray] = (struct c_slot *)kalloc(sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE); 2229 2230 KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0); 2231 } 2232 2233 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2234 2235 lck_mtx_lock_spin_always(&c_seg->c_lock); 2236 2237 return (c_seg); 2238} 2239 2240 2241 2242static void 2243c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead) 2244{ 2245 uint32_t unused_bytes; 2246 uint32_t offset_to_depopulate; 2247 2248 unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset)); 2249 2250 if (unused_bytes) { 2251 2252 offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset))); 2253 2254 /* 2255 * release the extra physical page(s) at the end of the segment 2256 */ 2257 lck_mtx_unlock_always(&c_seg->c_lock); 2258 2259 kernel_memory_depopulate( 2260 kernel_map, 2261 (vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate], 2262 unused_bytes, 2263 KMA_COMPRESSOR); 2264 2265 lck_mtx_lock_spin_always(&c_seg->c_lock); 2266 2267 c_seg->c_populated_offset = offset_to_depopulate; 2268 } 2269 c_seg->c_filling = 0; 2270 2271 if (C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) 2272 c_seg_need_delayed_compaction(c_seg); 2273 2274 lck_mtx_unlock_always(&c_seg->c_lock); 2275 2276 *current_chead = NULL; 2277} 2278 2279 2280/* 2281 * returns with c_seg locked 2282 */ 2283void 2284c_seg_swapin_requeue(c_segment_t c_seg) 2285{ 2286 clock_sec_t sec; 2287 clock_nsec_t nsec; 2288 2289 clock_get_system_nanotime(&sec, &nsec); 2290 2291 lck_mtx_lock_spin_always(c_list_lock); 2292 lck_mtx_lock_spin_always(&c_seg->c_lock); 2293 2294 if (c_seg->c_on_swappedout_q) { 2295 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 2296 c_seg->c_on_swappedout_q = 0; 2297 c_swappedout_count--; 2298 } else { 2299 assert(c_seg->c_on_swappedout_sparse_q); 2300 2301 queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); 2302 c_seg->c_on_swappedout_sparse_q = 0; 2303 c_swappedout_sparse_count--; 2304 } 2305 if (c_seg->c_store.c_buffer) { 2306 queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 2307 c_seg->c_on_swappedin_q = 1; 2308 c_swappedin_count++; 2309 } 2310#if TRACK_BAD_C_SEGMENTS 2311 else { 2312 queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list); 2313 c_seg->c_on_bad_q = 1; 2314 c_bad_count++; 2315 } 2316#endif 2317 c_seg->c_swappedin_ts = (uint32_t)sec; 2318 c_seg->c_ondisk = 0; 2319 c_seg->c_was_swapped_in = 1; 2320 2321 lck_mtx_unlock_always(c_list_lock); 2322} 2323 2324 2325 2326/* 2327 * c_seg has to be locked and is returned locked. 2328 * PAGE_REPLACMENT_DISALLOWED has to be TRUE on entry and is returned TRUE 2329 */ 2330 2331void 2332c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction) 2333{ 2334 vm_offset_t addr = 0; 2335 uint32_t io_size = 0; 2336 uint64_t f_offset; 2337 2338#if !CHECKSUM_THE_SWAP 2339 if (c_seg->c_ondisk) 2340 c_seg_trim_tail(c_seg); 2341#endif 2342 io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); 2343 f_offset = c_seg->c_store.c_swap_handle; 2344 2345 C_SEG_BUSY(c_seg); 2346 lck_mtx_unlock_always(&c_seg->c_lock); 2347 2348 if (c_seg->c_ondisk) { 2349 2350 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2351 2352 if (kernel_memory_allocate(kernel_map, &addr, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) 2353 panic("c_seg_swapin: kernel_memory_allocate failed\n"); 2354 2355 kernel_memory_populate(kernel_map, addr, io_size, KMA_COMPRESSOR); 2356 2357 if (vm_swap_get(addr, f_offset, io_size) != KERN_SUCCESS) { 2358 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2359 2360 kernel_memory_depopulate(kernel_map, addr, io_size, KMA_COMPRESSOR); 2361 kmem_free(kernel_map, addr, C_SEG_ALLOCSIZE); 2362 2363 c_seg->c_store.c_buffer = (int32_t*) NULL; 2364 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0); 2365 } else { 2366 c_seg->c_store.c_buffer = (int32_t*) addr; 2367#if ENCRYPTED_SWAP 2368 vm_swap_decrypt(c_seg); 2369#endif /* ENCRYPTED_SWAP */ 2370 2371#if CHECKSUM_THE_SWAP 2372 if (c_seg->cseg_swap_size != io_size) 2373 panic("swapin size doesn't match swapout size"); 2374 2375 if (c_seg->cseg_hash != hash_string((char*) c_seg->c_store.c_buffer, (int)io_size)) { 2376 panic("c_seg_swapin - Swap hash mismatch\n"); 2377 } 2378#endif /* CHECKSUM_THE_SWAP */ 2379 2380 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2381 2382 if (force_minor_compaction == TRUE) { 2383 lck_mtx_lock_spin_always(&c_seg->c_lock); 2384 2385 c_seg_minor_compaction_and_unlock(c_seg, FALSE); 2386 } 2387 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used); 2388 OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used); 2389 } 2390 } 2391 c_seg_swapin_requeue(c_seg); 2392 2393 C_SEG_WAKEUP_DONE(c_seg); 2394} 2395 2396 2397static int 2398c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf) 2399{ 2400 int c_size; 2401 int c_rounded_size; 2402 int max_csize; 2403 c_slot_t cs; 2404 c_segment_t c_seg; 2405 2406 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0); 2407retry: 2408 if ((c_seg = c_seg_allocate(current_chead)) == NULL) 2409 return (1); 2410 /* 2411 * returns with c_seg lock held 2412 * and PAGE_REPLACEMENT_DISALLOWED(TRUE) 2413 */ 2414 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot); 2415 2416 cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr); 2417 assert(slot_ptr == (c_slot_mapping_t)C_SLOT_UNPACK_PTR(cs)); 2418 2419 cs->c_offset = c_seg->c_nextoffset; 2420 2421 max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset); 2422 2423 if (max_csize > PAGE_SIZE) 2424 max_csize = PAGE_SIZE; 2425 2426 if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - 2427 c_seg->c_nextoffset) 2428 < (unsigned) max_csize + PAGE_SIZE && 2429 (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) 2430 < C_SEG_ALLOCSIZE)) { 2431 lck_mtx_unlock_always(&c_seg->c_lock); 2432 2433 kernel_memory_populate(kernel_map, 2434 (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset], 2435 PAGE_SIZE, 2436 KMA_COMPRESSOR); 2437 2438 lck_mtx_lock_spin_always(&c_seg->c_lock); 2439 2440 c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE); 2441 } 2442 2443#if CHECKSUM_THE_DATA 2444 cs->c_hash_data = hash_string(src, PAGE_SIZE); 2445#endif 2446 2447 c_size = WKdm_compress_new((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 2448 (WK_word *)(uintptr_t)scratch_buf, max_csize - 4); 2449 assert(c_size <= (max_csize - 4) && c_size >= -1); 2450 2451 if (c_size == -1) { 2452 2453 if (max_csize < PAGE_SIZE) { 2454 c_current_seg_filled(c_seg, current_chead); 2455 2456 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2457 2458 goto retry; 2459 } 2460 c_size = PAGE_SIZE; 2461 2462 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size); 2463 } 2464#if CHECKSUM_THE_COMPRESSED_DATA 2465 cs->c_hash_compressed_data = hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size); 2466#endif 2467 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 2468 2469 PACK_C_SIZE(cs, c_size); 2470 c_seg->c_bytes_used += c_rounded_size; 2471 c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 2472 2473 slot_ptr->s_cindx = c_seg->c_nextslot++; 2474 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */ 2475 slot_ptr->s_cseg = c_seg->c_mysegno + 1; 2476 2477 if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX) 2478 c_current_seg_filled(c_seg, current_chead); 2479 else 2480 lck_mtx_unlock_always(&c_seg->c_lock); 2481 2482 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2483 2484 OSAddAtomic64(c_rounded_size, &compressor_bytes_used); 2485 OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes); 2486 OSAddAtomic64(c_size, &c_segment_compressed_bytes); 2487 2488 OSAddAtomic(1, &c_segment_pages_compressed); 2489 OSAddAtomic(1, &sample_period_compression_count); 2490 2491 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0); 2492 2493 return (0); 2494} 2495 2496 2497static int 2498c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot) 2499{ 2500 c_slot_t cs; 2501 c_segment_t c_seg; 2502 int c_indx; 2503 int c_rounded_size; 2504 uint32_t c_size; 2505 int retval = 0; 2506 boolean_t c_seg_has_data = TRUE; 2507 boolean_t c_seg_swappedin = FALSE; 2508 boolean_t need_unlock = TRUE; 2509 boolean_t consider_defragmenting = FALSE; 2510 2511ReTry: 2512 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2513 2514#if HIBERNATION 2515 /* 2516 * if hibernation is enabled, it indicates (via a call 2517 * to 'vm_decompressor_lock' that no further 2518 * decompressions are allowed once it reaches 2519 * the point of flushing all of the currently dirty 2520 * anonymous memory through the compressor and out 2521 * to disk... in this state we allow freeing of compressed 2522 * pages and must honor the C_DONT_BLOCK case 2523 */ 2524 if (dst && decompressions_blocked == TRUE) { 2525 if (flags & C_DONT_BLOCK) { 2526 2527 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2528 2529 *zeroslot = 0; 2530 return (-2); 2531 } 2532 /* 2533 * it's safe to atomically assert and block behind the 2534 * lock held in shared mode because "decompressions_blocked" is 2535 * only set and cleared and the thread_wakeup done when the lock 2536 * is held exclusively 2537 */ 2538 assert_wait((event_t)&decompressions_blocked, THREAD_UNINT); 2539 2540 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2541 2542 thread_block(THREAD_CONTINUE_NULL); 2543 2544 goto ReTry; 2545 } 2546#endif 2547 /* s_cseg is actually "segno+1" */ 2548 c_seg = c_segments[slot_ptr->s_cseg - 1].c_seg; 2549 2550 lck_mtx_lock_spin_always(&c_seg->c_lock); 2551 2552 if (flags & C_DONT_BLOCK) { 2553 if (c_seg->c_busy || (c_seg->c_ondisk && dst)) { 2554 2555 retval = -2; 2556 *zeroslot = 0; 2557 2558 goto done; 2559 } 2560 } 2561 if (c_seg->c_busy) { 2562 2563 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2564 2565 c_seg_wait_on_busy(c_seg); 2566 2567 goto ReTry; 2568 } 2569 c_indx = slot_ptr->s_cindx; 2570 2571 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 2572 2573 c_size = UNPACK_C_SIZE(cs); 2574 2575 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 2576 2577 if (dst) { 2578 uint32_t age_of_cseg; 2579 clock_sec_t cur_ts_sec; 2580 clock_nsec_t cur_ts_nsec; 2581 2582 if (c_seg->c_on_swappedout_q || c_seg->c_on_swappedout_sparse_q) { 2583 if (c_seg->c_ondisk) 2584 c_seg_swappedin = TRUE; 2585 c_seg_swapin(c_seg, FALSE); 2586 } 2587 if (c_seg->c_store.c_buffer == NULL) { 2588 c_seg_has_data = FALSE; 2589 goto c_seg_invalid_data; 2590 } 2591#if CHECKSUM_THE_COMPRESSED_DATA 2592 if (cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size)) 2593 panic("compressed data doesn't match original"); 2594#endif 2595 if (c_rounded_size == PAGE_SIZE) { 2596 /* 2597 * page wasn't compressible... just copy it out 2598 */ 2599 memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE); 2600 } else { 2601 uint32_t my_cpu_no; 2602 char *scratch_buf; 2603 2604 /* 2605 * we're behind the c_seg lock held in spin mode 2606 * which means pre-emption is disabled... therefore 2607 * the following sequence is atomic and safe 2608 */ 2609 my_cpu_no = cpu_number(); 2610 2611 assert(my_cpu_no < compressor_cpus); 2612 2613 scratch_buf = &compressor_scratch_bufs[my_cpu_no * WKdm_SCRATCH_BUF_SIZE]; 2614 WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 2615 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size); 2616 } 2617 2618#if CHECKSUM_THE_DATA 2619 if (cs->c_hash_data != hash_string(dst, PAGE_SIZE)) 2620 panic("decompressed data doesn't match original"); 2621#endif 2622 if (!c_seg->c_was_swapped_in) { 2623 2624 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); 2625 2626 age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts; 2627 2628 if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE) 2629 OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]); 2630 else 2631 OSAddAtomic(1, &overage_decompressions_during_sample_period); 2632 2633 OSAddAtomic(1, &sample_period_decompression_count); 2634 } 2635 } else { 2636 if (c_seg->c_store.c_buffer == NULL) 2637 c_seg_has_data = FALSE; 2638 } 2639c_seg_invalid_data: 2640 2641 if (c_seg_has_data == TRUE) { 2642 if (c_seg_swappedin == TRUE) 2643 retval = 1; 2644 else 2645 retval = 0; 2646 } else 2647 retval = -1; 2648 2649 if (flags & C_KEEP) { 2650 *zeroslot = 0; 2651 goto done; 2652 } 2653 c_seg->c_bytes_unused += c_rounded_size; 2654 c_seg->c_bytes_used -= c_rounded_size; 2655 PACK_C_SIZE(cs, 0); 2656 2657 if (c_indx < c_seg->c_firstemptyslot) 2658 c_seg->c_firstemptyslot = c_indx; 2659 2660 OSAddAtomic(-1, &c_segment_pages_compressed); 2661 2662 if (c_seg_has_data == TRUE && !c_seg->c_ondisk) { 2663 /* 2664 * c_ondisk == TRUE can occur when we're doing a 2665 * free of a compressed page (i.e. dst == NULL) 2666 */ 2667 OSAddAtomic64(-c_rounded_size, &compressor_bytes_used); 2668 } 2669 if (!c_seg->c_filling) { 2670 if (c_seg->c_bytes_used == 0) { 2671 if (!c_seg->c_ondisk) { 2672 int pages_populated; 2673 2674 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE; 2675 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0); 2676 2677 if (pages_populated) { 2678 assert(c_seg->c_store.c_buffer != NULL); 2679 2680 C_SEG_BUSY(c_seg); 2681 lck_mtx_unlock_always(&c_seg->c_lock); 2682 2683 kernel_memory_depopulate(kernel_map, (vm_offset_t) c_seg->c_store.c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR); 2684 2685 lck_mtx_lock_spin_always(&c_seg->c_lock); 2686 C_SEG_WAKEUP_DONE(c_seg); 2687 } 2688 if (!c_seg->c_on_minorcompact_q && !c_seg->c_on_swapout_q) 2689 c_seg_need_delayed_compaction(c_seg); 2690 } else 2691 assert(c_seg->c_on_swappedout_sparse_q); 2692 2693 } else if (c_seg->c_on_minorcompact_q) { 2694 2695 if (C_SEG_INCORE_IS_SPARSE(c_seg)) { 2696 c_seg_try_minor_compaction_and_unlock(c_seg); 2697 need_unlock = FALSE; 2698 } 2699 } else if (!c_seg->c_ondisk) { 2700 2701 if (c_seg_has_data == TRUE && !c_seg->c_on_swapout_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) { 2702 c_seg_need_delayed_compaction(c_seg); 2703 } 2704 } else if (!c_seg->c_on_swappedout_sparse_q && C_SEG_ONDISK_IS_SPARSE(c_seg)) { 2705 2706 c_seg_move_to_sparse_list(c_seg); 2707 consider_defragmenting = TRUE; 2708 } 2709 } 2710done: 2711 if (need_unlock == TRUE) 2712 lck_mtx_unlock_always(&c_seg->c_lock); 2713 2714 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2715 2716 if (consider_defragmenting == TRUE) 2717 vm_swap_consider_defragmenting(); 2718 2719 2720 return (retval); 2721} 2722 2723 2724int 2725vm_compressor_get(ppnum_t pn, int *slot, int flags) 2726{ 2727 char *dst; 2728 int zeroslot = 1; 2729 int retval; 2730 2731#if __x86_64__ 2732 dst = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT); 2733#else 2734#error "unsupported architecture" 2735#endif 2736 2737 retval = c_decompress_page(dst, (c_slot_mapping_t)slot, flags, &zeroslot); 2738 2739 /* 2740 * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP) 2741 * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set 2742 */ 2743 if (zeroslot) { 2744 *slot = 0; 2745 } 2746 /* 2747 * returns 0 if we successfully decompressed a page from a segment already in memory 2748 * returns 1 if we had to first swap in the segment, before successfully decompressing the page 2749 * returns -1 if we encountered an error swapping in the segment - decompression failed 2750 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set 2751 */ 2752 return (retval); 2753} 2754 2755 2756int 2757vm_compressor_free(int *slot, int flags) 2758{ 2759 int zeroslot = 1; 2760 int retval; 2761 2762 assert(flags == 0 || flags == C_DONT_BLOCK); 2763 2764 retval = c_decompress_page(NULL, (c_slot_mapping_t)slot, flags, &zeroslot); 2765 /* 2766 * returns 0 if we successfully freed the specified compressed page 2767 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set 2768 */ 2769 2770 if (retval == 0) 2771 *slot = 0; 2772 2773 return (retval); 2774} 2775 2776 2777int 2778vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf) 2779{ 2780 char *src; 2781 int retval; 2782 2783#if __x86_64__ 2784 src = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT); 2785#else 2786#error "unsupported architecture" 2787#endif 2788 retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf); 2789 2790 return (retval); 2791} 2792 2793void 2794vm_compressor_transfer( 2795 int *dst_slot_p, 2796 int *src_slot_p) 2797{ 2798 c_slot_mapping_t dst_slot, src_slot; 2799 c_segment_t c_seg; 2800 int c_indx; 2801 c_slot_t cs; 2802 2803 dst_slot = (c_slot_mapping_t) dst_slot_p; 2804 src_slot = (c_slot_mapping_t) src_slot_p; 2805 2806Retry: 2807 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2808 /* get segment for src_slot */ 2809 c_seg = c_segments[src_slot->s_cseg -1].c_seg; 2810 /* lock segment */ 2811 lck_mtx_lock_spin_always(&c_seg->c_lock); 2812 /* wait if it's busy */ 2813 if (c_seg->c_busy) { 2814 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2815 c_seg_wait_on_busy(c_seg); 2816 goto Retry; 2817 } 2818 /* find the c_slot */ 2819 c_indx = src_slot->s_cindx; 2820 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 2821 /* point the c_slot back to dst_slot instead of src_slot */ 2822 cs->c_packed_ptr = C_SLOT_PACK_PTR(dst_slot); 2823 /* transfer */ 2824 *dst_slot_p = *src_slot_p; 2825 *src_slot_p = 0; 2826 lck_mtx_unlock_always(&c_seg->c_lock); 2827 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2828} 2829