1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <vm/vm_compressor.h> 30#include <vm/vm_map.h> 31#include <vm/vm_pageout.h> 32#include <vm/memory_object.h> 33#include <mach/mach_host.h> /* for host_info() */ 34#include <kern/ledger.h> 35 36#include <default_pager/default_pager_alerts.h> 37#include <default_pager/default_pager_object_server.h> 38 39#include <IOKit/IOHibernatePrivate.h> 40 41/* 42 * vm_compressor_mode has a heirarchy of control to set its value. 43 * boot-args are checked first, then device-tree, and finally 44 * the default value that is defined below. See vm_fault_init() for 45 * the boot-arg & device-tree code. 46 */ 47 48extern ipc_port_t min_pages_trigger_port; 49extern lck_mtx_t paging_segments_lock; 50#define PSL_LOCK() lck_mtx_lock(&paging_segments_lock) 51#define PSL_UNLOCK() lck_mtx_unlock(&paging_segments_lock) 52 53 54int vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP; 55int vm_scale = 16; 56 57 58int vm_compression_limit = 0; 59 60extern boolean_t vm_swap_up; 61extern void vm_pageout_io_throttle(void); 62 63#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA 64extern unsigned int hash_string(char *cp, int len); 65#endif 66 67struct c_slot { 68 uint64_t c_offset:C_SEG_OFFSET_BITS, 69 c_size:12, 70 c_packed_ptr:36; 71#if CHECKSUM_THE_DATA 72 unsigned int c_hash_data; 73#endif 74#if CHECKSUM_THE_COMPRESSED_DATA 75 unsigned int c_hash_compressed_data; 76#endif 77 78}; 79 80#define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? 4096 : cs->c_size) 81#define PACK_C_SIZE(cs, size) (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size)) 82 83 84struct c_slot_mapping { 85 uint32_t s_cseg:22, /* segment number + 1 */ 86 s_cindx:10; /* index in the segment */ 87}; 88 89typedef struct c_slot_mapping *c_slot_mapping_t; 90 91 92union c_segu { 93 c_segment_t c_seg; 94 uint32_t c_segno; 95}; 96 97 98 99#define C_SLOT_PACK_PTR(ptr) (((uintptr_t)ptr - (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS) >> 2) 100#define C_SLOT_UNPACK_PTR(cslot) ((uintptr_t)(cslot->c_packed_ptr << 2) + (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS) 101 102 103uint32_t c_segment_count = 0; 104 105uint64_t c_generation_id = 0; 106uint64_t c_generation_id_flush_barrier; 107 108 109#define HIBERNATE_FLUSHING_SECS_TO_COMPLETE 120 110 111boolean_t hibernate_no_swapspace = FALSE; 112clock_sec_t hibernate_flushing_deadline = 0; 113 114 115#if TRACK_BAD_C_SEGMENTS 116queue_head_t c_bad_list_head; 117uint32_t c_bad_count = 0; 118#endif 119 120queue_head_t c_age_list_head; 121queue_head_t c_swapout_list_head; 122queue_head_t c_swappedin_list_head; 123queue_head_t c_swappedout_list_head; 124queue_head_t c_swappedout_sparse_list_head; 125 126uint32_t c_age_count = 0; 127uint32_t c_swapout_count = 0; 128uint32_t c_swappedin_count = 0; 129uint32_t c_swappedout_count = 0; 130uint32_t c_swappedout_sparse_count = 0; 131 132queue_head_t c_minor_list_head; 133uint32_t c_minor_count = 0; 134 135union c_segu *c_segments; 136caddr_t c_segments_next_page; 137boolean_t c_segments_busy; 138uint32_t c_segments_available; 139uint32_t c_segments_limit; 140uint32_t c_segment_pages_compressed; 141uint32_t c_segment_pages_compressed_limit; 142uint32_t c_free_segno_head = (uint32_t)-1; 143 144uint32_t vm_compressor_minorcompact_threshold_divisor = 10; 145uint32_t vm_compressor_majorcompact_threshold_divisor = 10; 146uint32_t vm_compressor_unthrottle_threshold_divisor = 10; 147uint32_t vm_compressor_catchup_threshold_divisor = 10; 148 149#define C_SEGMENTS_PER_PAGE (PAGE_SIZE / sizeof(union c_segu)) 150 151 152lck_grp_attr_t vm_compressor_lck_grp_attr; 153lck_attr_t vm_compressor_lck_attr; 154lck_grp_t vm_compressor_lck_grp; 155 156 157#if __i386__ || __x86_64__ 158lck_mtx_t *c_list_lock; 159#else /* __i386__ || __x86_64__ */ 160lck_spin_t *c_list_lock; 161#endif /* __i386__ || __x86_64__ */ 162 163lck_rw_t c_master_lock; 164lck_rw_t c_decompressor_lock; 165 166zone_t compressor_segment_zone; 167int c_compressor_swap_trigger = 0; 168 169uint32_t compressor_cpus; 170char *compressor_scratch_bufs; 171 172 173clock_sec_t start_of_sample_period_sec = 0; 174clock_nsec_t start_of_sample_period_nsec = 0; 175clock_sec_t start_of_eval_period_sec = 0; 176clock_nsec_t start_of_eval_period_nsec = 0; 177uint32_t sample_period_decompression_count = 0; 178uint32_t sample_period_compression_count = 0; 179uint32_t last_eval_decompression_count = 0; 180uint32_t last_eval_compression_count = 0; 181 182#define DECOMPRESSION_SAMPLE_MAX_AGE (60 * 30) 183 184uint32_t swapout_target_age = 0; 185uint32_t age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE]; 186uint32_t overage_decompressions_during_sample_period = 0; 187 188void do_fastwake_warmup(void); 189boolean_t fastwake_warmup = FALSE; 190boolean_t fastwake_recording_in_progress = FALSE; 191clock_sec_t dont_trim_until_ts = 0; 192 193uint64_t c_segment_warmup_count; 194uint64_t first_c_segment_to_warm_generation_id = 0; 195uint64_t last_c_segment_to_warm_generation_id = 0; 196boolean_t hibernate_flushing = FALSE; 197 198int64_t c_segment_input_bytes = 0; 199int64_t c_segment_compressed_bytes = 0; 200int64_t compressor_bytes_used = 0; 201 202static boolean_t compressor_needs_to_swap(void); 203static void vm_compressor_swap_trigger_thread(void); 204static void vm_compressor_do_delayed_compactions(boolean_t); 205static void vm_compressor_compact_and_swap(boolean_t); 206static void vm_compressor_age_swapped_in_segments(boolean_t); 207static uint64_t compute_elapsed_msecs(clock_sec_t, clock_nsec_t, clock_sec_t, clock_nsec_t); 208 209boolean_t vm_compressor_low_on_space(void); 210 211void compute_swapout_target_age(void); 212 213boolean_t c_seg_major_compact(c_segment_t, c_segment_t); 214boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t); 215 216int c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t); 217int c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t); 218void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg); 219void c_seg_need_delayed_compaction(c_segment_t); 220 221void c_seg_move_to_sparse_list(c_segment_t); 222void c_seg_insert_into_q(queue_head_t *, c_segment_t); 223 224boolean_t c_seg_try_free(c_segment_t); 225void c_seg_free(c_segment_t); 226void c_seg_free_locked(c_segment_t); 227 228 229uint64_t vm_available_memory(void); 230 231extern unsigned int dp_pages_free, dp_pages_reserve; 232 233uint64_t 234vm_available_memory(void) 235{ 236 return (((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64); 237} 238 239 240boolean_t 241vm_compression_available(void) 242{ 243 if ( !(COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)) 244 return (FALSE); 245 246 if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) 247 return (FALSE); 248 249 return (TRUE); 250} 251 252 253boolean_t 254vm_compressor_low_on_space(void) 255{ 256 if ((c_segment_pages_compressed > (c_segment_pages_compressed_limit - 20000)) || 257 (c_segment_count > (c_segments_limit - 250))) 258 return (TRUE); 259 260 return (FALSE); 261} 262 263 264int 265vm_low_on_space(void) 266{ 267 if (vm_compressor_mode == COMPRESSED_PAGER_IS_ACTIVE || vm_compressor_mode == DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 268 if (vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) 269 return (1); 270 } else { 271 if (((dp_pages_free + dp_pages_reserve < 2000) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))) 272 return (1); 273 } 274 return (0); 275} 276 277 278void 279vm_compressor_init_locks(void) 280{ 281 lck_grp_attr_setdefault(&vm_compressor_lck_grp_attr); 282 lck_grp_init(&vm_compressor_lck_grp, "vm_compressor", &vm_compressor_lck_grp_attr); 283 lck_attr_setdefault(&vm_compressor_lck_attr); 284 285 lck_rw_init(&c_master_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); 286 lck_rw_init(&c_decompressor_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); 287} 288 289 290void 291vm_decompressor_lock(void) 292{ 293 lck_rw_lock_exclusive(&c_decompressor_lock); 294} 295 296void 297vm_decompressor_unlock(void) 298{ 299 lck_rw_done(&c_decompressor_lock); 300 301} 302 303 304 305void 306vm_compressor_init(void) 307{ 308 thread_t thread; 309 310 assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE); 311 312 PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof (vm_compression_limit)); 313 314 if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) { 315 vm_compressor_minorcompact_threshold_divisor = 11; 316 vm_compressor_majorcompact_threshold_divisor = 13; 317 vm_compressor_unthrottle_threshold_divisor = 20; 318 vm_compressor_catchup_threshold_divisor = 35; 319 } else { 320 vm_compressor_minorcompact_threshold_divisor = 20; 321 vm_compressor_majorcompact_threshold_divisor = 25; 322 vm_compressor_unthrottle_threshold_divisor = 35; 323 vm_compressor_catchup_threshold_divisor = 50; 324 } 325 /* 326 * vm_page_init_lck_grp is now responsible for calling vm_compressor_init_locks 327 * c_master_lock needs to be available early so that "vm_page_find_contiguous" can 328 * use PAGE_REPLACEMENT_ALLOWED to coordinate with the compressor. 329 */ 330 331#if __i386__ || __x86_64__ 332 c_list_lock = lck_mtx_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr); 333#else /* __i386__ || __x86_64__ */ 334 c_list_lock = lck_spin_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr); 335#endif /* __i386__ || __x86_64__ */ 336 337#if TRACK_BAD_C_SEGMENTS 338 queue_init(&c_bad_list_head); 339#endif 340 queue_init(&c_age_list_head); 341 queue_init(&c_minor_list_head); 342 queue_init(&c_swapout_list_head); 343 queue_init(&c_swappedin_list_head); 344 queue_init(&c_swappedout_list_head); 345 queue_init(&c_swappedout_sparse_list_head); 346 347 compressor_segment_zone = zinit(sizeof (struct c_segment), 348 128000 * sizeof (struct c_segment), 349 8192, "compressor_segment"); 350 zone_change(compressor_segment_zone, Z_CALLERACCT, FALSE); 351 zone_change(compressor_segment_zone, Z_NOENCRYPT, TRUE); 352 353 354 c_free_segno_head = -1; 355 c_segments_available = 0; 356 357 if (vm_compression_limit == 0) { 358 c_segment_pages_compressed_limit = (uint32_t)((max_mem / PAGE_SIZE)) * vm_scale; 359 360#define OLD_SWAP_LIMIT (1024 * 1024 * 16) 361#define MAX_SWAP_LIMIT (1024 * 1024 * 128) 362 363 if (c_segment_pages_compressed_limit > (OLD_SWAP_LIMIT)) 364 c_segment_pages_compressed_limit = OLD_SWAP_LIMIT; 365 366 if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE_64)) 367 c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE_64); 368 } else { 369 if (vm_compression_limit < MAX_SWAP_LIMIT) 370 c_segment_pages_compressed_limit = vm_compression_limit; 371 else 372 c_segment_pages_compressed_limit = MAX_SWAP_LIMIT; 373 } 374 if ((c_segments_limit = c_segment_pages_compressed_limit / (C_SEG_BUFSIZE / PAGE_SIZE)) > C_SEG_MAX_LIMIT) 375 c_segments_limit = C_SEG_MAX_LIMIT; 376 377 c_segments_busy = FALSE; 378 379 if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY) != KERN_SUCCESS) 380 panic("vm_compressor_init: kernel_memory_allocate failed\n"); 381 382 c_segments_next_page = (caddr_t)c_segments; 383 384 { 385 host_basic_info_data_t hinfo; 386 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; 387 388#define BSD_HOST 1 389 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); 390 391 compressor_cpus = hinfo.max_cpus; 392 393 compressor_scratch_bufs = kalloc(compressor_cpus * WKdm_SCRATCH_BUF_SIZE); 394 } 395 396 if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL, 397 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { 398 panic("vm_compressor_swap_trigger_thread: create failed"); 399 } 400 thread->options |= TH_OPT_VMPRIV; 401 402 thread_deallocate(thread); 403 404 assert(default_pager_init_flag == 0); 405 406 if (vm_pageout_internal_start() != KERN_SUCCESS) { 407 panic("vm_compressor_init: Failed to start the internal pageout thread.\n"); 408 } 409 410#if CONFIG_FREEZE 411 memorystatus_freeze_enabled = TRUE; 412#endif /* CONFIG_FREEZE */ 413 414 default_pager_init_flag = 1; 415 416 vm_page_reactivate_all_throttled(); 417} 418 419 420#if VALIDATE_C_SEGMENTS 421 422static void 423c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact) 424{ 425 int c_indx; 426 int32_t bytes_used; 427 int32_t bytes_unused; 428 uint32_t c_rounded_size; 429 uint32_t c_size; 430 c_slot_t cs; 431 432 if (c_seg->c_firstemptyslot < c_seg->c_nextslot) { 433 c_indx = c_seg->c_firstemptyslot; 434 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 435 436 if (cs == NULL) 437 panic("c_seg_validate: no slot backing c_firstemptyslot"); 438 439 if (cs->c_size) 440 panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)\n", cs->c_size); 441 } 442 bytes_used = 0; 443 bytes_unused = 0; 444 445 for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) { 446 447 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 448 449 c_size = UNPACK_C_SIZE(cs); 450 451 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 452 453 bytes_used += c_rounded_size; 454 455#if CHECKSUM_THE_COMPRESSED_DATA 456 if (c_size && cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size)) 457 panic("compressed data doesn't match original"); 458#endif 459 } 460 461 if (bytes_used != c_seg->c_bytes_used) 462 panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used); 463 464 if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) 465 panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n", 466 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used); 467 468 if (must_be_compact) { 469 if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) 470 panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n", 471 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used); 472 } 473} 474 475#endif 476 477 478void 479c_seg_need_delayed_compaction(c_segment_t c_seg) 480{ 481 boolean_t clear_busy = FALSE; 482 483 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { 484 c_seg->c_busy = 1; 485 486 lck_mtx_unlock_always(&c_seg->c_lock); 487 lck_mtx_lock_spin_always(c_list_lock); 488 lck_mtx_lock_spin_always(&c_seg->c_lock); 489 490 clear_busy = TRUE; 491 } 492 if (!c_seg->c_on_minorcompact_q && !c_seg->c_ondisk && !c_seg->c_on_swapout_q) { 493 queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list); 494 c_seg->c_on_minorcompact_q = 1; 495 c_minor_count++; 496 } 497 lck_mtx_unlock_always(c_list_lock); 498 499 if (clear_busy == TRUE) 500 C_SEG_WAKEUP_DONE(c_seg); 501} 502 503 504unsigned int c_seg_moved_to_sparse_list = 0; 505 506void 507c_seg_move_to_sparse_list(c_segment_t c_seg) 508{ 509 boolean_t clear_busy = FALSE; 510 511 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { 512 c_seg->c_busy = 1; 513 514 lck_mtx_unlock_always(&c_seg->c_lock); 515 lck_mtx_lock_spin_always(c_list_lock); 516 lck_mtx_lock_spin_always(&c_seg->c_lock); 517 518 clear_busy = TRUE; 519 } 520 assert(c_seg->c_ondisk); 521 assert(c_seg->c_on_swappedout_q); 522 assert(!c_seg->c_on_swappedout_sparse_q); 523 524 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 525 c_seg->c_on_swappedout_q = 0; 526 c_swappedout_count--; 527 528 c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg); 529 c_seg->c_on_swappedout_sparse_q = 1; 530 c_swappedout_sparse_count++; 531 532 c_seg_moved_to_sparse_list++; 533 534 lck_mtx_unlock_always(c_list_lock); 535 536 if (clear_busy == TRUE) 537 C_SEG_WAKEUP_DONE(c_seg); 538} 539 540 541void 542c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg) 543{ 544 c_segment_t c_seg_next; 545 546 if (queue_empty(qhead)) { 547 queue_enter(qhead, c_seg, c_segment_t, c_age_list); 548 } else { 549 c_seg_next = (c_segment_t)queue_first(qhead); 550 551 while (TRUE) { 552 553 if (c_seg->c_generation_id < c_seg_next->c_generation_id) { 554 queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list); 555 break; 556 } 557 c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list); 558 559 if (queue_end(qhead, (queue_entry_t) c_seg_next)) { 560 queue_enter(qhead, c_seg, c_segment_t, c_age_list); 561 break; 562 } 563 } 564 } 565} 566 567 568int try_minor_compaction_failed = 0; 569int try_minor_compaction_succeeded = 0; 570 571void 572c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg) 573{ 574 575 assert(c_seg->c_on_minorcompact_q); 576 /* 577 * c_seg is currently on the delayed minor compaction 578 * queue and we have c_seg locked... if we can get the 579 * c_list_lock w/o blocking (if we blocked we could deadlock 580 * because the lock order is c_list_lock then c_seg's lock) 581 * we'll pull it from the delayed list and free it directly 582 */ 583 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { 584 /* 585 * c_list_lock is held, we need to bail 586 */ 587 try_minor_compaction_failed++; 588 589 lck_mtx_unlock_always(&c_seg->c_lock); 590 } else { 591 try_minor_compaction_succeeded++; 592 593 c_seg->c_busy = 1; 594 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE); 595 } 596} 597 598 599int 600c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement) 601{ 602 int c_seg_freed; 603 604 assert(c_seg->c_busy); 605 606 if (!c_seg->c_on_minorcompact_q) { 607 if (clear_busy == TRUE) 608 C_SEG_WAKEUP_DONE(c_seg); 609 610 lck_mtx_unlock_always(&c_seg->c_lock); 611 612 return (0); 613 } 614 queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list); 615 c_seg->c_on_minorcompact_q = 0; 616 c_minor_count--; 617 618 lck_mtx_unlock_always(c_list_lock); 619 620 if (disallow_page_replacement == TRUE) { 621 lck_mtx_unlock_always(&c_seg->c_lock); 622 623 PAGE_REPLACEMENT_DISALLOWED(TRUE); 624 625 lck_mtx_lock_spin_always(&c_seg->c_lock); 626 } 627 c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy); 628 629 if (disallow_page_replacement == TRUE) 630 PAGE_REPLACEMENT_DISALLOWED(FALSE); 631 632 if (need_list_lock == TRUE) 633 lck_mtx_lock_spin_always(c_list_lock); 634 635 return (c_seg_freed); 636} 637 638 639void 640c_seg_wait_on_busy(c_segment_t c_seg) 641{ 642 c_seg->c_wanted = 1; 643 assert_wait((event_t) (c_seg), THREAD_UNINT); 644 645 lck_mtx_unlock_always(&c_seg->c_lock); 646 thread_block(THREAD_CONTINUE_NULL); 647} 648 649 650 651int try_free_succeeded = 0; 652int try_free_failed = 0; 653 654boolean_t 655c_seg_try_free(c_segment_t c_seg) 656{ 657 /* 658 * c_seg is currently on the delayed minor compaction 659 * or the spapped out sparse queue and we have c_seg locked... 660 * if we can get the c_list_lock w/o blocking (if we blocked we 661 * could deadlock because the lock order is c_list_lock then c_seg's lock) 662 * we'll pull it from the appropriate queue and free it 663 */ 664 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { 665 /* 666 * c_list_lock is held, we need to bail 667 */ 668 try_free_failed++; 669 return (FALSE); 670 } 671 if (c_seg->c_on_minorcompact_q) { 672 queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list); 673 c_seg->c_on_minorcompact_q = 0; 674 c_minor_count--; 675 } else { 676 assert(c_seg->c_on_swappedout_sparse_q); 677 678 /* 679 * c_seg_free_locked will remove it from the swappedout sparse list 680 */ 681 } 682 if (!c_seg->c_busy_swapping) 683 c_seg->c_busy = 1; 684 685 c_seg_free_locked(c_seg); 686 687 try_free_succeeded++; 688 689 return (TRUE); 690} 691 692 693void 694c_seg_free(c_segment_t c_seg) 695{ 696 if (!c_seg->c_busy_swapping) 697 c_seg->c_busy = 1; 698 699 lck_mtx_unlock_always(&c_seg->c_lock); 700 lck_mtx_lock_spin_always(c_list_lock); 701 lck_mtx_lock_spin_always(&c_seg->c_lock); 702 703 c_seg_free_locked(c_seg); 704} 705 706 707void 708c_seg_free_locked(c_segment_t c_seg) 709{ 710 int segno, i; 711 int pages_populated; 712 int32_t *c_buffer = NULL; 713 uint64_t c_swap_handle; 714 715 assert(!c_seg->c_on_minorcompact_q); 716 717 if (c_seg->c_on_age_q) { 718 queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list); 719 c_seg->c_on_age_q = 0; 720 c_age_count--; 721 } else if (c_seg->c_on_swappedin_q) { 722 queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 723 c_seg->c_on_swappedin_q = 0; 724 c_swappedin_count--; 725 } else if (c_seg->c_on_swapout_q) { 726 queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); 727 c_seg->c_on_swapout_q = 0; 728 c_swapout_count--; 729 thread_wakeup((event_t)&compaction_swapper_running); 730 } else if (c_seg->c_on_swappedout_q) { 731 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 732 c_seg->c_on_swappedout_q = 0; 733 c_swappedout_count--; 734 } else if (c_seg->c_on_swappedout_sparse_q) { 735 queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); 736 c_seg->c_on_swappedout_sparse_q = 0; 737 c_swappedout_sparse_count--; 738 } 739#if TRACK_BAD_C_SEGMENTS 740 else if (c_seg->c_on_bad_q) { 741 queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list); 742 c_seg->c_on_bad_q = 0; 743 c_bad_count--; 744 } 745#endif 746 segno = c_seg->c_mysegno; 747 c_segments[segno].c_segno = c_free_segno_head; 748 c_free_segno_head = segno; 749 c_segment_count--; 750 751 lck_mtx_unlock_always(c_list_lock); 752 753 if (c_seg->c_wanted) { 754 thread_wakeup((event_t) (c_seg)); 755 c_seg->c_wanted = 0; 756 } 757 if (c_seg->c_busy_swapping) { 758 c_seg->c_must_free = 1; 759 760 lck_mtx_unlock_always(&c_seg->c_lock); 761 return; 762 } 763 if (c_seg->c_ondisk == 0) { 764 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE; 765 766 c_buffer = c_seg->c_store.c_buffer; 767 c_seg->c_store.c_buffer = NULL; 768 } else { 769 /* 770 * Free swap space on disk. 771 */ 772 c_swap_handle = c_seg->c_store.c_swap_handle; 773 c_seg->c_store.c_swap_handle = (uint64_t)-1; 774 } 775 lck_mtx_unlock_always(&c_seg->c_lock); 776 777 if (c_buffer) { 778 if (pages_populated) 779 kernel_memory_depopulate(kernel_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR); 780 781 kmem_free(kernel_map, (vm_offset_t) c_buffer, C_SEG_ALLOCSIZE); 782 } else if (c_swap_handle) 783 vm_swap_free(c_swap_handle); 784 785 786#if __i386__ || __x86_64__ 787 lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp); 788#else /* __i386__ || __x86_64__ */ 789 lck_spin_destroy(&c_seg->c_lock, &vm_compressor_lck_grp); 790#endif /* __i386__ || __x86_64__ */ 791 792 for (i = 0; i < C_SEG_SLOT_ARRAYS; i++) { 793 if (c_seg->c_slots[i] == 0) 794 break; 795 796 kfree((char *)c_seg->c_slots[i], sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE); 797 } 798 zfree(compressor_segment_zone, c_seg); 799} 800 801 802int c_seg_trim_page_count = 0; 803 804void 805c_seg_trim_tail(c_segment_t c_seg) 806{ 807 c_slot_t cs; 808 uint32_t c_size; 809 uint32_t c_offset; 810 uint32_t c_rounded_size; 811 uint16_t current_nextslot; 812 uint32_t current_populated_offset; 813 814 if (c_seg->c_bytes_used == 0) 815 return; 816 current_nextslot = c_seg->c_nextslot; 817 current_populated_offset = c_seg->c_populated_offset; 818 819 while (c_seg->c_nextslot) { 820 821 cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1)); 822 823 c_size = UNPACK_C_SIZE(cs); 824 825 if (c_size) { 826 if (current_nextslot != c_seg->c_nextslot) { 827 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 828 c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size); 829 830 c_seg->c_nextoffset = c_offset; 831 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1); 832 833 if (c_seg->c_firstemptyslot > c_seg->c_nextslot) 834 c_seg->c_firstemptyslot = c_seg->c_nextslot; 835 836 c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) - 837 round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE); 838 } 839 break; 840 } 841 c_seg->c_nextslot--; 842 } 843 assert(c_seg->c_nextslot); 844} 845 846 847int 848c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy) 849{ 850 c_slot_mapping_t slot_ptr; 851 uint32_t c_offset = 0; 852 uint32_t old_populated_offset; 853 uint32_t c_rounded_size; 854 uint32_t c_size; 855 int c_indx = 0; 856 int i; 857 c_slot_t c_dst; 858 c_slot_t c_src; 859 boolean_t need_unlock = TRUE; 860 861 assert(c_seg->c_busy); 862 863#if VALIDATE_C_SEGMENTS 864 c_seg_validate(c_seg, FALSE); 865#endif 866 if (c_seg->c_bytes_used == 0) { 867 c_seg_free(c_seg); 868 return (1); 869 } 870 if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE) 871 goto done; 872 873#if VALIDATE_C_SEGMENTS 874 c_seg->c_was_minor_compacted++; 875#endif 876 c_indx = c_seg->c_firstemptyslot; 877 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 878 879 old_populated_offset = c_seg->c_populated_offset; 880 c_offset = c_dst->c_offset; 881 882 for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) { 883 884 c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i); 885 886 c_size = UNPACK_C_SIZE(c_src); 887 888 if (c_size == 0) 889 continue; 890 891 memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_size); 892 893#if CHECKSUM_THE_DATA 894 c_dst->c_hash_data = c_src->c_hash_data; 895#endif 896#if CHECKSUM_THE_COMPRESSED_DATA 897 c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data; 898#endif 899 c_dst->c_size = c_src->c_size; 900 c_dst->c_packed_ptr = c_src->c_packed_ptr; 901 c_dst->c_offset = c_offset; 902 903 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst); 904 slot_ptr->s_cindx = c_indx; 905 906 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 907 908 c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 909 PACK_C_SIZE(c_src, 0); 910 c_indx++; 911 912 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 913 } 914 c_seg->c_firstemptyslot = c_indx; 915 c_seg->c_nextslot = c_indx; 916 c_seg->c_nextoffset = c_offset; 917 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1); 918 c_seg->c_bytes_unused = 0; 919 920#if VALIDATE_C_SEGMENTS 921 c_seg_validate(c_seg, TRUE); 922#endif 923 924 if (old_populated_offset > c_seg->c_populated_offset) { 925 uint32_t gc_size; 926 int32_t *gc_ptr; 927 928 gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset); 929 gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset]; 930 931 lck_mtx_unlock_always(&c_seg->c_lock); 932 933 kernel_memory_depopulate(kernel_map, (vm_offset_t)gc_ptr, gc_size, KMA_COMPRESSOR); 934 935 if (clear_busy == TRUE) 936 lck_mtx_lock_spin_always(&c_seg->c_lock); 937 else 938 need_unlock = FALSE; 939 } 940done: 941 if (need_unlock == TRUE) { 942 if (clear_busy == TRUE) 943 C_SEG_WAKEUP_DONE(c_seg); 944 945 lck_mtx_unlock_always(&c_seg->c_lock); 946 } 947 return (0); 948} 949 950 951 952struct { 953 uint64_t asked_permission; 954 uint64_t compactions; 955 uint64_t moved_slots; 956 uint64_t moved_bytes; 957 uint64_t wasted_space_in_swapouts; 958 uint64_t count_of_swapouts; 959} c_seg_major_compact_stats; 960 961 962#define C_MAJOR_COMPACTION_AGE_APPROPRIATE 30 963#define C_MAJOR_COMPACTION_OLD_ENOUGH 300 964#define C_MAJOR_COMPACTION_SIZE_APPROPRIATE ((C_SEG_BUFSIZE * 80) / 100) 965 966 967boolean_t 968c_seg_major_compact_ok( 969 c_segment_t c_seg_dst, 970 c_segment_t c_seg_src) 971{ 972 973 c_seg_major_compact_stats.asked_permission++; 974 975 if (c_seg_src->c_filling) { 976 /* 977 * we're at or near the head... don't compact 978 */ 979 return (FALSE); 980 } 981 if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE && 982 c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE) 983 return (FALSE); 984 985 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) { 986 /* 987 * destination segment is full... can't compact 988 */ 989 return (FALSE); 990 } 991 992 return (TRUE); 993} 994 995 996boolean_t 997c_seg_major_compact( 998 c_segment_t c_seg_dst, 999 c_segment_t c_seg_src) 1000{ 1001 c_slot_mapping_t slot_ptr; 1002 uint32_t c_rounded_size; 1003 uint32_t c_size; 1004 uint16_t dst_slot; 1005 int i; 1006 c_slot_t c_dst; 1007 c_slot_t c_src; 1008 int slotarray; 1009 boolean_t keep_compacting = TRUE; 1010 1011 /* 1012 * segments are not locked but they are both marked c_busy 1013 * which keeps c_decompress from working on them... 1014 * we can safely allocate new pages, move compressed data 1015 * from c_seg_src to c_seg_dst and update both c_segment's 1016 * state w/o holding the master lock 1017 */ 1018 1019#if VALIDATE_C_SEGMENTS 1020 c_seg_dst->c_was_major_compacted++; 1021 c_seg_src->c_was_major_donor++; 1022#endif 1023 c_seg_major_compact_stats.compactions++; 1024 1025 dst_slot = c_seg_dst->c_nextslot; 1026 1027 for (i = 0; i < c_seg_src->c_nextslot; i++) { 1028 1029 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i); 1030 1031 c_size = UNPACK_C_SIZE(c_src); 1032 1033 if (c_size == 0) { 1034 /* BATCH: move what we have so far; */ 1035 continue; 1036 } 1037 1038 if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) { 1039 /* doesn't fit */ 1040 if ((C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) == C_SEG_BUFSIZE)) { 1041 /* can't fit */ 1042 keep_compacting = FALSE; 1043 break; 1044 } 1045 kernel_memory_populate(kernel_map, 1046 (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset], 1047 PAGE_SIZE, 1048 KMA_COMPRESSOR); 1049 1050 c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE); 1051 assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE); 1052 } 1053 1054 slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot); 1055 1056 if (c_seg_dst->c_slots[slotarray] == 0) { 1057 KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0); 1058 c_seg_dst->c_slots[slotarray] = (struct c_slot *) 1059 kalloc(sizeof(struct c_slot) * 1060 C_SEG_SLOT_ARRAY_SIZE); 1061 KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0); 1062 } 1063 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot); 1064 1065 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size); 1066 1067 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 1068 1069 c_seg_major_compact_stats.moved_slots++; 1070 c_seg_major_compact_stats.moved_bytes += c_size; 1071 1072#if CHECKSUM_THE_DATA 1073 c_dst->c_hash_data = c_src->c_hash_data; 1074#endif 1075#if CHECKSUM_THE_COMPRESSED_DATA 1076 c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data; 1077#endif 1078 c_dst->c_size = c_src->c_size; 1079 c_dst->c_packed_ptr = c_src->c_packed_ptr; 1080 c_dst->c_offset = c_seg_dst->c_nextoffset; 1081 1082 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) 1083 c_seg_dst->c_firstemptyslot++; 1084 c_seg_dst->c_nextslot++; 1085 c_seg_dst->c_bytes_used += c_rounded_size; 1086 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 1087 1088 PACK_C_SIZE(c_src, 0); 1089 1090 c_seg_src->c_bytes_used -= c_rounded_size; 1091 c_seg_src->c_bytes_unused += c_rounded_size; 1092 c_seg_src->c_firstemptyslot = 0; 1093 1094 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) { 1095 /* dest segment is now full */ 1096 keep_compacting = FALSE; 1097 break; 1098 } 1099 } 1100 if (dst_slot < c_seg_dst->c_nextslot) { 1101 1102 PAGE_REPLACEMENT_ALLOWED(TRUE); 1103 /* 1104 * we've now locked out c_decompress from 1105 * converting the slot passed into it into 1106 * a c_segment_t which allows us to use 1107 * the backptr to change which c_segment and 1108 * index the slot points to 1109 */ 1110 while (dst_slot < c_seg_dst->c_nextslot) { 1111 1112 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot); 1113 1114 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst); 1115 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */ 1116 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1; 1117 slot_ptr->s_cindx = dst_slot++; 1118 } 1119 PAGE_REPLACEMENT_ALLOWED(FALSE); 1120 } 1121 return (keep_compacting); 1122} 1123 1124 1125static uint64_t 1126compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec) 1127{ 1128 uint64_t end_msecs; 1129 uint64_t start_msecs; 1130 1131 end_msecs = (end_sec * 1000) + end_nsec / 1000000; 1132 start_msecs = (start_sec * 1000) + start_nsec / 1000000; 1133 1134 return (end_msecs - start_msecs); 1135} 1136 1137 1138 1139uint32_t compressor_eval_period_in_msecs = 250; 1140uint32_t compressor_sample_min_in_msecs = 500; 1141uint32_t compressor_sample_max_in_msecs = 10000; 1142uint32_t compressor_thrashing_threshold_per_10msecs = 50; 1143uint32_t compressor_thrashing_min_per_10msecs = 20; 1144 1145extern uint32_t vm_page_filecache_min; 1146 1147 1148void 1149compute_swapout_target_age(void) 1150{ 1151 clock_sec_t cur_ts_sec; 1152 clock_nsec_t cur_ts_nsec; 1153 uint32_t min_operations_needed_in_this_sample; 1154 uint64_t elapsed_msecs_in_eval; 1155 uint64_t elapsed_msecs_in_sample; 1156 boolean_t need_sample_reset = FALSE; 1157 boolean_t need_eval_reset = FALSE; 1158 1159 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); 1160 1161 elapsed_msecs_in_sample = compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec); 1162 1163 if (elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) { 1164 need_sample_reset = TRUE; 1165 need_eval_reset = TRUE; 1166 goto done; 1167 } 1168 elapsed_msecs_in_eval = compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec); 1169 1170 if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs) 1171 goto done; 1172 need_eval_reset = TRUE; 1173 1174 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0); 1175 1176 min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10; 1177 1178 if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample || 1179 (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) { 1180 1181 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count, 1182 sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0); 1183 1184 swapout_target_age = 0; 1185 1186 need_sample_reset = TRUE; 1187 need_eval_reset = TRUE; 1188 goto done; 1189 } 1190 last_eval_compression_count = sample_period_compression_count; 1191 last_eval_decompression_count = sample_period_decompression_count; 1192 1193 if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) { 1194 1195 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0); 1196 goto done; 1197 } 1198 if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) { 1199 1200 uint64_t running_total; 1201 uint64_t working_target; 1202 uint64_t aging_target; 1203 uint32_t oldest_age_of_csegs_sampled = 0; 1204 uint64_t working_set_approximation = 0; 1205 1206 swapout_target_age = 0; 1207 1208 working_target = (sample_period_decompression_count / 100) * 95; /* 95 percent */ 1209 aging_target = (sample_period_decompression_count / 100) * 1; /* 1 percent */ 1210 running_total = 0; 1211 1212 for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) { 1213 1214 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 1215 1216 working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 1217 1218 if (running_total >= working_target) 1219 break; 1220 } 1221 if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) { 1222 1223 working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample; 1224 1225 if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) { 1226 1227 running_total = overage_decompressions_during_sample_period; 1228 1229 for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) { 1230 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; 1231 1232 if (running_total >= aging_target) 1233 break; 1234 } 1235 swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled; 1236 1237 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0); 1238 } else { 1239 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0); 1240 } 1241 } else 1242 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0); 1243 1244 need_sample_reset = TRUE; 1245 need_eval_reset = TRUE; 1246 } else 1247 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0); 1248done: 1249 if (need_sample_reset == TRUE) { 1250 bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period)); 1251 overage_decompressions_during_sample_period = 0; 1252 1253 start_of_sample_period_sec = cur_ts_sec; 1254 start_of_sample_period_nsec = cur_ts_nsec; 1255 sample_period_decompression_count = 0; 1256 sample_period_compression_count = 0; 1257 last_eval_decompression_count = 0; 1258 last_eval_compression_count = 0; 1259 } 1260 if (need_eval_reset == TRUE) { 1261 start_of_eval_period_sec = cur_ts_sec; 1262 start_of_eval_period_nsec = cur_ts_nsec; 1263 } 1264} 1265 1266 1267 1268int calls_since_last_considered = 0; 1269int compaction_swapper_running = 0; 1270int compaction_swapper_abort = 0; 1271 1272 1273#if CONFIG_JETSAM 1274boolean_t memorystatus_kill_on_VM_thrashing(boolean_t); 1275int compressor_thrashing_induced_jetsam = 0; 1276boolean_t vm_compressor_thrashing_detected = FALSE; 1277#endif /* CONFIG_JETSAM */ 1278 1279static boolean_t 1280compressor_needs_to_swap(void) 1281{ 1282 boolean_t should_swap = FALSE; 1283 1284 if (vm_swap_up == TRUE) { 1285 if (COMPRESSOR_NEEDS_TO_SWAP()) { 1286 return (TRUE); 1287 } 1288 if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) { 1289 return (TRUE); 1290 } 1291 if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) 1292 return (TRUE); 1293 } 1294 compute_swapout_target_age(); 1295 1296 if (swapout_target_age) { 1297 c_segment_t c_seg; 1298 1299 lck_mtx_lock_spin_always(c_list_lock); 1300 1301 if (!queue_empty(&c_age_list_head)) { 1302 1303 c_seg = (c_segment_t) queue_first(&c_age_list_head); 1304 1305 if (c_seg->c_creation_ts <= swapout_target_age) 1306 should_swap = TRUE; 1307 else 1308 swapout_target_age = 0; 1309 } 1310 lck_mtx_unlock_always(c_list_lock); 1311 } 1312 1313 if (vm_swap_up == FALSE) { 1314#if CONFIG_JETSAM 1315 if (should_swap) { 1316 if (vm_compressor_thrashing_detected == FALSE) { 1317 vm_compressor_thrashing_detected = TRUE; 1318 memorystatus_kill_on_VM_thrashing(TRUE /* async */); 1319 compressor_thrashing_induced_jetsam++; 1320 /* 1321 * let the jetsam take precedence over 1322 * any major compactions we might have 1323 * been able to do... otherwise we run 1324 * the risk of doing major compactions 1325 * on segments we're about to free up 1326 * due to the jetsam activity. 1327 */ 1328 should_swap = FALSE; 1329 } 1330 } else 1331#endif /* CONFIG_JETSAM */ 1332 if (COMPRESSOR_NEEDS_TO_MAJOR_COMPACT()) 1333 should_swap = TRUE; 1334 } 1335 /* 1336 * returning TRUE when swap_supported == FALSE 1337 * will cause the major compaction engine to 1338 * run, but will not trigger any swapping... 1339 * segments that have been major compacted 1340 * will be moved to the swapped_out_q 1341 * but will not have the c_ondisk flag set 1342 */ 1343 return (should_swap); 1344} 1345 1346uint64_t 1347vm_compressor_total_compressions(void) 1348{ 1349 processor_t processor = processor_list; 1350 vm_statistics64_t stat = &PROCESSOR_DATA(processor, vm_stat); 1351 1352 uint64_t compressions = stat->compressions; 1353 1354 if (processor_count > 1) { 1355 simple_lock(&processor_list_lock); 1356 1357 while ((processor = processor->processor_list) != NULL) { 1358 stat = &PROCESSOR_DATA(processor, vm_stat); 1359 compressions += stat->compressions; 1360 } 1361 1362 simple_unlock(&processor_list_lock); 1363 } 1364 1365 return compressions; 1366} 1367 1368uint32_t vm_wake_compactor_swapper_calls = 0; 1369 1370void 1371vm_wake_compactor_swapper(void) 1372{ 1373 if (compaction_swapper_running) 1374 return; 1375 1376 if (c_minor_count == 0) 1377 return; 1378 1379 lck_mtx_lock_spin_always(c_list_lock); 1380 1381 fastwake_warmup = FALSE; 1382 1383 if (compaction_swapper_running == 0) { 1384 vm_wake_compactor_swapper_calls++; 1385 1386 thread_wakeup((event_t)&c_compressor_swap_trigger); 1387 1388 compaction_swapper_running = 1; 1389 } 1390 lck_mtx_unlock_always(c_list_lock); 1391} 1392 1393void 1394vm_consider_waking_compactor_swapper(void) 1395{ 1396 boolean_t need_wakeup = FALSE; 1397 1398 if (calls_since_last_considered++ < 1000 || compaction_swapper_running) 1399 return; 1400 calls_since_last_considered = 0; 1401 1402 if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) { 1403 1404 need_wakeup = TRUE; 1405 1406 } else if (compressor_needs_to_swap()) { 1407 1408 need_wakeup = TRUE; 1409 1410 } else if (c_minor_count) { 1411 uint64_t total_bytes; 1412 1413 total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64; 1414 1415 if ((total_bytes - compressor_bytes_used) > total_bytes / 10) 1416 need_wakeup = TRUE; 1417 } 1418 if (need_wakeup == TRUE) { 1419 1420 lck_mtx_lock_spin_always(c_list_lock); 1421 1422 fastwake_warmup = FALSE; 1423 1424 if (compaction_swapper_running == 0) { 1425 memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE); 1426 1427 thread_wakeup((event_t)&c_compressor_swap_trigger); 1428 1429 compaction_swapper_running = 1; 1430 } 1431 lck_mtx_unlock_always(c_list_lock); 1432 } 1433} 1434 1435 1436#define C_SWAPOUT_LIMIT 4 1437#define DELAYED_COMPACTIONS_PER_PASS 30 1438 1439void 1440vm_compressor_do_delayed_compactions(boolean_t flush_all) 1441{ 1442 c_segment_t c_seg; 1443 int number_compacted = 0; 1444 boolean_t needs_to_swap = FALSE; 1445 1446 1447 lck_mtx_assert(c_list_lock, LCK_MTX_ASSERT_OWNED); 1448 1449 while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) { 1450 1451 c_seg = (c_segment_t)queue_first(&c_minor_list_head); 1452 1453 lck_mtx_lock_spin_always(&c_seg->c_lock); 1454 1455 if (c_seg->c_busy) { 1456 1457 lck_mtx_unlock_always(c_list_lock); 1458 c_seg_wait_on_busy(c_seg); 1459 lck_mtx_lock_spin_always(c_list_lock); 1460 1461 continue; 1462 } 1463 c_seg->c_busy = 1; 1464 1465 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE); 1466 1467 if (vm_swap_up == TRUE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) { 1468 1469 if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT) 1470 needs_to_swap = TRUE; 1471 1472 number_compacted = 0; 1473 } 1474 lck_mtx_lock_spin_always(c_list_lock); 1475 } 1476} 1477 1478 1479#define C_SEGMENT_SWAPPEDIN_AGE_LIMIT 10 1480 1481static void 1482vm_compressor_age_swapped_in_segments(boolean_t flush_all) 1483{ 1484 c_segment_t c_seg; 1485 clock_sec_t now; 1486 clock_nsec_t nsec; 1487 1488 clock_get_system_nanotime(&now, &nsec); 1489 1490 while (!queue_empty(&c_swappedin_list_head)) { 1491 1492 c_seg = (c_segment_t)queue_first(&c_swappedin_list_head); 1493 1494 if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT) 1495 break; 1496 1497 lck_mtx_lock_spin_always(&c_seg->c_lock); 1498 1499 queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 1500 c_seg->c_on_swappedin_q = 0; 1501 c_swappedin_count--; 1502 1503 c_seg_insert_into_q(&c_age_list_head, c_seg); 1504 c_seg->c_on_age_q = 1; 1505 c_age_count++; 1506 1507 lck_mtx_unlock_always(&c_seg->c_lock); 1508 } 1509} 1510 1511 1512void 1513vm_compressor_flush(void) 1514{ 1515 uint64_t vm_swap_put_failures_at_start; 1516 wait_result_t wait_result = 0; 1517 AbsoluteTime startTime, endTime; 1518 clock_sec_t now_sec; 1519 clock_nsec_t now_nsec; 1520 uint64_t nsec; 1521 1522 HIBLOG("vm_compressor_flush - starting\n"); 1523 1524 clock_get_uptime(&startTime); 1525 1526 lck_mtx_lock_spin_always(c_list_lock); 1527 1528 fastwake_warmup = FALSE; 1529 compaction_swapper_abort = 1; 1530 1531 while (compaction_swapper_running) { 1532 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT); 1533 1534 lck_mtx_unlock_always(c_list_lock); 1535 1536 thread_block(THREAD_CONTINUE_NULL); 1537 1538 lck_mtx_lock_spin_always(c_list_lock); 1539 } 1540 compaction_swapper_abort = 0; 1541 compaction_swapper_running = 1; 1542 1543 hibernate_flushing = TRUE; 1544 hibernate_no_swapspace = FALSE; 1545 c_generation_id_flush_barrier = c_generation_id + 1000; 1546 1547 clock_get_system_nanotime(&now_sec, &now_nsec); 1548 hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE; 1549 1550 vm_swap_put_failures_at_start = vm_swap_put_failures; 1551 1552 vm_compressor_compact_and_swap(TRUE); 1553 1554 while (!queue_empty(&c_swapout_list_head)) { 1555 1556 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC); 1557 1558 lck_mtx_unlock_always(c_list_lock); 1559 1560 wait_result = thread_block(THREAD_CONTINUE_NULL); 1561 1562 lck_mtx_lock_spin_always(c_list_lock); 1563 1564 if (wait_result == THREAD_TIMED_OUT) 1565 break; 1566 } 1567 hibernate_flushing = FALSE; 1568 compaction_swapper_running = 0; 1569 1570 if (vm_swap_put_failures > vm_swap_put_failures_at_start) 1571 HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n", 1572 vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT); 1573 1574 lck_mtx_unlock_always(c_list_lock); 1575 1576 clock_get_uptime(&endTime); 1577 SUB_ABSOLUTETIME(&endTime, &startTime); 1578 absolutetime_to_nanoseconds(endTime, &nsec); 1579 1580 HIBLOG("vm_compressor_flush completed - took %qd msecs\n", nsec / 1000000ULL); 1581} 1582 1583 1584 1585int compaction_swap_trigger_thread_awakened = 0; 1586 1587static void 1588vm_compressor_swap_trigger_thread(void) 1589{ 1590 1591 lck_mtx_lock_spin_always(c_list_lock); 1592 1593 compaction_swap_trigger_thread_awakened++; 1594 1595 vm_compressor_compact_and_swap(FALSE); 1596 1597 assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT); 1598 1599 compaction_swapper_running = 0; 1600 thread_wakeup((event_t)&compaction_swapper_running); 1601 1602 lck_mtx_unlock_always(c_list_lock); 1603 1604 thread_block((thread_continue_t)vm_compressor_swap_trigger_thread); 1605 1606 /* NOTREACHED */ 1607} 1608 1609 1610void 1611vm_compressor_record_warmup_start(void) 1612{ 1613 c_segment_t c_seg; 1614 1615 lck_mtx_lock_spin_always(c_list_lock); 1616 1617 if (first_c_segment_to_warm_generation_id == 0) { 1618 if (!queue_empty(&c_age_list_head)) { 1619 1620 c_seg = (c_segment_t)queue_last(&c_age_list_head); 1621 1622 first_c_segment_to_warm_generation_id = c_seg->c_generation_id; 1623 } else 1624 first_c_segment_to_warm_generation_id = 0; 1625 1626 fastwake_recording_in_progress = TRUE; 1627 } 1628 lck_mtx_unlock_always(c_list_lock); 1629} 1630 1631 1632void 1633vm_compressor_record_warmup_end(void) 1634{ 1635 c_segment_t c_seg; 1636 1637 lck_mtx_lock_spin_always(c_list_lock); 1638 1639 if (fastwake_recording_in_progress == TRUE) { 1640 1641 if (!queue_empty(&c_age_list_head)) { 1642 1643 c_seg = (c_segment_t)queue_last(&c_age_list_head); 1644 1645 last_c_segment_to_warm_generation_id = c_seg->c_generation_id; 1646 } else 1647 last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id; 1648 1649 fastwake_recording_in_progress = FALSE; 1650 1651 HIBLOG("vm_compressor_record_warmup (%qd - %qd)\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id); 1652 } 1653 lck_mtx_unlock_always(c_list_lock); 1654} 1655 1656 1657#define DELAY_TRIM_ON_WAKE_SECS 4 1658 1659void 1660vm_compressor_delay_trim(void) 1661{ 1662 clock_sec_t sec; 1663 clock_nsec_t nsec; 1664 1665 clock_get_system_nanotime(&sec, &nsec); 1666 dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS; 1667} 1668 1669 1670void 1671vm_compressor_do_warmup(void) 1672{ 1673 lck_mtx_lock_spin_always(c_list_lock); 1674 1675 if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) { 1676 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0; 1677 1678 lck_mtx_unlock_always(c_list_lock); 1679 return; 1680 } 1681 1682 if (compaction_swapper_running == 0) { 1683 1684 fastwake_warmup = TRUE; 1685 compaction_swapper_running = 1; 1686 thread_wakeup((event_t)&c_compressor_swap_trigger); 1687 } 1688 lck_mtx_unlock_always(c_list_lock); 1689} 1690 1691 1692void 1693do_fastwake_warmup(void) 1694{ 1695 uint64_t my_thread_id; 1696 c_segment_t c_seg = NULL; 1697 AbsoluteTime startTime, endTime; 1698 uint64_t nsec; 1699 1700 1701 HIBLOG("vm_compressor_fastwake_warmup (%qd - %qd) - starting\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id); 1702 1703 clock_get_uptime(&startTime); 1704 1705 lck_mtx_unlock_always(c_list_lock); 1706 1707 my_thread_id = current_thread()->thread_id; 1708 proc_set_task_policy_thread(kernel_task, my_thread_id, 1709 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); 1710 1711 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1712 1713 lck_mtx_lock_spin_always(c_list_lock); 1714 1715 while (!queue_empty(&c_swappedout_list_head) && fastwake_warmup == TRUE) { 1716 1717 c_seg = (c_segment_t) queue_first(&c_swappedout_list_head); 1718 1719 if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id || 1720 c_seg->c_generation_id > last_c_segment_to_warm_generation_id) 1721 break; 1722 1723 lck_mtx_lock_spin_always(&c_seg->c_lock); 1724 lck_mtx_unlock_always(c_list_lock); 1725 1726 if (c_seg->c_busy) { 1727 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1728 c_seg_wait_on_busy(c_seg); 1729 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1730 } else { 1731 c_seg_swapin(c_seg, TRUE); 1732 1733 lck_mtx_unlock_always(&c_seg->c_lock); 1734 c_segment_warmup_count++; 1735 1736 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1737 vm_pageout_io_throttle(); 1738 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1739 } 1740 lck_mtx_lock_spin_always(c_list_lock); 1741 } 1742 lck_mtx_unlock_always(c_list_lock); 1743 1744 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1745 1746 proc_set_task_policy_thread(kernel_task, my_thread_id, 1747 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0); 1748 1749 clock_get_uptime(&endTime); 1750 SUB_ABSOLUTETIME(&endTime, &startTime); 1751 absolutetime_to_nanoseconds(endTime, &nsec); 1752 1753 HIBLOG("vm_compressor_fastwake_warmup completed - took %qd msecs\n", nsec / 1000000ULL); 1754 1755 lck_mtx_lock_spin_always(c_list_lock); 1756 1757 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0; 1758} 1759 1760 1761void 1762vm_compressor_compact_and_swap(boolean_t flush_all) 1763{ 1764 c_segment_t c_seg, c_seg_next; 1765 boolean_t keep_compacting; 1766 1767 1768 if (fastwake_warmup == TRUE) { 1769 uint64_t starting_warmup_count; 1770 1771 starting_warmup_count = c_segment_warmup_count; 1772 1773 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count, 1774 first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0); 1775 do_fastwake_warmup(); 1776 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0); 1777 1778 fastwake_warmup = FALSE; 1779 } 1780 1781 /* 1782 * it's possible for the c_age_list_head to be empty if we 1783 * hit our limits for growing the compressor pool and we subsequently 1784 * hibernated... on the next hibernation we could see the queue as 1785 * empty and not proceeed even though we have a bunch of segments on 1786 * the swapped in queue that need to be dealt with. 1787 */ 1788 vm_compressor_do_delayed_compactions(flush_all); 1789 1790 vm_compressor_age_swapped_in_segments(flush_all); 1791 1792 1793 while (!queue_empty(&c_age_list_head) && compaction_swapper_abort == 0) { 1794 1795 if (hibernate_flushing == TRUE) { 1796 clock_sec_t sec; 1797 clock_nsec_t nsec; 1798 1799 if (hibernate_should_abort()) { 1800 HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n"); 1801 break; 1802 } 1803 if (hibernate_no_swapspace == TRUE) { 1804 HIBLOG("vm_compressor_flush - out of swap space\n"); 1805 break; 1806 } 1807 clock_get_system_nanotime(&sec, &nsec); 1808 1809 if (sec > hibernate_flushing_deadline) { 1810 HIBLOG("vm_compressor_flush - failed to finish before deadline\n"); 1811 break; 1812 } 1813 } 1814 if (c_swapout_count >= C_SWAPOUT_LIMIT) { 1815 1816 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000*NSEC_PER_USEC); 1817 1818 lck_mtx_unlock_always(c_list_lock); 1819 1820 thread_block(THREAD_CONTINUE_NULL); 1821 1822 lck_mtx_lock_spin_always(c_list_lock); 1823 } 1824 /* 1825 * Minor compactions 1826 */ 1827 vm_compressor_do_delayed_compactions(flush_all); 1828 1829 vm_compressor_age_swapped_in_segments(flush_all); 1830 1831 if (c_swapout_count >= C_SWAPOUT_LIMIT) { 1832 /* 1833 * we timed out on the above thread_block 1834 * let's loop around and try again 1835 * the timeout allows us to continue 1836 * to do minor compactions to make 1837 * more memory available 1838 */ 1839 continue; 1840 } 1841 1842 /* 1843 * Swap out segments? 1844 */ 1845 if (flush_all == FALSE) { 1846 boolean_t needs_to_swap; 1847 1848 lck_mtx_unlock_always(c_list_lock); 1849 1850 needs_to_swap = compressor_needs_to_swap(); 1851 1852 lck_mtx_lock_spin_always(c_list_lock); 1853 1854 if (needs_to_swap == FALSE) 1855 break; 1856 } 1857 if (queue_empty(&c_age_list_head)) 1858 break; 1859 c_seg = (c_segment_t) queue_first(&c_age_list_head); 1860 1861 if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier) 1862 break; 1863 1864 if (c_seg->c_filling) { 1865 /* 1866 * we're at or near the head... no more work to do 1867 */ 1868 break; 1869 } 1870 lck_mtx_lock_spin_always(&c_seg->c_lock); 1871 1872 if (c_seg->c_busy) { 1873 1874 lck_mtx_unlock_always(c_list_lock); 1875 c_seg_wait_on_busy(c_seg); 1876 lck_mtx_lock_spin_always(c_list_lock); 1877 1878 continue; 1879 } 1880 c_seg->c_busy = 1; 1881 1882 if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) { 1883 /* 1884 * found an empty c_segment and freed it 1885 * so go grab the next guy in the queue 1886 */ 1887 continue; 1888 } 1889 /* 1890 * Major compaction 1891 */ 1892 keep_compacting = TRUE; 1893 1894 while (keep_compacting == TRUE) { 1895 1896 assert(c_seg->c_busy); 1897 1898 /* look for another segment to consolidate */ 1899 1900 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list); 1901 1902 if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next)) 1903 break; 1904 1905 if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE) 1906 break; 1907 1908 lck_mtx_lock_spin_always(&c_seg_next->c_lock); 1909 1910 if (c_seg_next->c_busy) { 1911 1912 lck_mtx_unlock_always(c_list_lock); 1913 c_seg_wait_on_busy(c_seg_next); 1914 lck_mtx_lock_spin_always(c_list_lock); 1915 1916 continue; 1917 } 1918 /* grab that segment */ 1919 c_seg_next->c_busy = 1; 1920 1921 if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) { 1922 /* 1923 * found an empty c_segment and freed it 1924 * so we can't continue to use c_seg_next 1925 */ 1926 continue; 1927 } 1928 1929 /* unlock the list ... */ 1930 lck_mtx_unlock_always(c_list_lock); 1931 1932 /* do the major compaction */ 1933 1934 keep_compacting = c_seg_major_compact(c_seg, c_seg_next); 1935 1936 PAGE_REPLACEMENT_DISALLOWED(TRUE); 1937 1938 lck_mtx_lock_spin_always(&c_seg_next->c_lock); 1939 /* 1940 * run a minor compaction on the donor segment 1941 * since we pulled at least some of it's 1942 * data into our target... if we've emptied 1943 * it, now is a good time to free it which 1944 * c_seg_minor_compaction_and_unlock also takes care of 1945 * 1946 * by passing TRUE, we ask for c_busy to be cleared 1947 * and c_wanted to be taken care of 1948 */ 1949 c_seg_minor_compaction_and_unlock(c_seg_next, TRUE); 1950 1951 PAGE_REPLACEMENT_DISALLOWED(FALSE); 1952 1953 /* relock the list */ 1954 lck_mtx_lock_spin_always(c_list_lock); 1955 1956 } /* major compaction */ 1957 1958 c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used; 1959 c_seg_major_compact_stats.count_of_swapouts++; 1960 1961 lck_mtx_lock_spin_always(&c_seg->c_lock); 1962 1963 assert(c_seg->c_busy); 1964 assert(c_seg->c_on_age_q); 1965 assert(!c_seg->c_on_minorcompact_q); 1966 1967 queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list); 1968 c_seg->c_on_age_q = 0; 1969 c_age_count--; 1970 1971 if (vm_swap_up == TRUE) { 1972 queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); 1973 c_seg->c_on_swapout_q = 1; 1974 c_swapout_count++; 1975 } else { 1976 queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 1977 c_seg->c_on_swappedout_q = 1; 1978 c_swappedout_count++; 1979 } 1980 C_SEG_WAKEUP_DONE(c_seg); 1981 1982 lck_mtx_unlock_always(&c_seg->c_lock); 1983 1984 if (c_swapout_count) { 1985 lck_mtx_unlock_always(c_list_lock); 1986 1987 thread_wakeup((event_t)&c_swapout_list_head); 1988 1989 lck_mtx_lock_spin_always(c_list_lock); 1990 } 1991 } 1992} 1993 1994 1995static c_segment_t 1996c_seg_allocate(c_segment_t *current_chead) 1997{ 1998 clock_sec_t sec; 1999 clock_nsec_t nsec; 2000 c_segment_t c_seg; 2001 int slotarray; 2002 2003 if ( (c_seg = *current_chead) == NULL ) { 2004 uint32_t c_segno; 2005 2006 KERNEL_DEBUG(0xe0400004 | DBG_FUNC_START, 0, 0, 0, 0, 0); 2007 2008 lck_mtx_lock_spin_always(c_list_lock); 2009 2010 while (c_segments_busy == TRUE) { 2011 assert_wait((event_t) (&c_segments_busy), THREAD_UNINT); 2012 2013 lck_mtx_unlock_always(c_list_lock); 2014 2015 thread_block(THREAD_CONTINUE_NULL); 2016 2017 lck_mtx_lock_spin_always(c_list_lock); 2018 } 2019 if (c_free_segno_head == (uint32_t)-1) { 2020 2021 if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) { 2022 lck_mtx_unlock_always(c_list_lock); 2023 2024 KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 1, 0); 2025 return (NULL); 2026 } 2027 c_segments_busy = TRUE; 2028 lck_mtx_unlock_always(c_list_lock); 2029 2030 kernel_memory_populate(kernel_map, (vm_offset_t)c_segments_next_page, PAGE_SIZE, KMA_KOBJECT); 2031 c_segments_next_page += PAGE_SIZE; 2032 2033 for (c_segno = c_segments_available + 1; c_segno < (c_segments_available + C_SEGMENTS_PER_PAGE); c_segno++) 2034 c_segments[c_segno - 1].c_segno = c_segno; 2035 2036 lck_mtx_lock_spin_always(c_list_lock); 2037 2038 c_segments[c_segno - 1].c_segno = c_free_segno_head; 2039 c_free_segno_head = c_segments_available; 2040 c_segments_available += C_SEGMENTS_PER_PAGE; 2041 2042 c_segments_busy = FALSE; 2043 thread_wakeup((event_t) (&c_segments_busy)); 2044 } 2045 c_segno = c_free_segno_head; 2046 c_free_segno_head = c_segments[c_segno].c_segno; 2047 2048 lck_mtx_unlock_always(c_list_lock); 2049 2050 c_seg = (c_segment_t)zalloc(compressor_segment_zone); 2051 bzero((char *)c_seg, sizeof(struct c_segment)); 2052 2053 if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_seg->c_store.c_buffer), C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) { 2054 zfree(compressor_segment_zone, c_seg); 2055 2056 lck_mtx_lock_spin_always(c_list_lock); 2057 2058 c_segments[c_segno].c_segno = c_free_segno_head; 2059 c_free_segno_head = c_segno; 2060 2061 lck_mtx_unlock_always(c_list_lock); 2062 2063 KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 2, 0); 2064 2065 return (NULL); 2066 } 2067 2068#if __i386__ || __x86_64__ 2069 lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); 2070#else /* __i386__ || __x86_64__ */ 2071 lck_spin_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); 2072#endif /* __i386__ || __x86_64__ */ 2073 2074 kernel_memory_populate(kernel_map, (vm_offset_t)(c_seg->c_store.c_buffer), 3 * PAGE_SIZE, KMA_COMPRESSOR); 2075 2076 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(3 * PAGE_SIZE); 2077 c_seg->c_firstemptyslot = C_SLOT_MAX; 2078 c_seg->c_mysegno = c_segno; 2079 c_seg->c_filling = 1; 2080 2081 lck_mtx_lock_spin_always(c_list_lock); 2082 2083 c_segment_count++; 2084 c_segments[c_segno].c_seg = c_seg; 2085 2086 c_seg->c_generation_id = c_generation_id++; 2087 2088 queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list); 2089 c_seg->c_on_age_q = 1; 2090 c_age_count++; 2091 2092 lck_mtx_unlock_always(c_list_lock); 2093 2094 clock_get_system_nanotime(&sec, &nsec); 2095 c_seg->c_creation_ts = (uint32_t)sec; 2096 2097 *current_chead = c_seg; 2098 2099 KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, c_seg, 0, 0, 3, 0); 2100 } 2101 slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg, c_seg->c_nextslot); 2102 2103 if (c_seg->c_slots[slotarray] == 0) { 2104 KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0); 2105 2106 c_seg->c_slots[slotarray] = (struct c_slot *)kalloc(sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE); 2107 2108 KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0); 2109 } 2110 2111 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2112 2113 lck_mtx_lock_spin_always(&c_seg->c_lock); 2114 2115 return (c_seg); 2116} 2117 2118 2119 2120static void 2121c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead) 2122{ 2123 uint32_t unused_bytes; 2124 uint32_t offset_to_depopulate; 2125 2126 unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset)); 2127 2128 if (unused_bytes) { 2129 2130 offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset))); 2131 2132 /* 2133 * release the extra physical page(s) at the end of the segment 2134 */ 2135 lck_mtx_unlock_always(&c_seg->c_lock); 2136 2137 kernel_memory_depopulate( 2138 kernel_map, 2139 (vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate], 2140 unused_bytes, 2141 KMA_COMPRESSOR); 2142 2143 lck_mtx_lock_spin_always(&c_seg->c_lock); 2144 2145 c_seg->c_populated_offset = offset_to_depopulate; 2146 } 2147 c_seg->c_filling = 0; 2148 2149 if (C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) 2150 c_seg_need_delayed_compaction(c_seg); 2151 2152 lck_mtx_unlock_always(&c_seg->c_lock); 2153 2154 *current_chead = NULL; 2155} 2156 2157 2158/* 2159 * returns with c_seg locked 2160 */ 2161void 2162c_seg_swapin_requeue(c_segment_t c_seg) 2163{ 2164 clock_sec_t sec; 2165 clock_nsec_t nsec; 2166 2167 clock_get_system_nanotime(&sec, &nsec); 2168 2169 lck_mtx_lock_spin_always(c_list_lock); 2170 lck_mtx_lock_spin_always(&c_seg->c_lock); 2171 2172 if (c_seg->c_on_swappedout_q) { 2173 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); 2174 c_seg->c_on_swappedout_q = 0; 2175 c_swappedout_count--; 2176 } else { 2177 assert(c_seg->c_on_swappedout_sparse_q); 2178 2179 queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); 2180 c_seg->c_on_swappedout_sparse_q = 0; 2181 c_swappedout_sparse_count--; 2182 } 2183 if (c_seg->c_store.c_buffer) { 2184 queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); 2185 c_seg->c_on_swappedin_q = 1; 2186 c_swappedin_count++; 2187 } 2188#if TRACK_BAD_C_SEGMENTS 2189 else { 2190 queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list); 2191 c_seg->c_on_bad_q = 1; 2192 c_bad_count++; 2193 } 2194#endif 2195 c_seg->c_swappedin_ts = (uint32_t)sec; 2196 c_seg->c_ondisk = 0; 2197 c_seg->c_was_swapped_in = 1; 2198 2199 lck_mtx_unlock_always(c_list_lock); 2200} 2201 2202 2203 2204/* 2205 * c_seg has to be locked and is returned locked. 2206 * PAGE_REPLACMENT_DISALLOWED has to be TRUE on entry and is returned TRUE 2207 */ 2208 2209void 2210c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction) 2211{ 2212 vm_offset_t addr = 0; 2213 uint32_t io_size = 0; 2214 uint64_t f_offset; 2215 2216#if !CHECKSUM_THE_SWAP 2217 if (c_seg->c_ondisk) 2218 c_seg_trim_tail(c_seg); 2219#endif 2220 io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); 2221 f_offset = c_seg->c_store.c_swap_handle; 2222 2223 c_seg->c_busy = 1; 2224 lck_mtx_unlock_always(&c_seg->c_lock); 2225 2226 if (c_seg->c_ondisk) { 2227 2228 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2229 2230 if (kernel_memory_allocate(kernel_map, &addr, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) 2231 panic("c_seg_swapin: kernel_memory_allocate failed\n"); 2232 2233 kernel_memory_populate(kernel_map, addr, io_size, KMA_COMPRESSOR); 2234 2235 if (vm_swap_get(addr, f_offset, io_size) != KERN_SUCCESS) { 2236 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2237 2238 kernel_memory_depopulate(kernel_map, addr, io_size, KMA_COMPRESSOR); 2239 kmem_free(kernel_map, addr, C_SEG_ALLOCSIZE); 2240 2241 c_seg->c_store.c_buffer = (int32_t*) NULL; 2242 } else { 2243 c_seg->c_store.c_buffer = (int32_t*) addr; 2244#if CRYPTO 2245 vm_swap_decrypt(c_seg); 2246#endif /* CRYPTO */ 2247 2248#if CHECKSUM_THE_SWAP 2249 if (c_seg->cseg_swap_size != io_size) 2250 panic("swapin size doesn't match swapout size"); 2251 2252 if (c_seg->cseg_hash != hash_string((char*) c_seg->c_store.c_buffer, (int)io_size)) { 2253 panic("c_seg_swapin - Swap hash mismatch\n"); 2254 } 2255#endif /* CHECKSUM_THE_SWAP */ 2256 2257 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2258 2259 if (force_minor_compaction == TRUE) { 2260 lck_mtx_lock_spin_always(&c_seg->c_lock); 2261 2262 c_seg_minor_compaction_and_unlock(c_seg, FALSE); 2263 } 2264 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used); 2265 } 2266 } 2267 c_seg_swapin_requeue(c_seg); 2268 2269 C_SEG_WAKEUP_DONE(c_seg); 2270} 2271 2272 2273static int 2274c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf) 2275{ 2276 int c_size; 2277 int c_rounded_size; 2278 int max_csize; 2279 c_slot_t cs; 2280 c_segment_t c_seg; 2281 2282 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0); 2283retry: 2284 if ((c_seg = c_seg_allocate(current_chead)) == NULL) 2285 return (1); 2286 /* 2287 * returns with c_seg lock held 2288 * and PAGE_REPLACEMENT_DISALLOWED(TRUE) 2289 */ 2290 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot); 2291 2292 cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr); 2293 cs->c_offset = c_seg->c_nextoffset; 2294 2295 max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset); 2296 2297 if (max_csize > PAGE_SIZE) 2298 max_csize = PAGE_SIZE; 2299 2300 if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - 2301 c_seg->c_nextoffset) 2302 < (unsigned) max_csize + PAGE_SIZE && 2303 (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) 2304 < C_SEG_ALLOCSIZE)) { 2305 lck_mtx_unlock_always(&c_seg->c_lock); 2306 2307 kernel_memory_populate(kernel_map, 2308 (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset], 2309 PAGE_SIZE, 2310 KMA_COMPRESSOR); 2311 2312 lck_mtx_lock_spin_always(&c_seg->c_lock); 2313 2314 c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE); 2315 } 2316 2317#if CHECKSUM_THE_DATA 2318 cs->c_hash_data = hash_string(src, PAGE_SIZE); 2319#endif 2320 c_size = WKdm_compress_new((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 2321 (WK_word *)(uintptr_t)scratch_buf, max_csize - 4); 2322 2323 assert(c_size <= (max_csize - 4) && c_size >= -1); 2324 2325 if (c_size == -1) { 2326 2327 if (max_csize < PAGE_SIZE) { 2328 c_current_seg_filled(c_seg, current_chead); 2329 2330 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2331 2332 goto retry; 2333 } 2334 c_size = PAGE_SIZE; 2335 2336 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size); 2337 } 2338#if CHECKSUM_THE_COMPRESSED_DATA 2339 cs->c_hash_compressed_data = hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size); 2340#endif 2341 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 2342 2343 PACK_C_SIZE(cs, c_size); 2344 c_seg->c_bytes_used += c_rounded_size; 2345 c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); 2346 2347 slot_ptr->s_cindx = c_seg->c_nextslot++; 2348 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */ 2349 slot_ptr->s_cseg = c_seg->c_mysegno + 1; 2350 2351 if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX) 2352 c_current_seg_filled(c_seg, current_chead); 2353 else 2354 lck_mtx_unlock_always(&c_seg->c_lock); 2355 2356 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2357 2358 OSAddAtomic64(c_rounded_size, &compressor_bytes_used); 2359 OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes); 2360 OSAddAtomic64(c_size, &c_segment_compressed_bytes); 2361 2362 OSAddAtomic(1, &c_segment_pages_compressed); 2363 OSAddAtomic(1, &sample_period_compression_count); 2364 2365 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0); 2366 2367 if (vm_compressor_low_on_space()) { 2368 ipc_port_t trigger = IP_NULL; 2369 2370 PSL_LOCK(); 2371 if (IP_VALID(min_pages_trigger_port)) { 2372 trigger = min_pages_trigger_port; 2373 min_pages_trigger_port = IP_NULL; 2374 } 2375 PSL_UNLOCK(); 2376 2377 if (IP_VALID(trigger)) { 2378 no_paging_space_action(); 2379 default_pager_space_alert(trigger, HI_WAT_ALERT); 2380 ipc_port_release_send(trigger); 2381 } 2382 } 2383 return (0); 2384} 2385 2386 2387static int 2388c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot) 2389{ 2390 c_slot_t cs; 2391 c_segment_t c_seg; 2392 int c_indx; 2393 int c_rounded_size; 2394 uint32_t c_size; 2395 int retval = 0; 2396 boolean_t c_seg_has_data = TRUE; 2397 boolean_t c_seg_swappedin = FALSE; 2398 boolean_t need_unlock = TRUE; 2399 boolean_t consider_defragmenting = FALSE; 2400 2401ReTry: 2402#if HIBERNATION 2403 if (dst) { 2404 if (lck_rw_try_lock_shared(&c_decompressor_lock) == 0) { 2405 if (flags & C_DONT_BLOCK) { 2406 *zeroslot = 0; 2407 return (-2); 2408 } 2409 lck_rw_lock_shared(&c_decompressor_lock); 2410 } 2411 } 2412#endif 2413 PAGE_REPLACEMENT_DISALLOWED(TRUE); 2414 2415 /* s_cseg is actually "segno+1" */ 2416 c_seg = c_segments[slot_ptr->s_cseg - 1].c_seg; 2417 2418 lck_mtx_lock_spin_always(&c_seg->c_lock); 2419 2420 if (flags & C_DONT_BLOCK) { 2421 if (c_seg->c_busy || c_seg->c_ondisk) { 2422 2423 retval = -2; 2424 *zeroslot = 0; 2425 2426 goto done; 2427 } 2428 } 2429 if (c_seg->c_busy) { 2430 2431 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2432#if HIBERNATION 2433 if (dst) 2434 lck_rw_done(&c_decompressor_lock); 2435#endif 2436 c_seg_wait_on_busy(c_seg); 2437 2438 goto ReTry; 2439 } 2440 c_indx = slot_ptr->s_cindx; 2441 2442 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); 2443 2444 c_size = UNPACK_C_SIZE(cs); 2445 2446 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; 2447 2448 if (dst) { 2449 uint32_t age_of_cseg; 2450 clock_sec_t cur_ts_sec; 2451 clock_nsec_t cur_ts_nsec; 2452 2453 if (c_seg->c_on_swappedout_q || c_seg->c_on_swappedout_sparse_q) { 2454 if (c_seg->c_ondisk) 2455 c_seg_swappedin = TRUE; 2456 c_seg_swapin(c_seg, FALSE); 2457 } 2458 if (c_seg->c_store.c_buffer == NULL) { 2459 c_seg_has_data = FALSE; 2460 goto c_seg_invalid_data; 2461 } 2462#if CHECKSUM_THE_COMPRESSED_DATA 2463 if (cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size)) 2464 panic("compressed data doesn't match original"); 2465#endif 2466 if (c_rounded_size == PAGE_SIZE) { 2467 /* 2468 * page wasn't compressible... just copy it out 2469 */ 2470 memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE); 2471 } else { 2472 uint32_t my_cpu_no; 2473 char *scratch_buf; 2474 2475 /* 2476 * we're behind the c_seg lock held in spin mode 2477 * which means pre-emption is disabled... therefore 2478 * the following sequence is atomic and safe 2479 */ 2480 my_cpu_no = cpu_number(); 2481 2482 assert(my_cpu_no < compressor_cpus); 2483 2484 scratch_buf = &compressor_scratch_bufs[my_cpu_no * WKdm_SCRATCH_BUF_SIZE]; 2485 2486 WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], 2487 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size); 2488 } 2489 2490#if CHECKSUM_THE_DATA 2491 if (cs->c_hash_data != hash_string(dst, PAGE_SIZE)) 2492 panic("decompressed data doesn't match original"); 2493#endif 2494 if (!c_seg->c_was_swapped_in) { 2495 2496 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); 2497 2498 age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts; 2499 2500 if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE) 2501 OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]); 2502 else 2503 OSAddAtomic(1, &overage_decompressions_during_sample_period); 2504 2505 OSAddAtomic(1, &sample_period_decompression_count); 2506 } 2507 } else { 2508 if (c_seg->c_store.c_buffer == NULL) 2509 c_seg_has_data = FALSE; 2510 } 2511c_seg_invalid_data: 2512 2513 if (c_seg_has_data == TRUE) { 2514 if (c_seg_swappedin == TRUE) 2515 retval = 1; 2516 else 2517 retval = 0; 2518 } else 2519 retval = -1; 2520 2521 if (flags & C_KEEP) { 2522 *zeroslot = 0; 2523 goto done; 2524 } 2525 c_seg->c_bytes_unused += c_rounded_size; 2526 c_seg->c_bytes_used -= c_rounded_size; 2527 PACK_C_SIZE(cs, 0); 2528 2529 if (c_indx < c_seg->c_firstemptyslot) 2530 c_seg->c_firstemptyslot = c_indx; 2531 2532 OSAddAtomic(-1, &c_segment_pages_compressed); 2533 2534 if (c_seg_has_data == TRUE && !c_seg->c_ondisk) { 2535 /* 2536 * c_ondisk == TRUE can occur when we're doing a 2537 * free of a compressed page (i.e. dst == NULL) 2538 */ 2539 OSAddAtomic64(-c_rounded_size, &compressor_bytes_used); 2540 } 2541 if (!c_seg->c_filling) { 2542 if (c_seg->c_bytes_used == 0) { 2543 if (!c_seg->c_ondisk) { 2544 int pages_populated; 2545 2546 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE; 2547 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0); 2548 2549 if (pages_populated) { 2550 assert(c_seg->c_store.c_buffer != NULL); 2551 2552 c_seg->c_busy = 1; 2553 lck_mtx_unlock_always(&c_seg->c_lock); 2554 2555 kernel_memory_depopulate(kernel_map, (vm_offset_t) c_seg->c_store.c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR); 2556 2557 lck_mtx_lock_spin_always(&c_seg->c_lock); 2558 C_SEG_WAKEUP_DONE(c_seg); 2559 } 2560 if (!c_seg->c_on_minorcompact_q && !c_seg->c_on_swapout_q) 2561 c_seg_need_delayed_compaction(c_seg); 2562 } else 2563 assert(c_seg->c_on_swappedout_sparse_q); 2564 2565 } else if (c_seg->c_on_minorcompact_q) { 2566 2567 if (C_SEG_INCORE_IS_SPARSE(c_seg)) { 2568 c_seg_try_minor_compaction_and_unlock(c_seg); 2569 need_unlock = FALSE; 2570 } 2571 } else if (!c_seg->c_ondisk) { 2572 2573 if (c_seg_has_data == TRUE && !c_seg->c_on_swapout_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) { 2574 c_seg_need_delayed_compaction(c_seg); 2575 } 2576 } else if (!c_seg->c_on_swappedout_sparse_q && C_SEG_ONDISK_IS_SPARSE(c_seg)) { 2577 2578 c_seg_move_to_sparse_list(c_seg); 2579 consider_defragmenting = TRUE; 2580 } 2581 } 2582done: 2583 if (need_unlock == TRUE) 2584 lck_mtx_unlock_always(&c_seg->c_lock); 2585 2586 PAGE_REPLACEMENT_DISALLOWED(FALSE); 2587 2588 if (consider_defragmenting == TRUE) 2589 vm_swap_consider_defragmenting(); 2590#if HIBERNATION 2591 if (dst) 2592 lck_rw_done(&c_decompressor_lock); 2593#endif 2594 return (retval); 2595} 2596 2597 2598int 2599vm_compressor_get(ppnum_t pn, int *slot, int flags) 2600{ 2601 char *dst; 2602 int zeroslot = 1; 2603 int retval; 2604 2605#if __x86_64__ 2606 dst = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT); 2607#else 2608#error "unsupported architecture" 2609#endif 2610 2611 retval = c_decompress_page(dst, (c_slot_mapping_t)slot, flags, &zeroslot); 2612 2613 /* 2614 * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP) 2615 * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set 2616 */ 2617 if (zeroslot) { 2618 /* 2619 * We've just decompressed a page, and are about to hand that back to VM for 2620 * re-entry into some pmap. This is a decompression operation which must have no 2621 * impact on the pmap's physical footprint. However, when VM goes to re-enter 2622 * this page into the pmap, it doesn't know that it came from the compressor, 2623 * which means the pmap's physical footprint will be incremented. To compensate 2624 * for that, we decrement the physical footprint here, so that the total net effect 2625 * on the physical footprint statistic is zero. 2626 */ 2627 pmap_ledger_debit(current_task()->map->pmap, task_ledgers.phys_footprint, PAGE_SIZE); 2628 2629 *slot = 0; 2630 } 2631 /* 2632 * returns 0 if we successfully decompressed a page from a segment already in memory 2633 * returns 1 if we had to first swap in the segment, before successfully decompressing the page 2634 * returns -1 if we encountered an error swapping in the segment - decompression failed 2635 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set 2636 */ 2637 return (retval); 2638} 2639 2640 2641void 2642vm_compressor_free(int *slot) 2643{ 2644 int zeroslot = 1; 2645 2646 (void)c_decompress_page(NULL, (c_slot_mapping_t)slot, 0, &zeroslot); 2647 2648 *slot = 0; 2649} 2650 2651 2652int 2653vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf) 2654{ 2655 char *src; 2656 int retval; 2657 2658 if ((vm_offset_t)slot < VM_MIN_KERNEL_AND_KEXT_ADDRESS || (vm_offset_t)slot >= VM_MAX_KERNEL_ADDRESS) 2659 panic("vm_compressor_put: slot 0x%llx address out of range [0x%llx:0x%llx]", 2660 (uint64_t)(vm_offset_t) slot, 2661 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS, 2662 (uint64_t) VM_MAX_KERNEL_ADDRESS); 2663 2664#if __x86_64__ 2665 src = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT); 2666#else 2667#error "unsupported architecture" 2668#endif 2669 retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf); 2670 2671 return (retval); 2672} 2673