/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include "vm_compressor_backing_store.h"
#include <vm/vm_protos.h>

#include <IOKit/IOHibernatePrivate.h>


boolean_t compressor_store_stop_compaction = FALSE;
boolean_t vm_swap_up = FALSE;
boolean_t vm_swapfile_create_needed = FALSE;
boolean_t vm_swapfile_gc_needed = FALSE;

int swapper_throttle = -1;
boolean_t swapper_throttle_inited = FALSE;
uint64_t vm_swapout_thread_id;

uint64_t vm_swap_put_failures = 0;
uint64_t vm_swap_get_failures = 0;
int vm_num_swap_files = 0;
int vm_swapout_thread_processed_segments = 0;
int vm_swapout_thread_awakened = 0;
int vm_swapfile_create_thread_awakened = 0;
int vm_swapfile_create_thread_running = 0;
int vm_swapfile_gc_thread_awakened = 0;
int vm_swapfile_gc_thread_running = 0;

unsigned int vm_swapfile_total_segs_alloced = 0;
unsigned int vm_swapfile_total_segs_used = 0;


#define SWAP_READY	0x1	/* Swap file is ready to be used */
#define SWAP_RECLAIM	0x2	/* Swap file is marked to be reclaimed */
#define SWAP_WANTED	0x4	/* Swap file has waiters */
#define SWAP_REUSE	0x8	/* Swap file is on the Q and has a name. Reuse after init-ing. */

struct swapfile {
	queue_head_t		swp_queue;	/* list of swap files */
	char			*swp_path;	/* saved pathname of swap file */
	struct vnode		*swp_vp;	/* backing vnode */
	uint64_t		swp_size;	/* size of this swap file */
	uint8_t			*swp_bitmap;	/* bitmap showing the alloced/freed slots in the swap file */
	unsigned int		swp_pathlen;	/* length of pathname */
	unsigned int		swp_nsegs;	/* #segments we can use */
	unsigned int		swp_nseginuse;	/* #segments in use */
	unsigned int		swp_index;	/* index of this swap file */
	unsigned int		swp_flags;	/* state of swap file */
	unsigned int		swp_free_hint;	/* offset of 1st free chunk */
	unsigned int		swp_io_count;	/* count of outstanding I/Os */
	c_segment_t		*swp_csegs;	/* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list	*swp_delayed_trim_list_head;
	unsigned int		swp_delayed_trim_count;
};
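/*
 * Rough layout sketch: a swapfile is carved into fixed-size segments of
 * COMPRESSED_SWAP_CHUNK_SIZE bytes, so vm_swap_create_file() computes
 *
 *	swp_nsegs = swp_size / COMPRESSED_SWAP_CHUNK_SIZE
 *
 * Segment 'i' is tracked by bit (i % 8) of byte (i >> 3) in swp_bitmap,
 * and swp_csegs[i] points back at the c_segment occupying it, which is
 * what allows vm_swap_reclaim() to relocate live segments.
 */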
queue_head_t	swf_global_queue;
boolean_t	swp_trim_supported = FALSE;

#define VM_SWAPFILE_DELAYED_TRIM_MAX	128

extern clock_sec_t dont_trim_until_ts;
clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
clock_sec_t vm_swapfile_last_successful_create_ts = 0;
int vm_swapfile_can_be_created = FALSE;
boolean_t delayed_trim_handling_in_progress = FALSE;

static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapout_thread(void);
static void vm_swapfile_create_thread(void);
static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment();
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim();
static void vm_swap_wait_on_trim_handling_in_progress(void);



#define VM_SWAP_SHOULD_DEFRAGMENT()	(c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)
#define VM_SWAP_SHOULD_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_CREATE(cur_ts)	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)	((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)


#define VM_SWAPFILE_DELAYED_CREATE	15

#define VM_SWAP_BUSY()	((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)


#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif

#if ENCRYPTED_SWAP
extern boolean_t swap_crypt_ctx_initialized;
extern void swap_crypt_ctx_initialize(void);
extern const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE];
extern aes_ctx swap_crypt_ctx;
extern unsigned long vm_page_encrypt_counter;
extern unsigned long vm_page_decrypt_counter;
#endif /* ENCRYPTED_SWAP */

extern void vm_pageout_io_throttle(void);
extern void vm_pageout_reinit_tuneables(void);
extern void vm_swap_file_set_tuneables(void);

struct swapfile *vm_swapfile_for_handle(uint64_t);
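/*
 * A swap "handle" (the f_offset passed around below) packs the owning
 * swapfile's index into its high bits and the byte offset within that
 * file into its low bits.  A minimal sketch of the encode/decode, as
 * done by vm_swap_put() and vm_swapfile_for_handle():
 *
 *	handle = ((uint64_t)swp_index << SWAP_DEVICE_SHIFT) | file_offset;
 *	index  = handle >> SWAP_DEVICE_SHIFT;
 *	offset = handle & SWAP_SLOT_MASK;
 */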
/*
 * Called with the vm_swap_data_lock held.
 */

struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	swapfile_index = 0;
	struct swapfile	*swf = NULL;

	file_offset = (f_offset & SWAP_SLOT_MASK);
	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if (swapfile_index == swf->swp_index) {
			break;
		}

		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
		swf = NULL;
	}

	return swf;
}

void
vm_compressor_swap_init()
{
	thread_t	thread = NULL;

	lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
	lck_grp_init(&vm_swap_data_lock_grp,
		     "vm_swap_data",
		     &vm_swap_data_lock_grp_attr);
	lck_attr_setdefault(&vm_swap_data_lock_attr);
	lck_mtx_init_ext(&vm_swap_data_lock,
			 &vm_swap_data_lock_ext,
			 &vm_swap_data_lock_grp,
			 &vm_swap_data_lock_attr);

	queue_init(&swf_global_queue);


	if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapout_thread: create failed");
	}
	thread->options |= TH_OPT_VMPRIV;
	vm_swapout_thread_id = thread->thread_id;

	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_create_thread: create failed");
	}
	thread->options |= TH_OPT_VMPRIV;

	thread_deallocate(thread);


	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_gc_thread: create failed");
	}
	thread_deallocate(thread);

	proc_set_task_policy_thread(kernel_task, thread->thread_id,
				    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
	proc_set_task_policy_thread(kernel_task, thread->thread_id,
				    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

#if ENCRYPTED_SWAP
	if (swap_crypt_ctx_initialized == FALSE) {
		swap_crypt_ctx_initialize();
	}
#endif /* ENCRYPTED_SWAP */

	memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1);

	vm_swap_up = TRUE;

	printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? "ON" : "OFF");
}
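/*
 * Summary: the three helper threads started above sleep on distinct
 * events (vm_swapout_thread on &c_swapout_list_head,
 * vm_swapfile_create_thread on &vm_swapfile_create_needed, and
 * vm_swapfile_gc_thread on &vm_swapfile_gc_needed) and are woken from
 * the paths below as work for each accumulates.
 */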
void
vm_swap_file_set_tuneables()
{
	struct vnode	*vp;
	char		*pathname;
	int		namelen;

	if (strlen(swapfilename) == 0) {
		/*
		 * If no swapfile name has been set, we'll
		 * use the default name.
		 *
		 * Also, this function is only called from the vm_pageout_scan thread
		 * via vm_consider_waking_compactor_swapper,
		 * so we don't need to worry about a race in checking/setting the name here.
		 */
		strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
	}
	namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
	pathname = (char*)kalloc(namelen);
	memset(pathname, 0, namelen);
	snprintf(pathname, namelen, "%s%d", swapfilename, 0);

	vm_swapfile_open(pathname, &vp);

	if (vp == NULL)
		goto done;

	if (vnode_pager_isSSD(vp) == FALSE)
		vm_pageout_reinit_tuneables();
	vnode_setswapmount(vp);
	vm_swapfile_close((uint64_t)pathname, vp);
done:
	kfree(pathname, namelen);
}


#if ENCRYPTED_SWAP
void
vm_swap_encrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} encrypt_iv;

	assert(swap_crypt_ctx_initialized);

	bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));

	encrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&encrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Encrypt the c_segment.
	 */
	aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
			&encrypt_iv.aes_iv[0],
			(unsigned int)(size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.encrypt);

	vm_page_encrypt_counter += (size/PAGE_SIZE_64);
}
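/*
 * Summary: the IV used above is derived by AES-CBC-encrypting the
 * c_segment's own address with a null IV.  vm_swap_decrypt() below can
 * therefore regenerate the identical IV from the same c_seg pointer,
 * which is why it too calls aes_encrypt_cbc() on its "initial vector"
 * before performing the actual decryption.
 */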
void
vm_swap_decrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} decrypt_iv;


	assert(swap_crypt_ctx_initialized);

	/*
	 * Prepare an "initial vector" for the decryption.
	 * It has to be the same as the "initial vector" we
	 * used to encrypt that page.
	 */
	bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));

	decrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&decrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Decrypt the c_segment.
	 */
	aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
			&decrypt_iv.aes_iv[0],
			(unsigned int) (size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.decrypt);

	vm_page_decrypt_counter += (size/PAGE_SIZE_64);
}
#endif /* ENCRYPTED_SWAP */


void
vm_swap_consider_defragmenting()
{
	if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
	    (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {

		if (!vm_swapfile_gc_thread_running) {
			lck_mtx_lock(&vm_swap_data_lock);

			if (!vm_swapfile_gc_thread_running)
				thread_wakeup((event_t) &vm_swapfile_gc_needed);

			lck_mtx_unlock(&vm_swap_data_lock);
		}
	}
}


int vm_swap_defragment_yielded = 0;
int vm_swap_defragment_swapin = 0;
int vm_swap_defragment_free = 0;
int vm_swap_defragment_busy = 0;


static void
vm_swap_defragment()
{
	c_segment_t	c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {

		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_on_swappedout_sparse_q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			vm_swap_defragment_busy++;
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

			c_seg_swapin(c_seg, TRUE);
			lck_mtx_unlock_always(&c_seg->c_lock);

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}
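/*
 * Summary: "defragmenting" here means pulling sparsely populated
 * swapped-out segments back into memory (or freeing them outright once
 * they hold no used bytes).  Once resident, their contents can be
 * consolidated by the compactor, and the underlying swapfile segments
 * become reclaimable.
 */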
static void
vm_swapfile_create_thread(void)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		clock_get_system_nanotime(&sec, &nsec);

		if (VM_SWAP_SHOULD_CREATE(sec) == 0)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (vm_swap_create_file() == FALSE) {
			vm_swapfile_last_failed_to_create_ts = sec;
			HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);

		} else
			vm_swapfile_last_successful_create_ts = sec;
	}
	vm_swapfile_create_thread_running = 0;

	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}


static void
vm_swapfile_gc_thread(void)
{
	boolean_t	need_defragment;
	boolean_t	need_reclaim;

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	while (TRUE) {

		lck_mtx_lock(&vm_swap_data_lock);

		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
			break;

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT())
			need_defragment = TRUE;

		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		if (need_defragment == FALSE && need_reclaim == FALSE)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE)
			vm_swap_defragment();
		if (need_reclaim == TRUE)
			vm_swap_reclaim();
	}
	vm_swapfile_gc_thread_running = 0;

	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}



int swapper_entered_T0 = 0;
int swapper_entered_T1 = 0;
int swapper_entered_T2 = 0;

static void
vm_swapout_thread_throttle_adjust(void)
{
	int swapper_throttle_new;

	if (swapper_throttle_inited == FALSE) {
		/*
		 * force this thread to be set to the correct
		 * throttling tier
		 */
		swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
		swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
		swapper_throttle_inited = TRUE;
		swapper_entered_T2++;
		goto done;
	}
	swapper_throttle_new = swapper_throttle;


	switch(swapper_throttle) {

	case THROTTLE_LEVEL_COMPRESSOR_TIER2:

		if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER1:

		if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0;
			swapper_entered_T0++;
			break;
		}
		if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER0:

		if (COMPRESSOR_NEEDS_TO_SWAP() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;
	}
done:
	if (swapper_throttle != swapper_throttle_new) {
		proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
					    TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
		proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
					    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

		swapper_throttle = swapper_throttle_new;
	}
}
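/*
 * Summary: the state machine above moves the swapout thread between
 * three I/O throttle tiers, from TIER2 (most throttled) through TIER1
 * to TIER0 (least throttled).  It escalates when pageout is blocked on
 * swap, a swapout target age is set, or a hibernation flush is in
 * progress, and relaxes back toward TIER2 once that pressure subsides.
 */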
static void
vm_swapout_thread(void)
{
	uint64_t	f_offset = 0;
	uint32_t	size = 0;
	c_segment_t	c_seg = NULL;
	kern_return_t	kr = KERN_SUCCESS;
	vm_offset_t	addr = 0;

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swapout_list_head)) {

		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_on_swapout_q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swapout_q = 0;
		c_swapout_count--;

		vm_swapout_thread_processed_segments++;

		thread_wakeup((event_t)&compaction_swapper_running);

		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			c_seg_free_locked(c_seg);
			goto c_seg_was_freed;
		}
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		lck_mtx_unlock_always(c_list_lock);

		addr = (vm_offset_t) c_seg->c_store.c_buffer;

		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char*)addr, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		vm_swapout_thread_throttle_adjust();

		kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg);

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (kr == KERN_SUCCESS) {

			if (C_SEG_ONDISK_IS_SPARSE(c_seg) && hibernate_flushing == FALSE) {

				c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg);
				c_seg->c_on_swappedout_sparse_q = 1;
				c_swappedout_sparse_count++;

			} else {
				if (hibernate_flushing == TRUE && (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
								   c_seg->c_generation_id <= last_c_segment_to_warm_generation_id))
					queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
				else
					queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
				c_seg->c_on_swappedout_q = 1;
				c_swappedout_count++;
			}
			c_seg->c_store.c_swap_handle = f_offset;
			c_seg->c_ondisk = 1;

			VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);

			if (c_seg->c_bytes_used)
				OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
		} else {
#if ENCRYPTED_SWAP
			vm_swap_decrypt(c_seg);
#endif /* ENCRYPTED_SWAP */
			c_seg_insert_into_q(&c_age_list_head, c_seg);
			c_seg->c_on_age_q = 1;
			c_age_count++;

			vm_swap_put_failures++;
		}
		lck_mtx_unlock_always(c_list_lock);

		if (c_seg->c_must_free)
			c_seg_free(c_seg);
		else {
			c_seg->c_busy_swapping = 0;
			C_SEG_WAKEUP_DONE(c_seg);
			lck_mtx_unlock_always(&c_seg->c_lock);
		}

		if (kr == KERN_SUCCESS)
			kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);

		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		if (kr == KERN_SUCCESS) {
			kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_ALLOCSIZE);
			OSAddAtomic64(-C_SEG_ALLOCSIZE, &compressor_kvspace_used);
		}
		vm_pageout_io_throttle();
c_seg_was_freed:
		if (c_swapout_count == 0)
			vm_swap_consider_defragmenting();

		lck_mtx_lock_spin_always(c_list_lock);
	}

	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}
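/*
 * Summary: vm_swap_create_file() below requests MAX_SWAP_FILE_SIZE up
 * front and halves the request on each preallocation failure, giving up
 * once the size would drop below MIN_SWAP_FILE_SIZE.
 */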
boolean_t
vm_swap_create_file()
{
	uint64_t	size = 0;
	int		namelen = 0;
	boolean_t	swap_file_created = FALSE;
	boolean_t	swap_file_reuse = FALSE;
	struct swapfile	*swf = NULL;

	/*
	 * Any swapfile structure ready for re-use?
	 */

	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if (swf->swp_flags == SWAP_REUSE) {
			swap_file_reuse = TRUE;
			break;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (swap_file_reuse == FALSE) {

		if (strlen(swapfilename) == 0) {
			/*
			 * If no swapfile name has been set, we'll
			 * use the default name.
			 *
			 * Also, this function is only called from the swapfile management thread,
			 * so we don't need to worry about a race in checking/setting the name here.
			 */
			strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
		}

		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

		swf = (struct swapfile*) kalloc(sizeof *swf);
		memset(swf, 0, sizeof(*swf));

		swf->swp_index = vm_num_swap_files + 1;
		swf->swp_pathlen = namelen;
		swf->swp_path = (char*)kalloc(swf->swp_pathlen);

		memset(swf->swp_path, 0, namelen);

		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
	}

	vm_swapfile_open(swf->swp_path, &swf->swp_vp);

	if (swf->swp_vp == NULL) {
		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
		return FALSE;
	}
	vm_swapfile_can_be_created = TRUE;

	size = MAX_SWAP_FILE_SIZE;

	while (size >= MIN_SWAP_FILE_SIZE) {

		if (vm_swapfile_preallocate(swf->swp_vp, &size) == 0) {

			int num_bytes_for_bitmap = 0;

			swap_file_created = TRUE;

			swf->swp_size = size;
			swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
			swf->swp_nseginuse = 0;
			swf->swp_free_hint = 0;

			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
			memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);

			swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
			memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));

			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 * and 0 if it is
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
				swp_trim_supported = TRUE;

			lck_mtx_lock(&vm_swap_data_lock);

			swf->swp_flags = SWAP_READY;

			if (swap_file_reuse == FALSE) {
				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
			}

			vm_num_swap_files++;

			vm_swapfile_total_segs_alloced += swf->swp_nsegs;

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_wakeup((event_t) &vm_num_swap_files);

			break;
		} else {

			size = size / 2;
		}
	}
	if (swap_file_created == FALSE) {

		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

		swf->swp_vp = NULL;

		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
	}
	return swap_file_created;
}
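/*
 * Summary: swap slots are single use.  Once vm_swap_get() has read a
 * segment back in, it immediately returns the slot via vm_swap_free(),
 * whether or not the read succeeded; a segment that is swapped out again
 * goes through vm_swap_put() and receives a brand new handle.
 */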
kern_return_t
vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size)
{
	struct swapfile	*swf = NULL;
	uint64_t	file_offset = 0;
	int		retval = 0;

	if (addr == 0) {
		return KERN_FAILURE;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
		retval = 1;
		goto done;
	}
	swf->swp_io_count++;

	lck_mtx_unlock(&vm_swap_data_lock);

	file_offset = (f_offset & SWAP_SLOT_MASK);
	retval = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int)(size / PAGE_SIZE_64), SWAP_READ);

	if (retval == 0)
		VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
	else
		vm_swap_get_failures++;

	/*
	 * Free this slot in the swap structure.
	 */
	vm_swap_free(f_offset);

	lck_mtx_lock(&vm_swap_data_lock);
	swf->swp_io_count--;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (retval == 0)
		return KERN_SUCCESS;
	else
		return KERN_FAILURE;
}
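/*
 * Summary: vm_swap_put() below allocates a slot with a first-fit scan of
 * each file's bitmap, starting at swp_free_hint (the lowest index that
 * might be free).  For a candidate index the test is, in effect:
 *
 *	in_use = swp_bitmap[segidx >> 3] & (1 << (segidx % 8));
 *
 * The free paths push swp_free_hint back down whenever a lower-numbered
 * segment is released.
 */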
kern_return_t
vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg)
{
	unsigned int	segidx = 0;
	struct swapfile	*swf = NULL;
	uint64_t	file_offset = 0;
	uint64_t	swapfile_index = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	boolean_t	swf_eligible = FALSE;
	boolean_t	waiting = FALSE;
	boolean_t	retried = FALSE;
	int		error = 0;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (addr == 0 || f_offset == NULL) {
		return KERN_FAILURE;
	}
retry:
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		segidx = swf->swp_free_hint;

		swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

		if (swf_eligible) {

			while(segidx < swf->swp_nsegs) {

				byte_for_segidx = segidx >> 3;
				offset_within_byte = segidx % 8;

				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
					segidx++;
					continue;
				}

				(swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);

				file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
				swf->swp_nseginuse++;
				swf->swp_io_count++;
				swapfile_index = swf->swp_index;

				vm_swapfile_total_segs_used++;

				clock_get_system_nanotime(&sec, &nsec);

				if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
					thread_wakeup((event_t) &vm_swapfile_create_needed);

				lck_mtx_unlock(&vm_swap_data_lock);

				goto done;
			}
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	clock_get_system_nanotime(&sec, &nsec);

	if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
		thread_wakeup((event_t) &vm_swapfile_create_needed);

	if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
		waiting = TRUE;
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
	} else
		hibernate_no_swapspace = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}

	return KERN_FAILURE;

done:
	error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE);

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_csegs[segidx] = c_seg;

	swf->swp_io_count--;

	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (error) {
		vm_swap_free(*f_offset);

		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}
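/*
 * Summary: there are two free paths.  When the backing store supports
 * TRIM, vm_swap_free() queues the slot on the file's
 * swp_delayed_trim_list_head and wakes the create thread to batch-issue
 * the trims once VM_SWAPFILE_DELAYED_TRIM_MAX entries have accumulated.
 * Otherwise (or while the file is being reclaimed) the slot is released
 * immediately via vm_swap_free_now() below.
 */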
static void
vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	segidx = 0;


	if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {

		unsigned int byte_for_segidx = 0;
		unsigned int offset_within_byte = 0;

		file_offset = (f_offset & SWAP_SLOT_MASK);
		segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
			thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}
}


uint32_t vm_swap_free_now_count = 0;
uint32_t vm_swap_free_delayed_count = 0;


void
vm_swap_free(uint64_t f_offset)
{
	struct swapfile		*swf = NULL;
	struct trim_list	*tl = NULL;
	clock_sec_t		sec;
	clock_nsec_t		nsec;

	if (swp_trim_supported == TRUE)
		tl = kalloc(sizeof(struct trim_list));

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {

		if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
			/*
			 * don't delay the free if the underlying disk doesn't support
			 * trim, or we're in the midst of reclaiming this swap file, since
			 * we don't want to move segments that are technically free
			 * but not yet handled by the delayed free mechanism
			 */
			vm_swap_free_now(swf, f_offset);

			vm_swap_free_now_count++;
			goto done;
		}
		tl->tl_offset = f_offset & SWAP_SLOT_MASK;
		tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;

		tl->tl_next = swf->swp_delayed_trim_list_head;
		swf->swp_delayed_trim_list_head = tl;
		swf->swp_delayed_trim_count++;
		tl = NULL;

		if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
			clock_get_system_nanotime(&sec, &nsec);

			if (sec > dont_trim_until_ts)
				thread_wakeup((event_t) &vm_swapfile_create_needed);
		}
		vm_swap_free_delayed_count++;
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (tl != NULL)
		kfree(tl, sizeof(struct trim_list));
}
static void
vm_swap_wait_on_trim_handling_in_progress()
{
	while (delayed_trim_handling_in_progress == TRUE) {

		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
}


static void
vm_swap_handle_delayed_trims(boolean_t force_now)
{
	struct swapfile *swf = NULL;

	/*
	 * serialize the race between us and vm_swap_reclaim...
	 * if vm_swap_reclaim wins it will turn off SWAP_READY
	 * on the victim it has chosen... we can just skip over
	 * that file since vm_swap_reclaim will first process
	 * all of the delayed trims associated with it
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	/*
	 * no need to hold the lock to walk the swf list since
	 * vm_swap_create (the only place where we add to this list)
	 * is run on the same thread as this function,
	 * and vm_swap_reclaim doesn't remove items from this list,
	 * instead marking them with SWAP_REUSE for future re-use
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {

			assert(!(swf->swp_flags & SWAP_RECLAIM));
			vm_swap_do_delayed_trim(swf);
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = FALSE;
	thread_wakeup((event_t) &delayed_trim_handling_in_progress);

	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
		thread_wakeup((event_t) &vm_swapfile_gc_needed);

	lck_mtx_unlock(&vm_swap_data_lock);
}
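/*
 * Summary: vm_swap_do_delayed_trim() below works in phases.  It detaches
 * the whole delayed-trim list while holding the vm_swap_data_lock,
 * issues the vnode_trim_list() I/O with the lock dropped, and only then
 * clears the bitmap bits, reacquiring the lock for each entry.
 */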
static void
vm_swap_do_delayed_trim(struct swapfile *swf)
{
	struct trim_list *tl, *tl_head;

	lck_mtx_lock(&vm_swap_data_lock);

	tl_head = swf->swp_delayed_trim_list_head;
	swf->swp_delayed_trim_list_head = NULL;
	swf->swp_delayed_trim_count = 0;

	lck_mtx_unlock(&vm_swap_data_lock);

	vnode_trim_list(swf->swp_vp, tl_head, TRUE);

	while ((tl = tl_head) != NULL) {
		unsigned int	segidx = 0;
		unsigned int	byte_for_segidx = 0;
		unsigned int	offset_within_byte = 0;

		lck_mtx_lock(&vm_swap_data_lock);

		segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		lck_mtx_unlock(&vm_swap_data_lock);

		tl_head = tl->tl_next;

		kfree(tl, sizeof(struct trim_list));
	}
}


void
vm_swap_flush()
{
	return;
}

int vm_swap_reclaim_yielded = 0;

void
vm_swap_reclaim(void)
{
	vm_offset_t	addr = 0;
	unsigned int	segidx = 0;
	uint64_t	f_offset = 0;
	struct swapfile	*swf = NULL;
	struct swapfile	*smallest_swf = NULL;
	unsigned int	min_nsegs = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	uint32_t	c_size = 0;

	c_segment_t	c_seg = NULL;

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT) != KERN_SUCCESS) {
		panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
	}

	lck_mtx_lock(&vm_swap_data_lock);

	/*
	 * if we're running the swapfile list looking for
	 * candidates with delayed trims, we need to
	 * wait before making our decision concerning
	 * the swapfile we want to reclaim
	 */
	vm_swap_wait_on_trim_handling_in_progress();

	/*
	 * from here until we knock down the SWAP_READY bit,
	 * we need to remain behind the vm_swap_data_lock...
	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
	 * will not consider this swapfile for processing
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);
	min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
	smallest_swf = NULL;

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {

			smallest_swf = swf;
			min_nsegs = swf->swp_nseginuse;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (smallest_swf == NULL)
		goto done;

	swf = smallest_swf;


	swf->swp_flags &= ~SWAP_READY;
	swf->swp_flags |= SWAP_RECLAIM;

	if (swf->swp_delayed_trim_count) {

		lck_mtx_unlock(&vm_swap_data_lock);

		vm_swap_do_delayed_trim(swf);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	segidx = 0;
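	/*
	 * Summary of the loop below: for each in-use segment of the victim
	 * file, wait out any outstanding I/O, read the segment into the
	 * scratch buffer at 'addr', re-issue it through vm_swap_put() (which
	 * lands it in some other swapfile), and update the owning c_segment's
	 * c_swap_handle.  If either I/O fails, the c_segment is converted
	 * back to a swapped-in segment instead.
	 */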
	while (segidx < swf->swp_nsegs) {

ReTry_for_cseg:
		/*
		 * Wait for outgoing I/Os.
		 */
		while (swf->swp_io_count) {

			swf->swp_flags |= SWAP_WANTED;

			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);
		}
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
			vm_swap_reclaim_yielded++;
			break;
		}

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {

			segidx++;
			continue;
		}

		c_seg = swf->swp_csegs[segidx];

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_ondisk);

		if (c_seg->c_busy) {

			c_seg->c_wanted = 1;

			assert_wait((event_t) (c_seg), THREAD_UNINT);
			lck_mtx_unlock_always(&c_seg->c_lock);

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);

			goto ReTry_for_cseg;
		}
		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

		f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;

		swf->swp_csegs[segidx] = NULL;
		swf->swp_nseginuse--;

		vm_swapfile_total_segs_used--;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (c_seg->c_must_free) {
			C_SEG_BUSY(c_seg);
			c_seg_free(c_seg);
		} else {

			C_SEG_BUSY(c_seg);
			c_seg->c_busy_swapping = 1;
#if !CHECKSUM_THE_SWAP
			c_seg_trim_tail(c_seg);
#endif
			c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

			assert(c_size <= C_SEG_BUFSIZE);

			lck_mtx_unlock_always(&c_seg->c_lock);

			if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {

				/*
				 * reading the data back in failed, so convert c_seg
				 * to a swapped in c_segment that contains no data
				 */
				c_seg->c_store.c_buffer = (int32_t *)NULL;
				c_seg_swapin_requeue(c_seg);

				goto swap_io_failed;
			}
			VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);

			if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
				vm_offset_t	c_buffer;

				/*
				 * the put failed, so convert c_seg to a fully swapped in c_segment
				 * with valid data
				 */
				if (kernel_memory_allocate(kernel_map, &c_buffer, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS)
					panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
				OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used);

				kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR);

				memcpy((char *)c_buffer, (char *)addr, c_size);

				c_seg->c_store.c_buffer = (int32_t *)c_buffer;
#if ENCRYPTED_SWAP
				vm_swap_decrypt(c_seg);
#endif /* ENCRYPTED_SWAP */
				c_seg_swapin_requeue(c_seg);

				OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);

				goto swap_io_failed;
			}
			VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);

			lck_mtx_lock_spin_always(&c_seg->c_lock);

			assert(c_seg->c_ondisk);
			/*
			 * The c_seg will now know about the new location on disk.
			 */
			c_seg->c_store.c_swap_handle = f_offset;
swap_io_failed:
			c_seg->c_busy_swapping = 0;

			if (c_seg->c_must_free)
				c_seg_free(c_seg);
			else {
				C_SEG_WAKEUP_DONE(c_seg);

				lck_mtx_unlock_always(&c_seg->c_lock);
			}
		}
		lck_mtx_lock(&vm_swap_data_lock);
	}

	if (swf->swp_nseginuse) {

		swf->swp_flags &= ~SWAP_RECLAIM;
		swf->swp_flags |= SWAP_READY;

		goto done;
	}
	/*
	 * We don't remove this inactive swf from the queue.
	 * That way, we can re-use it when needed again and
	 * preserve the namespace. The delayed_trim processing
	 * is also dependent on us not removing swfs from the queue.
	 */
	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);

	vm_num_swap_files--;

	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;

	lck_mtx_unlock(&vm_swap_data_lock);

	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

	kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
	kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_vp = NULL;
	swf->swp_size = 0;
	swf->swp_free_hint = 0;
	swf->swp_nsegs = 0;
	swf->swp_flags = SWAP_REUSE;

done:
	thread_wakeup((event_t) &swf->swp_flags);
	lck_mtx_unlock(&vm_swap_data_lock);

	kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
}


uint64_t
vm_swap_get_total_space(void)
{
	uint64_t total_space = 0;

	total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;

	return total_space;
}

uint64_t
vm_swap_get_used_space(void)
{
	uint64_t used_space = 0;

	used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;

	return used_space;
}

uint64_t
vm_swap_get_free_space(void)
{
	return (vm_swap_get_total_space() - vm_swap_get_used_space());
}


int
vm_swap_low_on_space(void)
{
	if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
		return (0);

	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {

		if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
			return (0);

		if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
			return (1);
	}
	return (0);
}