/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include "vm_compressor_backing_store.h"
#include <vm/vm_protos.h>

#include <IOKit/IOHibernatePrivate.h>


boolean_t	compressor_store_stop_compaction = FALSE;
boolean_t	vm_swap_up = FALSE;
boolean_t	vm_swapfile_create_needed = FALSE;
boolean_t	vm_swapfile_gc_needed = FALSE;

int		swapper_throttle = -1;
boolean_t	swapper_throttle_inited = FALSE;
uint64_t	vm_swapout_thread_id;

uint64_t	vm_swap_put_failures = 0;
uint64_t	vm_swap_get_failures = 0;
int		vm_num_swap_files = 0;
int		vm_swapout_thread_processed_segments = 0;
int		vm_swapout_thread_awakened = 0;
int		vm_swapfile_create_thread_awakened = 0;
int		vm_swapfile_create_thread_running = 0;
int		vm_swapfile_gc_thread_awakened = 0;
int		vm_swapfile_gc_thread_running = 0;

unsigned int	vm_swapfile_total_segs_alloced = 0;
unsigned int	vm_swapfile_total_segs_used = 0;


#define SWAP_READY	0x1	/* Swap file is ready to be used */
#define SWAP_RECLAIM	0x2	/* Swap file is marked to be reclaimed */
#define SWAP_WANTED	0x4	/* Swap file has waiters */
#define SWAP_REUSE	0x8	/* Swap file is on the Q and has a name. Reuse after init-ing. */

struct swapfile {
	queue_head_t		swp_queue;	/* list of swap files */
	char			*swp_path;	/* saved pathname of swap file */
	struct vnode		*swp_vp;	/* backing vnode */
	uint64_t		swp_size;	/* size of this swap file */
	uint8_t			*swp_bitmap;	/* bitmap showing the alloced/freed slots in the swap file */
	unsigned int		swp_pathlen;	/* length of pathname */
	unsigned int		swp_nsegs;	/* #segments we can use */
	unsigned int		swp_nseginuse;	/* #segments in use */
	unsigned int		swp_index;	/* index of this swap file */
	unsigned int		swp_flags;	/* state of swap file */
	unsigned int		swp_free_hint;	/* offset of 1st free chunk */
	unsigned int		swp_io_count;	/* count of outstanding I/Os */
	c_segment_t		*swp_csegs;	/* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list	*swp_delayed_trim_list_head;
	unsigned int		swp_delayed_trim_count;
};

queue_head_t	swf_global_queue;
boolean_t	swp_trim_supported = FALSE;

#define		VM_SWAPFILE_DELAYED_TRIM_MAX	128

extern clock_sec_t	dont_trim_until_ts;
clock_sec_t		vm_swapfile_last_failed_to_create_ts = 0;
clock_sec_t		vm_swapfile_last_successful_create_ts = 0;
int			vm_swapfile_can_be_created = FALSE;
boolean_t		delayed_trim_handling_in_progress = FALSE;

static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapout_thread(void);
static void vm_swapfile_create_thread(void);
static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment(void);
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim(struct swapfile *);
static void vm_swap_wait_on_trim_handling_in_progress(void);


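/*
 * Policy predicates driving the worker threads:
 *
 * DEFRAGMENT - more than 1/4 of the segments in use on disk are
 *	sparsely populated, so it's worth swapping them in to compact.
 * RECLAIM    - enough allocated-but-unused segments exist that an
 *	entire swapfile could be emptied and returned to the filesystem.
 * CREATE     - free segments have dropped below the high water mark
 *	and we haven't failed a create within the last
 *	VM_SWAPFILE_DELAYED_CREATE seconds.
 * TRIM       - a swapfile has accumulated a full batch of delayed trims.
 */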
#define	VM_SWAP_SHOULD_DEFRAGMENT()	(c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)
#define VM_SWAP_SHOULD_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_CREATE(cur_ts)	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)	((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)


#define	VM_SWAPFILE_DELAYED_CREATE	15

#define VM_SWAP_BUSY()	((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)


#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif

#if ENCRYPTED_SWAP
extern boolean_t		swap_crypt_ctx_initialized;
extern void			swap_crypt_ctx_initialize(void);
extern const unsigned char	swap_crypt_null_iv[AES_BLOCK_SIZE];
extern aes_ctx			swap_crypt_ctx;
extern unsigned long		vm_page_encrypt_counter;
extern unsigned long		vm_page_decrypt_counter;
#endif /* ENCRYPTED_SWAP */

extern void			vm_pageout_io_throttle(void);
extern void			vm_pageout_reinit_tuneables(void);
extern void			vm_swap_file_set_tuneables(void);

struct swapfile *vm_swapfile_for_handle(uint64_t);

/*
 * Called with the vm_swap_data_lock held.
 */
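/*
 * A swap handle packs the owning swapfile's index and the byte offset
 * within that file into a single 64-bit value:
 *
 *	handle = (swp_index << SWAP_DEVICE_SHIFT) | file_offset
 *
 * (constructed in vm_swap_put).  This routine recovers the swapfile
 * from the index bits of the handle.
 */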
struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)
{
	uint64_t		file_offset = 0;
	unsigned int		swapfile_index = 0;
	struct swapfile*	swf = NULL;

	file_offset = (f_offset & SWAP_SLOT_MASK);
	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if (swapfile_index == swf->swp_index) {
			break;
		}

		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
		swf = NULL;
	}

	return swf;
}

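/*
 * Set up the swap subsystem: initialize the swap data lock and the
 * global swapfile queue, then start the three worker threads
 * (swapout, swapfile create, swapfile gc) that drive it.
 */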
void
vm_compressor_swap_init()
{
	thread_t	thread = NULL;

	lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
	lck_grp_init(&vm_swap_data_lock_grp,
		     "vm_swap_data",
		     &vm_swap_data_lock_grp_attr);
	lck_attr_setdefault(&vm_swap_data_lock_attr);
	lck_mtx_init_ext(&vm_swap_data_lock,
			 &vm_swap_data_lock_ext,
			 &vm_swap_data_lock_grp,
			 &vm_swap_data_lock_attr);

	queue_init(&swf_global_queue);


	if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapout_thread: create failed");
	}
	thread->options |= TH_OPT_VMPRIV;
	vm_swapout_thread_id = thread->thread_id;

	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_create_thread: create failed");
	}
	thread->options |= TH_OPT_VMPRIV;

	thread_deallocate(thread);


	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_gc_thread: create failed");
	}
	thread_deallocate(thread);

	proc_set_task_policy_thread(kernel_task, thread->thread_id,
				    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
	proc_set_task_policy_thread(kernel_task, thread->thread_id,
				    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

#if ENCRYPTED_SWAP
	if (swap_crypt_ctx_initialized == FALSE) {
		swap_crypt_ctx_initialize();
	}
#endif /* ENCRYPTED_SWAP */

	memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1);

	vm_swap_up = TRUE;

	printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? "ON" : "OFF");
}


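/*
 * Open swapfile 0 to probe the backing store: if it isn't on an SSD,
 * re-initialize the pageout tuneables for rotational media, and record
 * the swap mount on the vnode via vnode_setswapmount.
 */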
void
vm_swap_file_set_tuneables()
{
	struct	vnode *vp;
	char	*pathname;
	int	namelen;

	if (strlen(swapfilename) == 0) {
		/*
		 * If no swapfile name has been set, we'll
		 * use the default name.
		 *
		 * Also, this function is only called from the vm_pageout_scan thread
		 * via vm_consider_waking_compactor_swapper,
		 * so we don't need to worry about a race in checking/setting the name here.
		 */
		strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
	}
	namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
	pathname = (char*)kalloc(namelen);
	memset(pathname, 0, namelen);
	snprintf(pathname, namelen, "%s%d", swapfilename, 0);

	vm_swapfile_open(pathname, &vp);

	if (vp == NULL)
		goto done;

	if (vnode_pager_isSSD(vp) == FALSE)
		vm_pageout_reinit_tuneables();
	vnode_setswapmount(vp);
	vm_swapfile_close((uint64_t)pathname, vp);
done:
	kfree(pathname, namelen);
}


#if ENCRYPTED_SWAP
void
vm_swap_encrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} encrypt_iv;

	assert(swap_crypt_ctx_initialized);

	bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));

	encrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&encrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Encrypt the c_segment.
	 */
	aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
			&encrypt_iv.aes_iv[0],
			(unsigned int)(size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.encrypt);

	vm_page_encrypt_counter += (size/PAGE_SIZE_64);
}

void
vm_swap_decrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} decrypt_iv;


	assert(swap_crypt_ctx_initialized);

	/*
	 * Prepare an "initial vector" for the decryption.
	 * It has to be the same as the "initial vector" we
	 * used to encrypt that page.
	 */
	bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));

	decrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&decrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Decrypt the c_segment.
	 */
	aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
			&decrypt_iv.aes_iv[0],
			(unsigned int) (size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.decrypt);

	vm_page_decrypt_counter += (size/PAGE_SIZE_64);
}
#endif /* ENCRYPTED_SWAP */


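/*
 * Wake the gc thread if defragmentation or reclamation looks
 * worthwhile and the swapper isn't busy.
 */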
void
vm_swap_consider_defragmenting()
{
	if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
	    (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {

		if (!vm_swapfile_gc_thread_running) {
			lck_mtx_lock(&vm_swap_data_lock);

			if (!vm_swapfile_gc_thread_running)
				thread_wakeup((event_t) &vm_swapfile_gc_needed);

			lck_mtx_unlock(&vm_swap_data_lock);
		}
	}
}


int vm_swap_defragment_yielded = 0;
int vm_swap_defragment_swapin = 0;
int vm_swap_defragment_free = 0;
int vm_swap_defragment_busy = 0;


static void
vm_swap_defragment()
{
	c_segment_t	c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {

		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_on_swappedout_sparse_q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			vm_swap_defragment_busy++;
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

			c_seg_swapin(c_seg, TRUE);
			lck_mtx_unlock_always(&c_seg->c_lock);

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}


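/*
 * Worker thread: flush any batched delayed trims, then create new
 * swapfiles for as long as VM_SWAP_SHOULD_CREATE says we need them,
 * recording the timestamp of the last failed/successful create.
 */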
static void
vm_swapfile_create_thread(void)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		clock_get_system_nanotime(&sec, &nsec);

		if (VM_SWAP_SHOULD_CREATE(sec) == 0)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (vm_swap_create_file() == FALSE) {
			vm_swapfile_last_failed_to_create_ts = sec;
			HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);

		} else
			vm_swapfile_last_successful_create_ts = sec;
	}
	vm_swapfile_create_thread_running = 0;

	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}

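/*
 * Worker thread: defragment sparsely populated swapped-out segments
 * and reclaim under-used swapfiles until neither is needed or the
 * swapper gets busy.
 */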
static void
vm_swapfile_gc_thread(void)
{
	boolean_t	need_defragment;
	boolean_t	need_reclaim;

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	while (TRUE) {

		lck_mtx_lock(&vm_swap_data_lock);

		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
			break;

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT())
			need_defragment = TRUE;

		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		if (need_defragment == FALSE && need_reclaim == FALSE)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE)
			vm_swap_defragment();
		if (need_reclaim == TRUE)
			vm_swap_reclaim();
	}
	vm_swapfile_gc_thread_running = 0;

	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}



int	  swapper_entered_T0 = 0;
int	  swapper_entered_T1 = 0;
int	  swapper_entered_T2 = 0;

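/*
 * Move the swapout thread between the compressor I/O throttling
 * tiers (TIER0 most urgent, TIER2 least) based on how badly the
 * compressor needs to swap, applying the new tier through the
 * task policy interface.
 */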
static void
vm_swapout_thread_throttle_adjust(void)
{
	int swapper_throttle_new;

	if (swapper_throttle_inited == FALSE) {
		/*
		 * force this thread to be set to the correct
		 * throttling tier... leaving swapper_throttle
		 * different from swapper_throttle_new guarantees
		 * the policy update at done: is applied
		 */
		swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
		swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
		swapper_throttle_inited = TRUE;
		swapper_entered_T2++;
		goto done;
	}
	swapper_throttle_new = swapper_throttle;


	switch (swapper_throttle) {

	case THROTTLE_LEVEL_COMPRESSOR_TIER2:

		if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER1:

		if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0;
			swapper_entered_T0++;
			break;
		}
		if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER0:

		if (COMPRESSOR_NEEDS_TO_SWAP() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;
	}
done:
	if (swapper_throttle != swapper_throttle_new) {
		proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
					    TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
		proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
					    TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

		swapper_throttle = swapper_throttle_new;
	}
}

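/*
 * Worker thread: drain the c_swapout queue.  Each segment is written
 * to a swapfile via vm_swap_put; on success it moves to a swapped-out
 * queue and its compressor memory is released, on failure it goes
 * back on the age queue.
 */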
static void
vm_swapout_thread(void)
{
	uint64_t	f_offset = 0;
	uint32_t	size = 0;
	c_segment_t	c_seg = NULL;
	kern_return_t	kr = KERN_SUCCESS;
	vm_offset_t	addr = 0;

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swapout_list_head)) {

		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_on_swapout_q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swapout_q = 0;
		c_swapout_count--;

		vm_swapout_thread_processed_segments++;

		thread_wakeup((event_t)&compaction_swapper_running);

		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			c_seg_free_locked(c_seg);
			goto c_seg_was_freed;
		}
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		lck_mtx_unlock_always(c_list_lock);

		addr = (vm_offset_t) c_seg->c_store.c_buffer;

		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char*)addr, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		vm_swapout_thread_throttle_adjust();

		kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg);

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (kr == KERN_SUCCESS) {

			if (C_SEG_ONDISK_IS_SPARSE(c_seg) && hibernate_flushing == FALSE) {

				c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg);
				c_seg->c_on_swappedout_sparse_q = 1;
				c_swappedout_sparse_count++;

			} else {
				if (hibernate_flushing == TRUE && (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
								   c_seg->c_generation_id <= last_c_segment_to_warm_generation_id))
					queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
				else
					queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
				c_seg->c_on_swappedout_q = 1;
				c_swappedout_count++;
			}
			c_seg->c_store.c_swap_handle = f_offset;
			c_seg->c_ondisk = 1;

			VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);

			if (c_seg->c_bytes_used)
				OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
		} else {
#if ENCRYPTED_SWAP
			vm_swap_decrypt(c_seg);
#endif /* ENCRYPTED_SWAP */
			c_seg_insert_into_q(&c_age_list_head, c_seg);
			c_seg->c_on_age_q = 1;
			c_age_count++;

			vm_swap_put_failures++;
		}
		lck_mtx_unlock_always(c_list_lock);

		if (c_seg->c_must_free)
			c_seg_free(c_seg);
		else {
			c_seg->c_busy_swapping = 0;
			C_SEG_WAKEUP_DONE(c_seg);
			lck_mtx_unlock_always(&c_seg->c_lock);
		}

		if (kr == KERN_SUCCESS)
			kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);

		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		if (kr == KERN_SUCCESS) {
			kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_ALLOCSIZE);
			OSAddAtomic64(-C_SEG_ALLOCSIZE, &compressor_kvspace_used);
		}
		vm_pageout_io_throttle();
c_seg_was_freed:
		if (c_swapout_count == 0)
			vm_swap_consider_defragmenting();

		lck_mtx_lock_spin_always(c_list_lock);
	}

	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}

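/*
 * Create a swapfile (re-using an existing SWAP_REUSE structure and
 * its name if one is available) and preallocate space for it, halving
 * the requested size on failure until we drop below MIN_SWAP_FILE_SIZE.
 * Returns TRUE if a usable swapfile was created.
 */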
boolean_t
vm_swap_create_file()
{
	uint64_t	size = 0;
	int		namelen = 0;
	boolean_t	swap_file_created = FALSE;
	boolean_t	swap_file_reuse = FALSE;
	struct swapfile *swf = NULL;

	/*
	 * Any swapfile structure ready for re-use?
	 */

	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if (swf->swp_flags == SWAP_REUSE) {
			swap_file_reuse = TRUE;
			break;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (swap_file_reuse == FALSE) {

		if (strlen(swapfilename) == 0) {
			/*
			 * If no swapfile name has been set, we'll
			 * use the default name.
			 *
			 * Also, this function is only called from the swapfile
			 * management thread, so we don't need to worry about a
			 * race in checking/setting the name here.
			 */
			strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
		}

		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

		swf = (struct swapfile*) kalloc(sizeof *swf);
		memset(swf, 0, sizeof(*swf));

		swf->swp_index = vm_num_swap_files + 1;
		swf->swp_pathlen = namelen;
		swf->swp_path = (char*)kalloc(swf->swp_pathlen);

		memset(swf->swp_path, 0, namelen);

		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
	}

	vm_swapfile_open(swf->swp_path, &swf->swp_vp);

	if (swf->swp_vp == NULL) {
		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
		return FALSE;
	}
	vm_swapfile_can_be_created = TRUE;

	size = MAX_SWAP_FILE_SIZE;

	while (size >= MIN_SWAP_FILE_SIZE) {

		if (vm_swapfile_preallocate(swf->swp_vp, &size) == 0) {

			int num_bytes_for_bitmap = 0;

			swap_file_created = TRUE;

			swf->swp_size = size;
			swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
			swf->swp_nseginuse = 0;
			swf->swp_free_hint = 0;

			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
			memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);

			swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
			memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));

			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 * and 0 if it is
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
				swp_trim_supported = TRUE;

			lck_mtx_lock(&vm_swap_data_lock);

			swf->swp_flags = SWAP_READY;

			if (swap_file_reuse == FALSE) {
				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
			}

			vm_num_swap_files++;

			vm_swapfile_total_segs_alloced += swf->swp_nsegs;

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_wakeup((event_t) &vm_num_swap_files);

			break;
		} else {

			size = size / 2;
		}
	}
	if (swap_file_created == FALSE) {

		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

		swf->swp_vp = NULL;

		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
	}
	return swap_file_created;
}


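/*
 * Read a swapped-out segment back into 'addr' from the swapfile and
 * offset encoded in 'f_offset', then free the swap slot.  Returns
 * KERN_SUCCESS on a clean read, KERN_FAILURE otherwise.
 */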
kern_return_t
vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size)
{
	struct swapfile *swf = NULL;
	uint64_t	file_offset = 0;
	int		retval = 0;

	if (addr == 0) {
		return KERN_FAILURE;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
		retval = 1;
		goto done;
	}
	swf->swp_io_count++;

	lck_mtx_unlock(&vm_swap_data_lock);

	file_offset = (f_offset & SWAP_SLOT_MASK);
	retval = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int)(size / PAGE_SIZE_64), SWAP_READ);

	if (retval == 0)
		VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
	else
		vm_swap_get_failures++;

	/*
	 * Free this slot in the swap structure.
	 */
	vm_swap_free(f_offset);

	lck_mtx_lock(&vm_swap_data_lock);
	swf->swp_io_count--;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (retval == 0)
		return KERN_SUCCESS;
	else
		return KERN_FAILURE;
}

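/*
 * Find a free segment-sized slot in a READY swapfile, mark it in the
 * bitmap and write 'size' bytes from 'addr' into it.  On success
 * *f_offset receives the swap handle (swapfile index and file offset).
 * If no slot is available we kick the create thread and may briefly
 * block before returning KERN_FAILURE.
 */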
kern_return_t
vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg)
{
	unsigned int	segidx = 0;
	struct swapfile *swf = NULL;
	uint64_t	file_offset = 0;
	uint64_t	swapfile_index = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	boolean_t	swf_eligible = FALSE;
	boolean_t	waiting = FALSE;
	boolean_t	retried = FALSE;
	int		error = 0;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (addr == 0 || f_offset == NULL) {
		return KERN_FAILURE;
	}
retry:
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		segidx = swf->swp_free_hint;

		swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

		if (swf_eligible) {

			while (segidx < swf->swp_nsegs) {

				byte_for_segidx = segidx >> 3;
				offset_within_byte = segidx % 8;

				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
					segidx++;
					continue;
				}

				(swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);

				file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
				swf->swp_nseginuse++;
				swf->swp_io_count++;
				swapfile_index = swf->swp_index;

				vm_swapfile_total_segs_used++;

				clock_get_system_nanotime(&sec, &nsec);

				if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
					thread_wakeup((event_t) &vm_swapfile_create_needed);

				lck_mtx_unlock(&vm_swap_data_lock);

				goto done;
			}
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	clock_get_system_nanotime(&sec, &nsec);

	if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
		thread_wakeup((event_t) &vm_swapfile_create_needed);

	if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
		waiting = TRUE;
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
	} else
		hibernate_no_swapspace = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}

	return KERN_FAILURE;

done:
	error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE);

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_csegs[segidx] = c_seg;

	swf->swp_io_count--;

	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (error) {
		vm_swap_free(*f_offset);

		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}



static void
vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	segidx = 0;


	if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {

		unsigned int byte_for_segidx = 0;
		unsigned int offset_within_byte = 0;

		file_offset = (f_offset & SWAP_SLOT_MASK);
		segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
			thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}
}


uint32_t vm_swap_free_now_count = 0;
uint32_t vm_swap_free_delayed_count = 0;

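/*
 * Release the swap slot named by 'f_offset'.  If the backing store
 * supports TRIM, the free is batched on the swapfile's delayed trim
 * list; otherwise (or while the file is being reclaimed) it is done
 * immediately via vm_swap_free_now.
 */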
void
vm_swap_free(uint64_t f_offset)
{
	struct swapfile *swf = NULL;
	struct trim_list *tl = NULL;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (swp_trim_supported == TRUE)
		tl = kalloc(sizeof(struct trim_list));

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {

		if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
			/*
			 * don't delay the free if the underlying disk doesn't support
			 * trim, or we're in the midst of reclaiming this swap file since
			 * we don't want to move segments that are technically free
			 * but not yet handled by the delayed free mechanism
			 */
			vm_swap_free_now(swf, f_offset);

			vm_swap_free_now_count++;
			goto done;
		}
		tl->tl_offset = f_offset & SWAP_SLOT_MASK;
		tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;

		tl->tl_next = swf->swp_delayed_trim_list_head;
		swf->swp_delayed_trim_list_head = tl;
		swf->swp_delayed_trim_count++;
		tl = NULL;

		if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
			clock_get_system_nanotime(&sec, &nsec);

			if (sec > dont_trim_until_ts)
				thread_wakeup((event_t) &vm_swapfile_create_needed);
		}
		vm_swap_free_delayed_count++;
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (tl != NULL)
		kfree(tl, sizeof(struct trim_list));
}


static void
vm_swap_wait_on_trim_handling_in_progress()
{
	while (delayed_trim_handling_in_progress == TRUE) {

		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
}


static void
vm_swap_handle_delayed_trims(boolean_t force_now)
{
	struct swapfile *swf = NULL;

	/*
	 * serialize the race between us and vm_swap_reclaim...
	 * if vm_swap_reclaim wins it will turn off SWAP_READY
	 * on the victim it has chosen... we can just skip over
	 * that file since vm_swap_reclaim will first process
	 * all of the delayed trims associated with it
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	/*
	 * no need to hold the lock to walk the swf list since
	 * vm_swap_create_file (the only place where we add to this list)
	 * is run on the same thread as this function
	 * and vm_swap_reclaim doesn't remove items from this list,
	 * instead marking them with SWAP_REUSE for future re-use
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {

			assert(!(swf->swp_flags & SWAP_RECLAIM));
			vm_swap_do_delayed_trim(swf);
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = FALSE;
	thread_wakeup((event_t) &delayed_trim_handling_in_progress);

	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
		thread_wakeup((event_t) &vm_swapfile_gc_needed);

	lck_mtx_unlock(&vm_swap_data_lock);
}

static void
vm_swap_do_delayed_trim(struct swapfile *swf)
{
	struct trim_list *tl, *tl_head;

	lck_mtx_lock(&vm_swap_data_lock);

	tl_head = swf->swp_delayed_trim_list_head;
	swf->swp_delayed_trim_list_head = NULL;
	swf->swp_delayed_trim_count = 0;

	lck_mtx_unlock(&vm_swap_data_lock);

	vnode_trim_list(swf->swp_vp, tl_head, TRUE);

	while ((tl = tl_head) != NULL) {
		unsigned int	segidx = 0;
		unsigned int	byte_for_segidx = 0;
		unsigned int	offset_within_byte = 0;

		lck_mtx_lock(&vm_swap_data_lock);

		segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		lck_mtx_unlock(&vm_swap_data_lock);

		tl_head = tl->tl_next;

		kfree(tl, sizeof(struct trim_list));
	}
}


void
vm_swap_flush()
{
	return;
}

int	vm_swap_reclaim_yielded = 0;

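/*
 * Empty the swapfile with the fewest segments in use by swapping
 * each live segment back in and re-issuing it to another swapfile
 * via vm_swap_put.  If the file ends up empty, close it and mark
 * its swapfile structure SWAP_REUSE so the name can be recycled.
 */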
void
vm_swap_reclaim(void)
{
	vm_offset_t	addr = 0;
	unsigned int	segidx = 0;
	uint64_t	f_offset = 0;
	struct swapfile *swf = NULL;
	struct swapfile *smallest_swf = NULL;
	unsigned int	min_nsegs = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	uint32_t	c_size = 0;

	c_segment_t	c_seg = NULL;

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT) != KERN_SUCCESS) {
		panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
	}

	lck_mtx_lock(&vm_swap_data_lock);

	/*
	 * if we're running the swapfile list looking for
	 * candidates with delayed trims, we need to
	 * wait before making our decision concerning
	 * the swapfile we want to reclaim
	 */
	vm_swap_wait_on_trim_handling_in_progress();

	/*
	 * from here until we knock down the SWAP_READY bit,
	 * we need to remain behind the vm_swap_data_lock...
	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
	 * will not consider this swapfile for processing
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);
	min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
	smallest_swf = NULL;

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {

			smallest_swf = swf;
			min_nsegs = swf->swp_nseginuse;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (smallest_swf == NULL)
		goto done;

	swf = smallest_swf;

	swf->swp_flags &= ~SWAP_READY;
	swf->swp_flags |= SWAP_RECLAIM;

	if (swf->swp_delayed_trim_count) {

		lck_mtx_unlock(&vm_swap_data_lock);

		vm_swap_do_delayed_trim(swf);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	segidx = 0;

	while (segidx < swf->swp_nsegs) {

ReTry_for_cseg:
		/*
		 * Wait for outgoing I/Os.
		 */
		while (swf->swp_io_count) {

			swf->swp_flags |= SWAP_WANTED;

			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);
		}
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
			vm_swap_reclaim_yielded++;
			break;
		}

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {

			segidx++;
			continue;
		}

		c_seg = swf->swp_csegs[segidx];

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_ondisk);

		if (c_seg->c_busy) {

			c_seg->c_wanted = 1;

			assert_wait((event_t) (c_seg), THREAD_UNINT);
			lck_mtx_unlock_always(&c_seg->c_lock);

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);

			goto ReTry_for_cseg;
		}
		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

		f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;

		swf->swp_csegs[segidx] = NULL;
		swf->swp_nseginuse--;

		vm_swapfile_total_segs_used--;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (c_seg->c_must_free) {
			C_SEG_BUSY(c_seg);
			c_seg_free(c_seg);
		} else {

			C_SEG_BUSY(c_seg);
			c_seg->c_busy_swapping = 1;
#if !CHECKSUM_THE_SWAP
			c_seg_trim_tail(c_seg);
#endif
			c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

			assert(c_size <= C_SEG_BUFSIZE);

			lck_mtx_unlock_always(&c_seg->c_lock);

			if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {

				/*
				 * reading the data back in failed, so convert c_seg
				 * to a swapped in c_segment that contains no data
				 */
				c_seg->c_store.c_buffer = (int32_t *)NULL;
				c_seg_swapin_requeue(c_seg);

				goto swap_io_failed;
			}
			VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);

			if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
				vm_offset_t	c_buffer;

				/*
				 * the put failed, so convert c_seg to a fully swapped in c_segment
				 * with valid data
				 */
				if (kernel_memory_allocate(kernel_map, &c_buffer, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS)
					panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
				OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used);

				kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR);

				memcpy((char *)c_buffer, (char *)addr, c_size);

				c_seg->c_store.c_buffer = (int32_t *)c_buffer;
#if ENCRYPTED_SWAP
				vm_swap_decrypt(c_seg);
#endif /* ENCRYPTED_SWAP */
				c_seg_swapin_requeue(c_seg);

				OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);

				goto swap_io_failed;
			}
			VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);

			lck_mtx_lock_spin_always(&c_seg->c_lock);

			assert(c_seg->c_ondisk);
			/*
			 * The c_seg will now know about the new location on disk.
			 */
			c_seg->c_store.c_swap_handle = f_offset;
swap_io_failed:
			c_seg->c_busy_swapping = 0;

			if (c_seg->c_must_free)
				c_seg_free(c_seg);
			else {
				C_SEG_WAKEUP_DONE(c_seg);

				lck_mtx_unlock_always(&c_seg->c_lock);
			}
		}
		lck_mtx_lock(&vm_swap_data_lock);
	}

	if (swf->swp_nseginuse) {

		swf->swp_flags &= ~SWAP_RECLAIM;
		swf->swp_flags |= SWAP_READY;

		goto done;
	}
	/*
	 * We don't remove this inactive swf from the queue.
	 * That way, we can re-use it when needed again and
	 * preserve the namespace. The delayed_trim processing
	 * is also dependent on us not removing swfs from the queue.
	 */
	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);

	vm_num_swap_files--;

	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;

	lck_mtx_unlock(&vm_swap_data_lock);

	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

	kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
	kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_vp = NULL;
	swf->swp_size = 0;
	swf->swp_free_hint = 0;
	swf->swp_nsegs = 0;
	swf->swp_flags = SWAP_REUSE;

done:
	thread_wakeup((event_t) &swf->swp_flags);
	lck_mtx_unlock(&vm_swap_data_lock);

	kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
}


uint64_t
vm_swap_get_total_space(void)
{
	uint64_t total_space = 0;

	total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;

	return total_space;
}

uint64_t
vm_swap_get_used_space(void)
{
	uint64_t used_space = 0;

	used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;

	return used_space;
}

uint64_t
vm_swap_get_free_space(void)
{
	return (vm_swap_get_total_space() - vm_swap_get_used_space());
}

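/*
 * Return 1 if swap space is running low: few free segments remain
 * and the most recent swapfile create attempt failed.
 */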
int
vm_swap_low_on_space(void)
{
	if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
		return (0);

	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {

		if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
			return (0);

		if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
			return (1);
	}
	return (0);
}