/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>				/* kmem_alloc */
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>
#include <sys/kdebug.h>
#include <kern/sched_prim.h>
#include <machine/limits.h>

extern vm_pressure_level_t memorystatus_vm_pressure_level;

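/*
 * Tokens live in a dynamically grown array ("tokens") and are linked into
 * per-queue lists by array index; index 0 serves as the NULL index.  A
 * token's count is the number of pages that still have to age through the
 * inactive queue before the token "ripens"; a count of zero means an object
 * queued behind it is eligible for purging.
 */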
struct token {
	token_cnt_t     count;
	token_idx_t	prev;
	token_idx_t     next;
};

struct token	*tokens;
token_idx_t	token_q_max_cnt = 0;
vm_size_t	token_q_cur_size = 0;

token_idx_t     token_free_idx = 0;		/* head of free queue */
token_idx_t     token_init_idx = 1;		/* token 0 is reserved!! */
int32_t		token_new_pagecount = 0;	/* count of pages that will
						 * be added onto token queue */

int             available_for_purge = 0;	/* increase when ripe token
						 * added, decrease when ripe
						 * token removed.
						 * protected by page_queue_lock
						 */

static int token_q_allocating = 0;		/* flag for singlethreading
						 * allocator */

struct purgeable_q purgeable_queues[PURGEABLE_Q_TYPE_MAX];

decl_lck_mtx_data(,vm_purgeable_queue_lock)

#define TOKEN_ADD		0x40	/* 0x100 */
#define TOKEN_DELETE		0x41	/* 0x104 */
#define TOKEN_RIPEN		0x42	/* 0x108 */
#define OBJECT_ADD		0x48	/* 0x120 */
#define OBJECT_REMOVE		0x49	/* 0x124 */
#define OBJECT_PURGE		0x4a	/* 0x128 */
#define OBJECT_PURGE_ALL	0x4b	/* 0x12c */
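/*
 * Note: these are kdebug subcodes for the trace points below.  The bracketed
 * hex values equal the codes shifted left by two (0x40 << 2 == 0x100, etc.),
 * which appears to be how the code field shows up in the low bits of a
 * composed kdebug debugid.
 */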

static token_idx_t vm_purgeable_token_remove_first(purgeable_q_t queue);

static void vm_purgeable_stats_helper(vm_purgeable_stat_t *stat, purgeable_q_t queue, int group, task_t target_task);

#if MACH_ASSERT
static void
vm_purgeable_token_check_queue(purgeable_q_t queue)
{
	int             token_cnt = 0, page_cnt = 0;
	token_idx_t     token = queue->token_q_head;
	token_idx_t     unripe = 0;
	int             our_inactive_count;

	while (token) {
		if (tokens[token].count != 0) {
			assert(queue->token_q_unripe);
			if (unripe == 0) {
				assert(token == queue->token_q_unripe);
				unripe = token;
			}
			page_cnt += tokens[token].count;
		}
		if (tokens[token].next == 0)
			assert(queue->token_q_tail == token);

		token_cnt++;
		token = tokens[token].next;
	}

	if (unripe)
		assert(queue->token_q_unripe == unripe);
	assert(token_cnt == queue->debug_count_tokens);

	/* obsolete queue doesn't maintain token counts */
	if (queue->type != PURGEABLE_Q_TYPE_OBSOLETE) {
		our_inactive_count = page_cnt + queue->new_pages + token_new_pagecount;
		assert(our_inactive_count >= 0);
		assert((uint32_t) our_inactive_count == vm_page_inactive_count - vm_page_cleaned_count);
	}
}
#endif

/*
 * Add a token. Allocate token queue memory if necessary.
 * Call with page queue locked.
 */
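/*
 * Rough illustration of the aging model (numbers are made up): if a queue
 * holds tokens with counts 0, 0 and 3, the first two are ripe and counted in
 * available_for_purge, token_q_unripe points at the third, and three more
 * pages have to age through the inactive queue before it ripens as well.  A
 * newly added token roughly starts out with the pages accumulated in
 * new_pages / token_new_pagecount since the previous token was added.
 */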
kern_return_t
vm_purgeable_token_add(purgeable_q_t queue)
{
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif

	/* new token */
	token_idx_t     token;
	enum purgeable_q_type i;

find_available_token:

	if (token_free_idx) {				/* unused tokens available */
		token = token_free_idx;
		token_free_idx = tokens[token_free_idx].next;
	} else if (token_init_idx < token_q_max_cnt) {	/* lazy token array init */
		token = token_init_idx;
		token_init_idx++;
	} else {					/* allocate more memory */
		/* Wait if another thread is inside the memory alloc section */
		while (token_q_allocating) {
			wait_result_t res = lck_mtx_sleep(&vm_page_queue_lock,
							  LCK_SLEEP_DEFAULT,
							  (event_t)&token_q_allocating,
							  THREAD_UNINT);
			if (res != THREAD_AWAKENED) return KERN_ABORTED;
		}

		/*
		 * While we slept, another thread may have grown the token
		 * array. If tokens are available again, retry.
		 */
		if (token_init_idx < token_q_max_cnt)
			goto find_available_token;

		/* Still no free tokens. Allocate more memory ourselves. */
		token_q_allocating = 1;

		/* Drop page queue lock so we can allocate */
		vm_page_unlock_queues();

		struct token *new_loc;
		vm_size_t alloc_size = token_q_cur_size + PAGE_SIZE;
		kern_return_t result;

		if (alloc_size / sizeof (struct token) > TOKEN_COUNT_MAX) {
			result = KERN_RESOURCE_SHORTAGE;
		} else {
			if (token_q_cur_size) {
				result = kmem_realloc(kernel_map,
						      (vm_offset_t) tokens,
						      token_q_cur_size,
						      (vm_offset_t *) &new_loc,
						      alloc_size);
			} else {
				result = kmem_alloc(kernel_map,
						    (vm_offset_t *) &new_loc,
						    alloc_size);
			}
		}

		vm_page_lock_queues();

		if (result) {
			/* Unblock waiting threads */
			token_q_allocating = 0;
			thread_wakeup((event_t)&token_q_allocating);
			return result;
		}

		/* If we get here, we allocated new memory. Update pointers and
		 * dealloc old range */
		struct token *old_tokens = tokens;
		tokens = new_loc;
		vm_size_t old_token_q_cur_size = token_q_cur_size;
		token_q_cur_size = alloc_size;
		token_q_max_cnt = (token_idx_t) (token_q_cur_size /
						 sizeof(struct token));
		assert(token_init_idx < token_q_max_cnt);	/* We must have a free token now */

		if (old_token_q_cur_size) {	/* clean up old mapping */
			vm_page_unlock_queues();
			/* kmem_realloc leaves the old region mapped. Get rid of it. */
			kmem_free(kernel_map, (vm_offset_t)old_tokens, old_token_q_cur_size);
			vm_page_lock_queues();
		}

		/* Unblock waiting threads */
		token_q_allocating = 0;
		thread_wakeup((event_t)&token_q_allocating);

		goto find_available_token;
	}

	assert(token);

	/*
	 * The new pagecount we accumulated needs to be applied to all queues
	 * except the obsolete queue.
	 */
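	/*
	 * token_new_pagecount accumulates page arrivals between token
	 * additions; folding it into every queue's new_pages here keeps the
	 * per-queue accounting consistent before this token's count is taken
	 * from new_pages below.
	 */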
	for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount;
		assert(pages >= 0);
		assert(pages <= TOKEN_COUNT_MAX);
		purgeable_queues[i].new_pages = (int32_t) pages;
		assert(purgeable_queues[i].new_pages == pages);
	}
	token_new_pagecount = 0;

	/* set token counter value */
	if (queue->type != PURGEABLE_Q_TYPE_OBSOLETE)
		tokens[token].count = queue->new_pages;
	else
		tokens[token].count = 0;	/* all obsolete items are
						 * ripe immediately */
	queue->new_pages = 0;

	/* put token on token counter list */
	tokens[token].next = 0;
	if (queue->token_q_tail == 0) {
		assert(queue->token_q_head == 0 && queue->token_q_unripe == 0);
		queue->token_q_head = token;
		tokens[token].prev = 0;
	} else {
		tokens[queue->token_q_tail].next = token;
		tokens[token].prev = queue->token_q_tail;
	}
	if (queue->token_q_unripe == 0) {	/* only ripe tokens (token
						 * count == 0) in queue */
		if (tokens[token].count > 0)
			queue->token_q_unripe = token;	/* first unripe token */
		else
			available_for_purge++;	/* added a ripe token?
						 * increase available count */
	}
	queue->token_q_tail = token;

#if MACH_ASSERT
	queue->debug_count_tokens++;
	/* Check both queues, since we modified the new_pages count on each */
	vm_purgeable_token_check_queue(&purgeable_queues[PURGEABLE_Q_TYPE_FIFO]);
	vm_purgeable_token_check_queue(&purgeable_queues[PURGEABLE_Q_TYPE_LIFO]);

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, TOKEN_ADD)),
			      queue->type,
			      tokens[token].count,	/* num pages on token
							 * (last token) */
			      queue->debug_count_tokens,
			      0,
			      0);
#endif

	return KERN_SUCCESS;
}

/*
 * Remove first token from queue and return its index. Add its count to the
 * count of the next token.
 * Call with page queue locked.
 */
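/*
 * Folding the removed token's count into the new head token preserves the
 * total number of page movements the remaining tokens still have to wait
 * for, so their ripening times are unaffected by the removal.
 */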
static token_idx_t
vm_purgeable_token_remove_first(purgeable_q_t queue)
{
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif

	token_idx_t     token;
	token = queue->token_q_head;

	assert(token);

	if (token) {
		assert(queue->token_q_tail);
		if (queue->token_q_head == queue->token_q_unripe) {
			/* no ripe tokens... must move unripe pointer */
			queue->token_q_unripe = tokens[token].next;
		} else {
			/* we're removing a ripe token. decrease count */
			available_for_purge--;
			assert(available_for_purge >= 0);
		}

		if (queue->token_q_tail == queue->token_q_head)
			assert(tokens[token].next == 0);

		queue->token_q_head = tokens[token].next;
		if (queue->token_q_head) {
			tokens[queue->token_q_head].count += tokens[token].count;
			tokens[queue->token_q_head].prev = 0;
		} else {
			/* currently no other tokens in the queue */
			/*
			 * the page count must be added to the next newly
			 * created token
			 */
			queue->new_pages += tokens[token].count;
			/* if head is zero, tail is too */
			queue->token_q_tail = 0;
		}

#if MACH_ASSERT
		queue->debug_count_tokens--;
		vm_purgeable_token_check_queue(queue);

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, TOKEN_DELETE)),
				      queue->type,
				      tokens[queue->token_q_head].count,	/* num pages on new
										 * first token */
				      token_new_pagecount,	/* num pages waiting for
								 * next token */
				      available_for_purge,
				      0);
#endif
	}
	return token;
}

static token_idx_t
vm_purgeable_token_remove_last(purgeable_q_t queue)
{
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif

	token_idx_t     token;
	token = queue->token_q_tail;

	assert(token);

	if (token) {
		assert(queue->token_q_head);

		if (queue->token_q_tail == queue->token_q_head)
			assert(tokens[token].next == 0);

		if (queue->token_q_unripe == 0) {
			/* we're removing a ripe token. decrease count */
			available_for_purge--;
			assert(available_for_purge >= 0);
		} else if (queue->token_q_unripe == token) {
			/* we're removing the only unripe token */
			queue->token_q_unripe = 0;
		}

		if (token == queue->token_q_head) {
			/* token is the last one in the queue */
			queue->token_q_head = 0;
			queue->token_q_tail = 0;
		} else {
			token_idx_t new_tail;

			new_tail = tokens[token].prev;

			assert(new_tail);
			assert(tokens[new_tail].next == token);

			queue->token_q_tail = new_tail;
			tokens[new_tail].next = 0;
		}

		queue->new_pages += tokens[token].count;

#if MACH_ASSERT
		queue->debug_count_tokens--;
		vm_purgeable_token_check_queue(queue);

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, TOKEN_DELETE)),
				      queue->type,
				      tokens[queue->token_q_head].count,	/* num pages on new
										 * first token */
				      token_new_pagecount,	/* num pages waiting for
								 * next token */
				      available_for_purge,
				      0);
#endif
	}
	return token;
}

/*
 * Delete first token from queue. Return token to token queue.
 * Call with page queue locked.
 */
void
vm_purgeable_token_delete_first(purgeable_q_t queue)
{
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	token_idx_t     token = vm_purgeable_token_remove_first(queue);

	if (token) {
		/* stick removed token on free queue */
		tokens[token].next = token_free_idx;
		tokens[token].prev = 0;
		token_free_idx = token;
	}
}

void
vm_purgeable_token_delete_last(purgeable_q_t queue)
{
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	token_idx_t     token = vm_purgeable_token_remove_last(queue);

	if (token) {
		/* stick removed token on free queue */
		tokens[token].next = token_free_idx;
		tokens[token].prev = 0;
		token_free_idx = token;
	}
}


/* Call with page queue locked. */
void
vm_purgeable_q_advance_all()
{
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif

	/* If the global token_new_pagecount gets very large, fold it back
	 * into the per-queue counters so it cannot overflow. It tends to
	 * grow large when there is no purgeable queue activity. */
	int i;
	if (token_new_pagecount > (TOKEN_NEW_PAGECOUNT_MAX >> 1)) {	/* a system idling for years might get there */
		for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
			int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount;
			assert(pages >= 0);
			assert(pages <= TOKEN_COUNT_MAX);
			purgeable_queues[i].new_pages = (int32_t) pages;
			assert(purgeable_queues[i].new_pages == pages);
		}
		token_new_pagecount = 0;
	}

	/*
	 * Decrement token counters. A token counter can be zero, which means
	 * the object is ripe to be purged. It is not purged immediately,
	 * because that could cause several objects to be purged even if
	 * purging one would satisfy the memory needs. Instead, the pageout
	 * thread purges one after the other by calling
	 * vm_purgeable_object_purge_one and then rechecking the memory
	 * balance.
	 *
	 * No need to advance the obsolete queue - all items there are always
	 * ripe.
	 */
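	/*
	 * num_pages below is the aging credit applied per call: each call
	 * moves every queue's first unripe token at most one page closer to
	 * ripening; when a count reaches zero the token ripens,
	 * available_for_purge is bumped and token_q_unripe advances.
	 */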
	for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
		purgeable_q_t queue = &purgeable_queues[i];
		uint32_t num_pages = 1;

		/* Iterate over tokens as long as there are unripe tokens. */
		while (queue->token_q_unripe) {
			if (tokens[queue->token_q_unripe].count && num_pages) {
				tokens[queue->token_q_unripe].count -= 1;
				num_pages -= 1;
			}

			if (tokens[queue->token_q_unripe].count == 0) {
				queue->token_q_unripe = tokens[queue->token_q_unripe].next;
				available_for_purge++;
				KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, TOKEN_RIPEN)),
						      queue->type,
						      tokens[queue->token_q_head].count,	/* num pages on new
											 * first token */
						      0,
						      available_for_purge,
						      0);
				continue;	/* One token ripened. Make sure to
						 * check the next. */
			}
			if (num_pages == 0)
				break;	/* Current token not ripe and no more pages.
					 * Work done. */
		}

		/*
		 * If there are no unripe tokens in the queue, decrement the
		 * new_pages counter instead. new_pages can be negative, but it
		 * must be canceled out by token_new_pagecount -- since the
		 * inactive queue as a whole always contains a nonnegative
		 * number of pages.
		 */
		if (!queue->token_q_unripe) {
			queue->new_pages -= num_pages;
			assert((int32_t) token_new_pagecount + queue->new_pages >= 0);
		}
#if MACH_ASSERT
		vm_purgeable_token_check_queue(queue);
#endif
	}
}

/*
 * Grab any ripe object and purge it: check the obsolete queue first, then go
 * through each volatile group and select a queue with a ripe token.
 * Start with the first group (0):
 * 1. Look at the queue. Is there an object?
 *   Yes - purge it. Remove the token.
 *   No - check the other queue. Is there an object?
 *     No - increment the group, then go to (1).
 *     Yes - purge it. Remove a token. If there is no ripe token, remove a
 *      ripe token from the other queue and migrate an unripe token from
 *      this queue to the other queue.
 * Call with page queue locked.
 */
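/*
 * The procedure above is carried out by vm_purgeable_object_purge_one()
 * further down; the next two helpers handle the token bookkeeping it needs
 * when a ripe token has to be consumed.
 */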
static void
vm_purgeable_token_remove_ripe(purgeable_q_t queue)
{
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert(queue->token_q_head && tokens[queue->token_q_head].count == 0);
	/* return token to free list. advance token list. */
	token_idx_t     new_head = tokens[queue->token_q_head].next;
	tokens[queue->token_q_head].next = token_free_idx;
	tokens[queue->token_q_head].prev = 0;
	token_free_idx = queue->token_q_head;
	queue->token_q_head = new_head;
	tokens[new_head].prev = 0;
	if (new_head == 0)
		queue->token_q_tail = 0;

#if MACH_ASSERT
	queue->debug_count_tokens--;
	vm_purgeable_token_check_queue(queue);
#endif

	available_for_purge--;
	assert(available_for_purge >= 0);
}

/*
 * Delete a ripe token from the given queue. If there are no ripe tokens on
 * that queue, delete a ripe token from queue2, and migrate an unripe token
 * from queue to queue2.
 * Call with page queue locked.
 */
static void
vm_purgeable_token_choose_and_delete_ripe(purgeable_q_t queue, purgeable_q_t queue2)
{
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert(queue->token_q_head);

	if (tokens[queue->token_q_head].count == 0) {
		/* This queue has a ripe token. Remove. */
		vm_purgeable_token_remove_ripe(queue);
	} else {
		assert(queue2);
		/*
		 * queue2 must have a ripe token. Remove, and migrate one
		 * from queue to queue2.
		 */
		vm_purgeable_token_remove_ripe(queue2);
		/* migrate unripe token */
		token_idx_t     token;
		token_cnt_t     count;

		/* remove token from queue1 */
		assert(queue->token_q_unripe == queue->token_q_head);	/* queue1 has no ripe
									 * tokens, remember? */
		token = vm_purgeable_token_remove_first(queue);
		assert(token);

		count = tokens[token].count;

		/* migrate to queue2 */
		/* go to migration target loc */

		token_idx_t token_to_insert_before = queue2->token_q_head, token_to_insert_after;

		while (token_to_insert_before != 0 && count > tokens[token_to_insert_before].count) {
			count -= tokens[token_to_insert_before].count;
			token_to_insert_before = tokens[token_to_insert_before].next;
		}

		/* token_to_insert_before is now set correctly */
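		/*
		 * Inserting here keeps the migrated token's ripening distance
		 * intact: the count consumed while walking queue2 is carried
		 * by the tokens we walked past, and the remainder is
		 * subtracted from the token we insert in front of (or from
		 * new_pages if we append at the tail).
		 */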

		/* should the inserted token become the first unripe token? */
		if ((token_to_insert_before == queue2->token_q_unripe) || (queue2->token_q_unripe == 0))
			queue2->token_q_unripe = token;	/* if so, must update unripe pointer */

		/*
		 * insert token.
		 * if inserting at end, reduce new_pages by that value;
		 * otherwise, reduce counter of next token
		 */

		tokens[token].count = count;

		if (token_to_insert_before != 0) {
			token_to_insert_after = tokens[token_to_insert_before].prev;

			tokens[token].next = token_to_insert_before;
			tokens[token_to_insert_before].prev = token;

			assert(tokens[token_to_insert_before].count >= count);
			tokens[token_to_insert_before].count -= count;
		} else {
			/* if we ran off the end of the list, the token to insert after is the tail */
			token_to_insert_after = queue2->token_q_tail;

			tokens[token].next = 0;
			queue2->token_q_tail = token;

			assert(queue2->new_pages >= (int32_t) count);
			queue2->new_pages -= count;
		}

		if (token_to_insert_after != 0) {
			tokens[token].prev = token_to_insert_after;
			tokens[token_to_insert_after].next = token;
		} else {
			/* inserting at the head of queue2 (which may be empty
			 * after the ripe token was removed) */
			tokens[token].prev = 0;
			queue2->token_q_head = token;
		}

#if MACH_ASSERT
		queue2->debug_count_tokens++;
		vm_purgeable_token_check_queue(queue2);
#endif
	}
}

/* Find an object that can be locked. Returns locked object. */
/* Call with purgeable queue locked. */
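/*
 * Preference is given to the object whose owning task has the lowest
 * importance estimate; the first such object that can actually be locked
 * wins, and a zero-importance owner ends the search early.  With pick_ripe
 * set, only objects marked purgeable_when_ripe are considered.
 */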
static vm_object_t
vm_purgeable_object_find_and_lock(
	purgeable_q_t	queue,
	int 		group,
	boolean_t	pick_ripe)
{
	vm_object_t     object, best_object;
	int		object_task_importance;
	int		best_object_task_importance;
	int		best_object_skipped;
	int		num_objects_skipped;
	task_t		owner;

	best_object = VM_OBJECT_NULL;
	best_object_task_importance = INT_MAX;

	lck_mtx_assert(&vm_purgeable_queue_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Usually we would pick the first element from a queue. However, we
	 * might not be able to get a lock on it, in which case we try the
	 * remaining elements in order.
	 */

	num_objects_skipped = -1;
	for (object = (vm_object_t) queue_first(&queue->objq[group]);
	     !queue_end(&queue->objq[group], (queue_entry_t) object);
	     object = (vm_object_t) queue_next(&object->objq),
		num_objects_skipped++) {

		if (pick_ripe &&
		    ! object->purgeable_when_ripe) {
			/* we want an object that has a ripe token */
			continue;
		}

		object_task_importance = 0;
		owner = object->vo_purgeable_owner;
		if (owner) {
			object_task_importance = task_importance_estimate(owner);
		}
		if (object_task_importance < best_object_task_importance) {
			if (vm_object_lock_try(object)) {
				if (best_object != VM_OBJECT_NULL) {
					/* forget about previous best object */
					vm_object_unlock(best_object);
				}
				best_object = object;
				best_object_task_importance = object_task_importance;
				best_object_skipped = num_objects_skipped;
				if (best_object_task_importance == 0) {
					/* can't get any better: stop looking */
					break;
				}
			}
		}
	}

	if (best_object) {
		/* Locked. Great. We'll take it. Remove and return. */
//		printf("FOUND PURGEABLE object %p skipped %d\n", object, num_objects_skipped);

		/* clear ownership when dequeueing purgeable object */
		owner = best_object->vo_purgeable_owner;
		if (owner) {
			assert(owner->task_volatile_objects > 0);
			OSAddAtomic(-1, &owner->task_volatile_objects);
			best_object->vo_purgeable_owner = NULL;
		}

		queue_remove(&queue->objq[group], best_object,
			     vm_object_t, objq);
		best_object->purgeable_queue_type = PURGEABLE_Q_TYPE_MAX;
		best_object->purgeable_queue_group = 0;
		best_object->objq.next = NULL;
		best_object->objq.prev = NULL;
#if MACH_ASSERT
		queue->debug_count_objects--;
#endif
		return best_object;
	}

	return 0;
}

/* Can be called without holding locks */
void
vm_purgeable_object_purge_all(void)
{
	enum purgeable_q_type i;
	int             group;
	vm_object_t     object;
	unsigned int	purged_count;
	uint32_t	collisions;

	purged_count = 0;
	collisions = 0;

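	/*
	 * Strategy: after every successful purge (or whenever the object
	 * lock can't be taken) the queue lock is dropped, so the scan
	 * restarts from the top; "collisions" feeds mutex_pause() for
	 * backoff on lock failures.
	 */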
restart:
	lck_mtx_lock(&vm_purgeable_queue_lock);
	/* Cycle through all queues */
	for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) {
		purgeable_q_t   queue;

		queue = &purgeable_queues[i];

		/*
		 * Look through all groups, starting from the lowest. If
		 * we find an object in that group, try to lock it (this can
		 * fail). If locking is successful, we can drop the queue
		 * lock, remove a token and then purge the object.
		 */
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
			while (!queue_empty(&queue->objq[group])) {
				object = vm_purgeable_object_find_and_lock(queue, group, FALSE);
				if (object == VM_OBJECT_NULL) {
					lck_mtx_unlock(&vm_purgeable_queue_lock);
					mutex_pause(collisions++);
					goto restart;
				}

				lck_mtx_unlock(&vm_purgeable_queue_lock);

				/* Lock the page queue here so we don't hold it
				 * over the whole, lengthy operation */
				if (object->purgeable_when_ripe) {
					vm_page_lock_queues();
					vm_purgeable_token_remove_first(queue);
					vm_page_unlock_queues();
				}

				assert(object->purgable == VM_PURGABLE_VOLATILE);
				(void) vm_object_purge(object);
				vm_object_unlock(object);
				purged_count++;
				goto restart;
			}
			assert(queue->debug_count_objects >= 0);
		}
	}
	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_ALL)),
			      purged_count, /* # of purged objects */
			      0,
			      available_for_purge,
			      0,
			      0);
	lck_mtx_unlock(&vm_purgeable_queue_lock);
	return;
}

boolean_t
vm_purgeable_object_purge_one_unlocked(
	int	force_purge_below_group)
{
	boolean_t	retval;

	vm_page_lock_queues();
	retval = vm_purgeable_object_purge_one(force_purge_below_group);
	vm_page_unlock_queues();

	return retval;
}

boolean_t
vm_purgeable_object_purge_one(
	int	force_purge_below_group)
{
	enum purgeable_q_type i;
	int             group;
	vm_object_t     object = 0;
	purgeable_q_t   queue, queue2;
	boolean_t	forced_purge;

	/* Need the page queue lock since we'll be changing the token queue. */
#if MACH_ASSERT
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	lck_mtx_lock(&vm_purgeable_queue_lock);

	/* Cycle through all queues */
	for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) {
		queue = &purgeable_queues[i];

		if (force_purge_below_group == 0) {
			/*
			 * Are there any ripe tokens on this queue? If yes,
			 * we'll find an object to purge there
			 */
			if (!queue->token_q_head) {
				/* no token: look at next purgeable queue */
				continue;
			}

			if (tokens[queue->token_q_head].count != 0) {
				/* no ripe token: next queue */
				continue;
			}
		}

		/*
		 * Now look through all groups, starting from the lowest. If
		 * we find an object in that group, try to lock it (this can
		 * fail). If locking is successful, we can drop the queue
		 * lock, remove a token and then purge the object.
		 */
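		/*
		 * Groups below force_purge_below_group may be purged even
		 * without a ripe token; for the remaining groups a purge only
		 * proceeds while this queue holds a ripe token, with the
		 * FIFO/LIFO peer queue used as a fallback source of objects
		 * (migrating a token when necessary).
		 */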
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
			if (!queue->token_q_head ||
			    tokens[queue->token_q_head].count != 0) {
				/* no tokens or no ripe tokens */

				if (group >= force_purge_below_group) {
					/* no more groups to force-purge */
					break;
				}

				/*
				 * Try and purge an object in this group
				 * even though no tokens are ripe.
				 */
				if (!queue_empty(&queue->objq[group]) &&
				    (object = vm_purgeable_object_find_and_lock(queue, group, FALSE))) {
					lck_mtx_unlock(&vm_purgeable_queue_lock);
					if (object->purgeable_when_ripe) {
						vm_purgeable_token_delete_first(queue);
					}
					forced_purge = TRUE;
					goto purge_now;
				}

				/* nothing to purge in this group: next group */
				continue;
			}
			if (!queue_empty(&queue->objq[group]) &&
			    (object = vm_purgeable_object_find_and_lock(queue, group, TRUE))) {
				lck_mtx_unlock(&vm_purgeable_queue_lock);
				if (object->purgeable_when_ripe) {
					vm_purgeable_token_choose_and_delete_ripe(queue, 0);
				}
				forced_purge = FALSE;
				goto purge_now;
			}
			if (i != PURGEABLE_Q_TYPE_OBSOLETE) {
				/* This is the token migration case, and it works between
				 * FIFO and LIFO only */
				queue2 = &purgeable_queues[i != PURGEABLE_Q_TYPE_FIFO ?
							   PURGEABLE_Q_TYPE_FIFO :
							   PURGEABLE_Q_TYPE_LIFO];

				if (!queue_empty(&queue2->objq[group]) &&
				    (object = vm_purgeable_object_find_and_lock(queue2, group, TRUE))) {
					lck_mtx_unlock(&vm_purgeable_queue_lock);
					if (object->purgeable_when_ripe) {
						vm_purgeable_token_choose_and_delete_ripe(queue2, queue);
					}
					forced_purge = FALSE;
					goto purge_now;
				}
			}
			assert(queue->debug_count_objects >= 0);
		}
	}
	/*
	 * because we have to do a try_lock on the objects which could fail,
	 * we could end up with no object to purge at this time, even though
	 * we have objects in a purgeable state
	 */
	lck_mtx_unlock(&vm_purgeable_queue_lock);
	return FALSE;

purge_now:

	assert(object);
	assert(object->purgable == VM_PURGABLE_VOLATILE);
	vm_page_unlock_queues();  /* Unlock for call to vm_object_purge() */
//	printf("%sPURGING object %p task %p importance %d queue %d group %d force_purge_below_group %d memorystatus_vm_pressure_level %d\n", forced_purge ? "FORCED " : "", object, object->vo_purgeable_owner, task_importance_estimate(object->vo_purgeable_owner), i, group, force_purge_below_group, memorystatus_vm_pressure_level);
	(void) vm_object_purge(object);
	vm_object_unlock(object);
	vm_page_lock_queues();

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE)),
			      object,	/* purged object */
			      0,
			      available_for_purge,
			      0,
			      0);

	return TRUE;
}

/* Called with object lock held */
void
vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group)
{
	task_t	owner;

	vm_object_lock_assert_exclusive(object);
	lck_mtx_lock(&vm_purgeable_queue_lock);

	if (queue->type == PURGEABLE_Q_TYPE_OBSOLETE)
		group = 0;

	if (queue->type != PURGEABLE_Q_TYPE_LIFO)	/* fifo and obsolete are
							 * fifo-queued */
		queue_enter(&queue->objq[group], object, vm_object_t, objq);	/* last to die */
	else
		queue_enter_first(&queue->objq[group], object, vm_object_t, objq);	/* first to die */

	object->purgeable_queue_type = queue->type;
	object->purgeable_queue_group = group;

	/* set ownership when enqueueing purgeable object */
	assert(object->vo_purgeable_owner == NULL);
	owner = current_task();
	if (current_task() != kernel_task) {
		OSAddAtomic(+1, &owner->task_volatile_objects);
		assert(owner->task_volatile_objects > 0);
		object->vo_purgeable_owner = owner;
	}

#if MACH_ASSERT
	queue->debug_count_objects++;
	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_ADD)),
			      0,
			      tokens[queue->token_q_head].count,
			      queue->type,
			      group,
			      0);
#endif

	lck_mtx_unlock(&vm_purgeable_queue_lock);
}

/* Look for object. If found, remove from purgeable queue. */
/* Called with object lock held */
purgeable_q_t
vm_purgeable_object_remove(vm_object_t object)
{
	int group;
	task_t owner;
	enum purgeable_q_type type;
	purgeable_q_t queue;

	vm_object_lock_assert_exclusive(object);

	type = object->purgeable_queue_type;
	group = object->purgeable_queue_group;

	if (type == PURGEABLE_Q_TYPE_MAX) {
		if (object->objq.prev || object->objq.next)
			panic("unmarked object on purgeable q");

		return NULL;
	} else if (!(object->objq.prev && object->objq.next))
		panic("marked object not on purgeable q");

	lck_mtx_lock(&vm_purgeable_queue_lock);

	queue = &purgeable_queues[type];

	/* clear ownership when dequeueing purgeable object */
	owner = object->vo_purgeable_owner;
	if (owner) {
		assert(owner->task_volatile_objects > 0);
		OSAddAtomic(-1, &owner->task_volatile_objects);
		object->vo_purgeable_owner = NULL;
	}

	queue_remove(&queue->objq[group], object, vm_object_t, objq);

#if MACH_ASSERT
	queue->debug_count_objects--;
	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_REMOVE)),
			      0,
			      tokens[queue->token_q_head].count,
			      queue->type,
			      group,
			      0);
#endif

	lck_mtx_unlock(&vm_purgeable_queue_lock);

	object->purgeable_queue_type = PURGEABLE_Q_TYPE_MAX;
	object->purgeable_queue_group = 0;

	object->objq.next = NULL;
	object->objq.prev = NULL;

	return &purgeable_queues[type];
}

void
vm_purgeable_stats_helper(vm_purgeable_stat_t *stat, purgeable_q_t queue, int group, task_t target_task)
{
	lck_mtx_assert(&vm_purgeable_queue_lock, LCK_MTX_ASSERT_OWNED);

	stat->count = stat->size = 0;
	vm_object_t     object;
	for (object = (vm_object_t) queue_first(&queue->objq[group]);
	     !queue_end(&queue->objq[group], (queue_entry_t) object);
	     object = (vm_object_t) queue_next(&object->objq)) {
		if (!target_task || object->vo_purgeable_owner == target_task) {
			stat->count++;
			stat->size += (object->resident_page_count * PAGE_SIZE);
		}
	}
	return;
}

void
vm_purgeable_stats(vm_purgeable_info_t info, task_t target_task)
{
	purgeable_q_t	queue;
	int             group;

	lck_mtx_lock(&vm_purgeable_queue_lock);

	/* Populate fifo_data */
	queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
		vm_purgeable_stats_helper(&(info->fifo_data[group]), queue, group, target_task);

	/* Populate lifo_data */
	queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
		vm_purgeable_stats_helper(&(info->lifo_data[group]), queue, group, target_task);

	/* Populate obsolete data */
	queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
	vm_purgeable_stats_helper(&(info->obsolete_data), queue, 0, target_task);

	lck_mtx_unlock(&vm_purgeable_queue_lock);
	return;
}


static void
vm_purgeable_queue_disown(
	purgeable_q_t	queue,
	int		group,
	task_t		task)
{
	vm_object_t	object;
	int		num_objects;

	lck_mtx_assert(&vm_purgeable_queue_lock, LCK_MTX_ASSERT_OWNED);

	num_objects = 0;
	for (object = (vm_object_t) queue_first(&queue->objq[group]);
	     !queue_end(&queue->objq[group], (queue_entry_t) object);
	     object = (vm_object_t) queue_next(&object->objq)) {
		if (object->vo_purgeable_owner == task) {
			object->vo_purgeable_owner = NULL;
			num_objects++;
		}
	}
	assert(task->task_volatile_objects >= num_objects);
	OSAddAtomic(-num_objects, &task->task_volatile_objects);
	return;
}

void
vm_purgeable_disown(
	task_t	task)
{
	purgeable_q_t	queue;
	int		group;

	if (task == NULL) {
		return;
	}

	lck_mtx_lock(&vm_purgeable_queue_lock);

	queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
	vm_purgeable_queue_disown(queue, 0, task);

	queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
		vm_purgeable_queue_disown(queue, group, task);

	queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
		vm_purgeable_queue_disown(queue, group, task);

	lck_mtx_unlock(&vm_purgeable_queue_lock);
}