/*
 * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <libkern/libkern.h>
#include <mach/mach_types.h>
#include <mach/task.h>
#include <sys/proc_internal.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <kern/locks.h>
#include <sys/queue.h>
#include <kern/vm_pressure.h>
#include <sys/malloc.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <kern/assert.h>
#include <vm/vm_pageout.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif

/*
 * VM_PRESSURE_MINIMUM_RSIZE is the resident-size threshold (in MB) a process
 * must meet to be considered for scavenging; VM_PRESSURE_NOTIFY_WAIT_PERIOD
 * is the minimum interval (in milliseconds) between successive pressure
 * notifications to the same process.
 */
#define VM_PRESSURE_MINIMUM_RSIZE		10	/* MB */
#define VM_PRESSURE_NOTIFY_WAIT_PERIOD		10000	/* milliseconds */

static void vm_pressure_klist_lock(void);
static void vm_pressure_klist_unlock(void);

static void vm_dispatch_memory_pressure(void);
static kern_return_t vm_try_pressure_candidates(void);
static void vm_reset_active_list(void);

static lck_mtx_t vm_pressure_klist_mutex;

struct klist vm_pressure_klist;
struct klist vm_pressure_klist_dormant;

#if DEBUG
#define VM_PRESSURE_DEBUG(cond, format, ...)      \
do {                                              \
	if (cond) { printf(format, ##__VA_ARGS__); } \
} while(0)
#else
#define VM_PRESSURE_DEBUG(cond, format, ...)
#endif

void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr) {
	lck_mtx_init(&vm_pressure_klist_mutex, grp, attr);
}

static void vm_pressure_klist_lock(void) {
	lck_mtx_lock(&vm_pressure_klist_mutex);
}

static void vm_pressure_klist_unlock(void) {
	lck_mtx_unlock(&vm_pressure_klist_mutex);
}

int vm_knote_register(struct knote *kn) {
	int rv = 0;

	vm_pressure_klist_lock();

	if ((kn->kn_sfflags) & (NOTE_VM_PRESSURE)) {
		KNOTE_ATTACH(&vm_pressure_klist, kn);
	} else {
		rv = ENOTSUP;
	}

	vm_pressure_klist_unlock();

	return rv;
}
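
/*
 * Illustrative only: a minimal user-space sketch of how a client might arm
 * the knote handled by vm_knote_register() above, assuming the EVFILT_VM
 * filter and NOTE_VM_PRESSURE flag from <sys/event.h> are available to the
 * caller. This is not part of the kernel code in this file.
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *	EV_SET(&ev, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);	// register interest
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	// block until a note is delivered
 *
 * Registrations that do not request NOTE_VM_PRESSURE are rejected with
 * ENOTSUP above.
 */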

void vm_knote_unregister(struct knote *kn) {
	struct knote *kn_temp;

	vm_pressure_klist_lock();

	VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);

	SLIST_FOREACH(kn_temp, &vm_pressure_klist, kn_selnext) {
		if (kn_temp == kn) {
			KNOTE_DETACH(&vm_pressure_klist, kn);
			vm_pressure_klist_unlock();
			return;
		}
	}

	SLIST_FOREACH(kn_temp, &vm_pressure_klist_dormant, kn_selnext) {
		if (kn_temp == kn) {
			KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
			vm_pressure_klist_unlock();
			return;
		}
	}

	vm_pressure_klist_unlock();
}

void vm_pressure_proc_cleanup(proc_t p)
{
	struct knote *kn = NULL;

	vm_pressure_klist_lock();

	VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d exiting pressure notification\n", p->p_pid);

	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
		if (kn->kn_kq->kq_p == p) {
			KNOTE_DETACH(&vm_pressure_klist, kn);
			vm_pressure_klist_unlock();
			return;
		}
	}

	SLIST_FOREACH(kn, &vm_pressure_klist_dormant, kn_selnext) {
		if (kn->kn_kq->kq_p == p) {
			KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
			vm_pressure_klist_unlock();
			return;
		}
	}

	vm_pressure_klist_unlock();
}

void consider_vm_pressure_events(void)
{
	vm_dispatch_memory_pressure();
}

static void vm_dispatch_memory_pressure(void)
{
	vm_pressure_klist_lock();

	if (!SLIST_EMPTY(&vm_pressure_klist)) {

		VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n");

		if (vm_try_pressure_candidates() == KERN_SUCCESS) {
			vm_pressure_klist_unlock();
			return;
		}

	}

	VM_PRESSURE_DEBUG(1, "[vm_pressure] could not find suitable event candidate\n");

	vm_reset_active_list();

	vm_pressure_klist_unlock();
}
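
/*
 * Two configurations of the candidate-selection helpers follow: a
 * jetsam-aware version that asks the memorystatus subsystem which pid to
 * target, and a generic version that picks the registered process with the
 * largest resident size.
 */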

#if CONFIG_JETSAM

/* Jetsam-aware version. Called with the pressure klist lock held. */

static struct knote * vm_find_knote_from_pid(pid_t pid) {
	struct knote *kn = NULL;

	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
		struct proc *p;
		pid_t current_pid;

		p = kn->kn_kq->kq_p;
		current_pid = p->p_pid;

		if (current_pid == pid) {
			break;
		}
	}

	return kn;
}

static kern_return_t vm_try_pressure_candidates(void)
{
	struct knote *kn = NULL;
	pid_t target_pid = (pid_t)-1;

	/* If memory is low, and there's a pid to target... */
	target_pid = memorystatus_request_vm_pressure_candidate();
	while (target_pid != -1) {
		/* ...look it up in the list, and break if found... */
		if ((kn = vm_find_knote_from_pid(target_pid))) {
			break;
		}

		/* ...otherwise, go round again. */
		target_pid = memorystatus_request_vm_pressure_candidate();
	}

	if (NULL == kn) {
		VM_PRESSURE_DEBUG(0, "[vm_pressure] can't find candidate pid\n");
		return KERN_FAILURE;
	}

	/* ...and dispatch the note */
	VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d, free pages %d\n", kn->kn_kq->kq_p->p_pid, memorystatus_available_pages);

	KNOTE(&vm_pressure_klist, target_pid);

	memorystatus_send_pressure_note(target_pid);

	return KERN_SUCCESS;
}

static void vm_reset_active_list(void) {
	/* No-op: in the jetsam configuration, dispatched knotes remain on the
	 * active list, so there is no dormant list to recharge from. */
}

#if DEVELOPMENT || DEBUG

/* Test purposes only */
boolean_t vm_dispatch_pressure_note_to_pid(pid_t pid) {
	struct knote *kn;

	vm_pressure_klist_lock();

	kn = vm_find_knote_from_pid(pid);
	if (kn) {
		KNOTE(&vm_pressure_klist, pid);
	}

	vm_pressure_klist_unlock();

	return kn ? TRUE : FALSE;
}

#endif /* DEVELOPMENT || DEBUG */

#else /* CONFIG_JETSAM */

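/*
 * Generic version. Called with the pressure klist lock held.
 *
 * Picks the registered process with the largest resident size, skipping
 * processes below VM_PRESSURE_MINIMUM_RSIZE and processes notified within
 * the last VM_PRESSURE_NOTIFY_WAIT_PERIOD milliseconds. The chosen knote is
 * detached from the active list, fired, and parked on the dormant list so
 * the same process is not selected again until vm_reset_active_list() runs.
 */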
static kern_return_t vm_try_pressure_candidates(void)
{
	struct knote *kn = NULL, *kn_max = NULL;
	unsigned int resident_max = 0;
	pid_t target_pid = -1;
	struct klist dispatch_klist = { NULL };
	kern_return_t kr = KERN_SUCCESS;
	struct timeval curr_tstamp = {0, 0};
	int elapsed_msecs = 0;
	proc_t	target_proc = PROC_NULL;

	microuptime(&curr_tstamp);

	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
		struct mach_task_basic_info basic_info;
		mach_msg_type_number_t	size = MACH_TASK_BASIC_INFO_COUNT;
		unsigned int		resident_size = 0;
		struct timeval		delta_tstamp;
		proc_t			p = PROC_NULL;
		struct task*		t = TASK_NULL;

		p = kn->kn_kq->kq_p;
		proc_list_lock();
		if (p != proc_ref_locked(p)) {
			p = PROC_NULL;
			proc_list_unlock();
			continue;
		}
		proc_list_unlock();

		t = (struct task *)(p->task);

		/*
		 * Compute the time since this process was last notified in a
		 * local copy, so curr_tstamp is not clobbered for the
		 * remaining iterations.
		 */
		delta_tstamp = curr_tstamp;
		timevalsub(&delta_tstamp, &p->vm_pressure_last_notify_tstamp);
		elapsed_msecs = delta_tstamp.tv_sec * 1000 + delta_tstamp.tv_usec / 1000;

		if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) {
			proc_rele(p);
			continue;
		}

		if ((kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS) {
			VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr);
			proc_rele(p);
			continue;
		}

		/*
		 * We don't want a small process to block large processes from
		 * being notified again. <rdar://problem/7955532>
		 */
		resident_size = (basic_info.resident_size)/(MB);
		if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
			if (resident_size > resident_max) {
				resident_max = resident_size;
				kn_max = kn;
				target_pid = p->p_pid;
				target_proc = p;
			}
		} else {
			/* There was no candidate with enough resident memory to scavenge */
			VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
		}
		proc_rele(p);
	}

	if (kn_max == NULL || target_pid == -1) {
		return KERN_FAILURE;
	}

	VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
	VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);

	KNOTE_DETACH(&vm_pressure_klist, kn_max);

	target_proc = proc_find(target_pid);
	if (target_proc != PROC_NULL) {
		KNOTE_ATTACH(&dispatch_klist, kn_max);
		KNOTE(&dispatch_klist, target_pid);
		KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);

		microuptime(&target_proc->vm_pressure_last_notify_tstamp);
		proc_rele(target_proc);
	}

	return KERN_SUCCESS;
}

/*
 * Remove all elements from the dormant list and place them on the active list.
 * Called with klist lock held.
 */
static void vm_reset_active_list(void) {
	/* Re-charge the main list from the dormant list if possible */
	if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
		struct knote *kn;

		VM_PRESSURE_DEBUG(1, "[vm_pressure] recharging main list from dormant list\n");

		while (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
			kn = SLIST_FIRST(&vm_pressure_klist_dormant);
			SLIST_REMOVE_HEAD(&vm_pressure_klist_dormant, kn_selnext);
			SLIST_INSERT_HEAD(&vm_pressure_klist, kn, kn_selnext);
		}
	}
}
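
/*
 * Taken together, the routines above give each registered knote a simple
 * lifecycle: it sits on the active list, is parked on the dormant list once
 * its process has been notified, and is returned to the active list the next
 * time vm_dispatch_memory_pressure() finds no deliverable candidate.
 */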

#endif /* CONFIG_JETSAM */