/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/vnode_internal.h>
#include <sys/namei.h>
#include <sys/ubc_internal.h>
#include <sys/malloc.h>
#include <sys/user.h>
#if CONFIG_PROTECT
#include <sys/cprotect.h>
#endif

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/host_priv.h>
#include <mach/mach_traps.h>
#include <mach/boolean.h>

#include <kern/kern_types.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/assert.h>

#include <libkern/libkern.h>

#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vnode_pager.h>
#include <vm/vm_protos.h>
#if CONFIG_MACF
#include <security/mac_framework.h>
#endif

void kprintf(const char *fmt, ...);

/*
 * temporary support for delayed instantiation
 * of default_pager
 */
int default_pager_init_flag = 0;

struct bs_map		bs_port_table[MAX_BACKING_STORE] = {
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}};

/* ###################################################### */


/*
 *	Routine:	macx_backing_store_recovery
 *	Function:
 *		Syscall interface to set a task's privilege
 *		level so that it is not subject to
 *		macx_backing_store_suspend
 */
int
macx_backing_store_recovery(
	struct macx_backing_store_recovery_args *args)
{
	int		pid = args->pid;
	int		error;
	struct proc	*p =  current_proc();
	boolean_t	funnel_state;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	if ((error = suser(kauth_cred_get(), 0)))
		goto backing_store_recovery_return;

	/* for now, restrict backing_store_recovery usage to the current task only */
	if(pid != proc_selfpid()) {
		error = EINVAL;
		goto backing_store_recovery_return;
	}

	task_backing_store_privileged(p->task);

backing_store_recovery_return:
	(void) thread_funnel_set(kernel_flock, FALSE);
	return(error);
}

/*
 *	Routine:	macx_backing_store_suspend
 *	Function:
 *		Syscall interface to stop new demand for
 *		backing store when backing store is low
 */

int
macx_backing_store_suspend(
	struct macx_backing_store_suspend_args *args)
{
	boolean_t	suspend = args->suspend;
	int		error;
	boolean_t	funnel_state;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	if ((error = suser(kauth_cred_get(), 0)))
		goto backing_store_suspend_return;

	vm_backing_store_disable(suspend);

backing_store_suspend_return:
	(void) thread_funnel_set(kernel_flock, FALSE);
	return(error);
}
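
/*
 * Illustrative sketch (not compiled here): how a privileged user-space
 * daemon might pair the two syscalls above -- suspend new backing-store
 * demand when swap space runs low, after first exempting itself so it can
 * keep making progress.  The user-level prototypes shown are assumptions
 * based on the kernel argument structures (pid/suspend); verify them
 * against <mach/mach_traps.h> on the target SDK before relying on them.
 *
 *	extern int macx_backing_store_recovery(int pid);
 *	extern int macx_backing_store_suspend(boolean_t suspend);
 *
 *	static int
 *	enter_low_swap_mode(void)
 *	{
 *		// exempt the calling task (only the current pid is accepted)
 *		if (macx_backing_store_recovery(getpid()) != 0)
 *			return -1;
 *		// stop new demand for backing store
 *		return macx_backing_store_suspend(TRUE);
 *	}
 */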

extern boolean_t backing_store_stop_compaction;
extern boolean_t compressor_store_stop_compaction;

/*
 *	Routine:	macx_backing_store_compaction
 *	Function:
 *		Turn compaction of swap space on or off.  This is
 *		used during shutdown/restart so that the kernel
 *		doesn't waste time compacting swap files that are
 *		about to be deleted anyway.  Compaction is always
 *		on by default when the system comes up and is turned
 *		off when a shutdown/restart is requested.  It is
 *		re-enabled if the shutdown/restart is aborted for any reason.
 */

int
macx_backing_store_compaction(int flags)
{
	int error;

	if ((error = suser(kauth_cred_get(), 0)))
		return error;

	if (flags & SWAP_COMPACT_DISABLE) {
		backing_store_stop_compaction = TRUE;
		compressor_store_stop_compaction = TRUE;

		kprintf("backing_store_stop_compaction = TRUE\n");

	} else if (flags & SWAP_COMPACT_ENABLE) {
		backing_store_stop_compaction = FALSE;
		compressor_store_stop_compaction = FALSE;

		kprintf("backing_store_stop_compaction = FALSE\n");
	}

	return 0;
}
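
/*
 * Illustrative sketch (not compiled here): the shutdown path is expected to
 * turn compaction off before deleting swap files, and to turn it back on if
 * the shutdown is aborted.  SWAP_COMPACT_DISABLE and SWAP_COMPACT_ENABLE are
 * the flag bits tested above; the shutdown_was_aborted() helper is purely
 * hypothetical.
 *
 *	// about to shut down: stop wasting time compacting doomed swap files
 *	macx_backing_store_compaction(SWAP_COMPACT_DISABLE);
 *
 *	if (shutdown_was_aborted()) {
 *		// shutdown cancelled: resume normal compaction
 *		macx_backing_store_compaction(SWAP_COMPACT_ENABLE);
 *	}
 */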

/*
 *	Routine:	macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for low and
 *		high water marks.
 */
int
macx_triggers(
	struct macx_triggers_args *args)
{
	int	error;

	error = suser(kauth_cred_get(), 0);
	if (error)
		return error;

	return mach_macx_triggers(args);
}
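
/*
 * Illustrative sketch (not compiled here): a user-space pager registers a
 * notification port and the water marks it wants to hear about; the real
 * work happens in mach_macx_triggers() on the Mach side.  The user-level
 * prototype and the HI_WATER_ALERT/LO_WATER_ALERT flag names are assumptions
 * taken from the default-pager headers -- verify them against the SDK in use.
 *
 *	static void
 *	register_swap_triggers(int hi_water, int low_water)
 *	{
 *		mach_port_t trigger_port = MACH_PORT_NULL;
 *
 *		mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
 *		                   &trigger_port);
 *		mach_port_insert_right(mach_task_self(), trigger_port,
 *		                       trigger_port, MACH_MSG_TYPE_MAKE_SEND);
 *
 *		// ask for notifications when paging space crosses the water marks
 *		macx_triggers(hi_water, low_water,
 *		              HI_WATER_ALERT | LO_WATER_ALERT, trigger_port);
 *	}
 */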

extern boolean_t dp_isssd;
extern void vm_swap_init(void);
extern int vm_compressor_mode;

/*
 * In the compressed pager world, the swapfiles are created by the kernel.
 * Well, all except the first one. That swapfile is absorbed by the kernel at
 * the end of the macx_swapon function (if swap is enabled). That's why
 * we allow the first invocation of macx_swapon to succeed.
 *
 * If the compressor pool is running low, the kernel messages the dynamic pager
 * on the port it has registered with the kernel. That port can transport one of
 * two pieces of information to the dynamic pager: create a swapfile or delete a
 * swapfile.
 *
 * We choose to transmit the former. So, that message tells the dynamic pager
 * to create a swapfile and activate it by calling macx_swapon.
 *
 * We deny this new macx_swapon request. That leads the dynamic pager to interpret
 * the failure as a serious error and notify all its clients that swap is running low.
 * That's how we get the loginwindow "Resume / Force Quit Applications" dialog to appear.
 *
 * NOTE:
 * If the kernel has already created multiple swapfiles by the time the compressor
 * pool is running low (and it has to play this trick), the dynamic pager won't be
 * able to create a file in user space, and that too will lead to a similar
 * notification blast to all of its clients. So, that behaves as desired too.
 */
boolean_t	macx_swapon_allowed = TRUE;
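
/*
 * Illustrative sketch (not compiled here) of the one-shot gate described
 * above, from the dynamic pager's point of view: the first macx_swapon()
 * call is absorbed by the kernel and succeeds; any later call is refused
 * with EINVAL, which the pager treats as a "swap space low" condition and
 * reports to its clients.  The user-level prototype (filename passed as a
 * 64-bit address) is an assumption -- check <mach/mach_traps.h> -- and
 * notify_clients_swap_low() is a hypothetical stand-in for the pager's own
 * notification path.
 *
 *	if (macx_swapon((uint64_t)(uintptr_t)path, 0, size, priority) != 0) {
 *		// kernel refused a new swapfile: broadcast the low-swap alert
 *		notify_clients_swap_low();
 *	}
 */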

/*
 *	Routine:	macx_swapon
 *	Function:
 *		Syscall interface to add a file to backing store
 */
int
macx_swapon(
	struct macx_swapon_args *args)
{
	int			size = args->size;
	vnode_t			vp = (vnode_t)NULL;
	struct nameidata	nd, *ndp;
	register int		error;
	kern_return_t		kr;
	mach_port_t		backing_store;
	memory_object_default_t	default_pager;
	int			i;
	boolean_t		funnel_state;
	off_t			file_size;
	vfs_context_t		ctx = vfs_context_current();
	struct proc		*p =  current_proc();
	int			dp_cluster_size;

	if (COMPRESSED_PAGER_IS_ACTIVE) {
		if (macx_swapon_allowed == FALSE) {
			return EINVAL;
		} else {
			if ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) ||
			    (vm_compressor_mode == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP)) {
				vm_swap_init();
			}

			macx_swapon_allowed = FALSE;
			return 0;
		}
	}

	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON);
	AUDIT_ARG(value32, args->priority);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	ndp = &nd;

	if ((error = suser(kauth_cred_get(), 0)))
		goto swapon_bailout;

	/*
	 * Get a vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapon_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapon_bailout;
	}

	/* get file size */
	if ((error = vnode_size(vp, &file_size, ctx)) != 0)
		goto swapon_bailout;
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapon(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapon_bailout;
#endif

	/* resize to desired size if it's too small */
	if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0))
		goto swapon_bailout;

#if CONFIG_PROTECT
	{
		/* initialize content protection keys manually */
		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
			goto swapon_bailout;
		}
	}
#endif


	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	/*
	 * add new backing store to list: find the first free slot,
	 * checking the index before reading the table so we never
	 * run past the end of bs_port_table.
	 */
	for (i = 0; i < MAX_BACKING_STORE; i++) {
		if (bs_port_table[i].vp == 0)
			break;
	}
	if (i == MAX_BACKING_STORE) {
		error = ENOMEM;
		goto swapon_bailout;
	}

	/* remember the vnode. This vnode has namei() reference */
	bs_port_table[i].vp = vp;

	/*
	 * Look to see if we are already paging to this file.
	 */
	/* make certain the copy send of kernel call will work */
	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(), &default_pager, 0);
	if(kr != KERN_SUCCESS) {
	   error = EAGAIN;
	   bs_port_table[i].vp = 0;
	   goto swapon_bailout;
	}

	if ((dp_isssd = vnode_pager_isSSD(vp)) == TRUE) {
		/*
		 * keep the cluster size small since the
		 * seek cost is effectively 0 which means
		 * we don't care much about fragmentation
		 */
		dp_cluster_size = 2 * PAGE_SIZE;
	} else {
		/*
		 * use the default cluster size
		 */
		dp_cluster_size = 0;
	}
	kr = default_pager_backing_store_create(default_pager,
					-1, /* default priority */
					dp_cluster_size,
					&backing_store);
	memory_object_default_deallocate(default_pager);

	if(kr != KERN_SUCCESS) {
	   error = ENOMEM;
	   bs_port_table[i].vp = 0;
	   goto swapon_bailout;
	}

	/* Mark this vnode as being used for swapfile */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);

	/*
	 * NOTE: we are able to supply PAGE_SIZE here instead of
	 *	an actual record size or block number because:
	 *	a: we do not support offsets from the beginning of the
	 *		file (which would allow for non page-size/record
	 *		modulo offsets), and
	 *	b: all paging will be done in page-size units anyway.
	 */

	kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp,
				PAGE_SIZE, (int)(file_size/PAGE_SIZE));
	if(kr != KERN_SUCCESS) {
	   bs_port_table[i].vp = 0;
	   if(kr == KERN_INVALID_ARGUMENT)
		error = EINVAL;
	   else
		error = ENOMEM;

	   /* This vnode is not to be used for swapfile */
	   vnode_lock_spin(vp);
	   CLR(vp->v_flag, VSWAP);
	   vnode_unlock(vp);

	   goto swapon_bailout;
	}
	bs_port_table[i].bs = (void *)backing_store;
	error = 0;

	ubc_setthreadcred(vp, p, current_thread());

	/*
	 * take a long term reference on the vnode to keep
	 * vnreclaim() away from this vnode.
	 */
	vnode_ref(vp);

swapon_bailout:
	if (vp) {
		vnode_put(vp);
	}
	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);

	if (error)
		printf("macx_swapon FAILED - %d\n", error);
	else
		printf("macx_swapon SUCCESS\n");

	return(error);
}
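
/*
 * Illustrative sketch (not compiled here): the non-compressor path above
 * expects a pre-existing regular file, so a user-space pager would typically
 * create and size the file, then hand it to the kernel.  The user-level
 * prototype and argument order (filename as a 64-bit address, then flags,
 * size, priority) are assumptions -- verify against <mach/mach_traps.h>;
 * the path and sizes are placeholders.
 *
 *	int fd = open("/private/var/vm/swapfile0", O_CREAT | O_RDWR, 0600);
 *	if (fd >= 0) {
 *		ftruncate(fd, 64 * 1024 * 1024);	// pre-size to 64MB
 *		close(fd);
 *		if (macx_swapon((uint64_t)(uintptr_t)"/private/var/vm/swapfile0",
 *		                0, 64 * 1024 * 1024, 0) != 0) {
 *			// activation failed: clean up the file
 *			unlink("/private/var/vm/swapfile0");
 *		}
 *	}
 */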

/*
 *	Routine:	macx_swapoff
 *	Function:
 *		Syscall interface to remove a file from backing store
 */
int
macx_swapoff(
	struct macx_swapoff_args *args)
{
	__unused int	flags = args->flags;
	kern_return_t	kr;
	mach_port_t	backing_store;

	struct vnode		*vp = 0;
	struct nameidata	nd, *ndp;
	struct proc		*p =  current_proc();
	int			i;
	int			error;
	boolean_t		funnel_state;
	vfs_context_t ctx = vfs_context_current();
	int			orig_iopol_disk;

	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	backing_store = NULL;
	ndp = &nd;

	if ((error = suser(kauth_cred_get(), 0)))
		goto swapoff_bailout;

	/*
	 * Get the vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapoff_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapoff_bailout;
	}
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapoff(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapoff_bailout;
#endif

	for(i = 0; i < MAX_BACKING_STORE; i++) {
		if(bs_port_table[i].vp == vp) {
			break;
		}
	}
	if (i == MAX_BACKING_STORE) {
		error = EINVAL;
		goto swapoff_bailout;
	}
	backing_store = (mach_port_t)bs_port_table[i].bs;

	orig_iopol_disk = proc_get_task_policy(current_task(), current_thread(),
	                                       TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL);

	proc_set_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL,
	                     TASK_POLICY_IOPOL, IOPOL_THROTTLE);

	kr = default_pager_backing_store_delete(backing_store);

	proc_set_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL,
	                     TASK_POLICY_IOPOL, orig_iopol_disk);

	switch (kr) {
		case KERN_SUCCESS:
			error = 0;
			bs_port_table[i].vp = 0;
			/* This vnode is no longer used for swapfile */
			vnode_lock_spin(vp);
			CLR(vp->v_flag, VSWAP);
			vnode_unlock(vp);

			/* get rid of macx_swapon() "long term" reference */
			vnode_rele(vp);

			break;
		case KERN_FAILURE:
			error = EAGAIN;
			break;
		default:
			error = EAGAIN;
			break;
	}

swapoff_bailout:
	/* get rid of macx_swapoff() namei() reference */
	if (vp)
		vnode_put(vp);

	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);

	if (error)
		printf("macx_swapoff FAILED - %d\n", error);
	else
		printf("macx_swapoff SUCCESS\n");

	return(error);
}
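
/*
 * Illustrative sketch (not compiled here): removing a swapfile mirrors the
 * activation above -- the pager asks the kernel to drain and delete the
 * backing store, and only unlinks the file once the call succeeds.  The
 * user-level prototype is the same assumption as for macx_swapon(); the
 * path is a placeholder.
 *
 *	if (macx_swapoff((uint64_t)(uintptr_t)"/private/var/vm/swapfile0", 0) == 0) {
 *		// kernel no longer references the file; safe to remove it
 *		unlink("/private/var/vm/swapfile0");
 *	}
 */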

/*
 *	Routine:	macx_swapinfo
 *	Function:
 *		Syscall interface to get general swap statistics
 */
extern uint64_t vm_swap_get_total_space(void);
extern uint64_t vm_swap_get_used_space(void);
extern uint64_t vm_swap_get_free_space(void);
extern boolean_t vm_swap_up;

int
macx_swapinfo(
	memory_object_size_t	*total_p,
	memory_object_size_t	*avail_p,
	vm_size_t		*pagesize_p,
	boolean_t		*encrypted_p)
{
	int			error;
	memory_object_default_t	default_pager;
	default_pager_info_64_t	dpi64;
	kern_return_t		kr;

	error = 0;
	if (COMPRESSED_PAGER_IS_ACTIVE) {

		if (vm_swap_up == TRUE) {

			*total_p = vm_swap_get_total_space();
			*avail_p = vm_swap_get_free_space();
			*pagesize_p = PAGE_SIZE_64;
			*encrypted_p = TRUE;

		} else {

			*total_p = 0;
			*avail_p = 0;
			*pagesize_p = 0;
			*encrypted_p = FALSE;
		}
	} else {

		/*
		 * Get a handle on the default pager.
		 */
		default_pager = MEMORY_OBJECT_DEFAULT_NULL;
		kr = host_default_memory_manager(host_priv_self(), &default_pager, 0);
		if (kr != KERN_SUCCESS) {
			error = EAGAIN;	/* XXX why EAGAIN ? */
			goto done;
		}
		if (default_pager == MEMORY_OBJECT_DEFAULT_NULL) {
			/*
			 * The default pager has not initialized yet,
			 * so it can't be using any swap space at all.
			 */
			*total_p = 0;
			*avail_p = 0;
			*pagesize_p = 0;
			*encrypted_p = FALSE;
			goto done;
		}

		/*
		 * Get swap usage data from default pager.
		 */
		kr = default_pager_info_64(default_pager, &dpi64);
		if (kr != KERN_SUCCESS) {
			error = ENOTSUP;
			goto done;
		}

		/*
		 * Provide default pager info to caller.
		 */
		*total_p = dpi64.dpi_total_space;
		*avail_p = dpi64.dpi_free_space;
		*pagesize_p = dpi64.dpi_page_size;
		if (dpi64.dpi_flags & DPI_ENCRYPTED) {
			*encrypted_p = TRUE;
		} else {
			*encrypted_p = FALSE;
		}

done:
		if (default_pager != MEMORY_OBJECT_DEFAULT_NULL) {
			/* release our handle on default pager */
			memory_object_default_deallocate(default_pager);
		}
	}
	return error;
}
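
/*
 * Illustrative sketch (not compiled here): macx_swapinfo() is what ultimately
 * feeds the "vm.swapusage" sysctl, which reports total/avail space (with used
 * derived as total minus avail), the page size, and the encryption flag.  A
 * user-space reader looks roughly like this; struct xsw_usage comes from
 * <sys/sysctl.h>.
 *
 *	struct xsw_usage xsu;
 *	size_t len = sizeof(xsu);
 *
 *	if (sysctlbyname("vm.swapusage", &xsu, &len, NULL, 0) == 0) {
 *		printf("swap: %llu total, %llu used, %llu free, encrypted=%d\n",
 *		       xsu.xsu_total, xsu.xsu_used, xsu.xsu_avail,
 *		       xsu.xsu_encrypted);
 *	}
 */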