1/*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections.  This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/lock.h>
38#include <sys/proc_internal.h>
39#include <sys/kauth.h>
40#include <sys/buf.h>
41#include <sys/uio.h>
42#include <sys/vnode_internal.h>
43#include <sys/namei.h>
44#include <sys/ubc_internal.h>
45#include <sys/malloc.h>
46#include <sys/user.h>
47#if CONFIG_PROTECT
48#include <sys/cprotect.h>
49#endif
50
51#include <default_pager/default_pager_types.h>
52#include <default_pager/default_pager_object.h>
53
54#include <security/audit/audit.h>
55#include <bsm/audit_kevents.h>
56
57#include <mach/mach_types.h>
58#include <mach/host_priv.h>
59#include <mach/mach_traps.h>
60#include <mach/boolean.h>
61
62#include <kern/kern_types.h>
63#include <kern/locks.h>
64#include <kern/host.h>
65#include <kern/task.h>
66#include <kern/zalloc.h>
67#include <kern/kalloc.h>
68#include <kern/assert.h>
69
70#include <libkern/libkern.h>
71
72#include <vm/vm_pageout.h>
73#include <vm/vm_map.h>
74#include <vm/vm_kern.h>
75#include <vm/vnode_pager.h>
76#include <vm/vm_protos.h>
77#if CONFIG_MACF
78#include <security/mac_framework.h>
79#endif
80
81#include <pexpert/pexpert.h>
82
void macx_init(void);

/*
 * Lock group and mutex serializing all macx_* state changes below
 * (backing-store table, suspend/compaction flags).
 */
static lck_grp_t *macx_lock_group;
static lck_mtx_t *macx_lock;

/*
 * temporary support for delayed instantiation
 * of default_pager
 */
/* non-zero once start_def_pager() has been called (see macx_swapon) */
int default_pager_init_flag = 0;

/*
 * Table of active backing-store files: one {vnode, backing-store port}
 * pair per slot.  A slot with vp == 0 is free.  Protected by macx_lock.
 */
struct bs_map		bs_port_table[MAX_BACKING_STORE] = {
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
	{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}};
105
106/* ###################################################### */
107
108/*
109 *	Routine:	macx_init
110 *	Function:
111 *		Initialize locks so that only one caller can change
112 *      state at a time.
113 */
114void
115macx_init(void)
116{
117	macx_lock_group = lck_grp_alloc_init("macx", NULL);
118	macx_lock = lck_mtx_alloc_init(macx_lock_group, NULL);
119}
120
121/*
122 *	Routine:	macx_backing_store_recovery
123 *	Function:
124 *		Syscall interface to set a tasks privilege
125 *		level so that it is not subject to
126 *		macx_backing_store_suspend
127 */
128int
129macx_backing_store_recovery(
130	struct macx_backing_store_recovery_args *args)
131{
132	int		pid = args->pid;
133	int		error;
134	struct proc	*p =  current_proc();
135
136	if ((error = suser(kauth_cred_get(), 0)))
137		goto backing_store_recovery_return;
138
139	/* for now restrict backing_store_recovery */
140	/* usage to only present task */
141	if(pid != proc_selfpid()) {
142		error = EINVAL;
143		goto backing_store_recovery_return;
144	}
145
146	task_backing_store_privileged(p->task);
147
148backing_store_recovery_return:
149	return(error);
150}
151
152/*
153 *	Routine:	macx_backing_store_suspend
154 *	Function:
155 *		Syscall interface to stop new demand for
156 *		backing store when backing store is low
157 */
158
159int
160macx_backing_store_suspend(
161	struct macx_backing_store_suspend_args *args)
162{
163	boolean_t	suspend = args->suspend;
164	int		error;
165
166	lck_mtx_lock(macx_lock);
167	if ((error = suser(kauth_cred_get(), 0)))
168		goto backing_store_suspend_return;
169
170	/* Multiple writers protected by macx_lock */
171	vm_backing_store_disable(suspend);
172
173backing_store_suspend_return:
174	lck_mtx_unlock(macx_lock);
175	return(error);
176}
177
178extern boolean_t backing_store_stop_compaction;
179extern boolean_t compressor_store_stop_compaction;
180
181/*
182 *	Routine:	macx_backing_store_compaction
183 *	Function:
184 *		Turn compaction of swap space on or off.  This is
185 *		used during shutdown/restart so	that the kernel
186 *		doesn't waste time compacting swap files that are
187 *		about to be deleted anyway.  Compaction	is always
188 *		on by default when the system comes up and is turned
189 *		off when a shutdown/restart is requested.  It is
190 *		re-enabled if the shutdown/restart is aborted for any reason.
191 *
192 *  This routine assumes macx_lock has been locked by macx_triggers ->
193 *      mach_macx_triggers -> macx_backing_store_compaction
194 */
195
196int
197macx_backing_store_compaction(int flags)
198{
199	int error;
200
201	lck_mtx_assert(macx_lock, LCK_MTX_ASSERT_OWNED);
202	if ((error = suser(kauth_cred_get(), 0)))
203		return error;
204
205	if (flags & SWAP_COMPACT_DISABLE) {
206		backing_store_stop_compaction = TRUE;
207		compressor_store_stop_compaction = TRUE;
208
209		kprintf("backing_store_stop_compaction = TRUE\n");
210
211	} else if (flags & SWAP_COMPACT_ENABLE) {
212		backing_store_stop_compaction = FALSE;
213		compressor_store_stop_compaction = FALSE;
214
215		kprintf("backing_store_stop_compaction = FALSE\n");
216	}
217
218	return 0;
219}
220
221/*
222 *	Routine:	macx_triggers
223 *	Function:
224 *		Syscall interface to set the call backs for low and
225 *		high water marks.
226 */
227int
228macx_triggers(
229	struct macx_triggers_args *args)
230{
231	int	error;
232
233	lck_mtx_lock(macx_lock);
234	error = suser(kauth_cred_get(), 0);
235	if (error)
236		return error;
237
238	error = mach_macx_triggers(args);
239
240	lck_mtx_unlock(macx_lock);
241	return error;
242}
243
244
245extern boolean_t dp_isssd;
246
247/*
248 * In the compressed pager world, the swapfiles are created by the kernel.
249 * Well, all except the first one. That swapfile is absorbed by the kernel at
250 * the end of the macx_swapon function (if swap is enabled). That's why
251 * we allow the first invocation of macx_swapon to succeed.
252 *
253 * If the compressor pool is running low, the kernel messages the dynamic pager
254 * on the port it has registered with the kernel. That port can transport 1 of 2
255 * pieces of information to dynamic pager: create a swapfile or delete a swapfile.
256 *
257 * We choose to transmit the former. So, that message tells dynamic pager
258 * to create a swapfile and activate it by calling macx_swapon.
259 *
260 * We deny this new macx_swapon request. That leads dynamic pager to interpret the
261 * failure as a serious error and notify all it's clients that swap is running low.
262 * That's how we get the loginwindow "Resume / Force Quit Applications" dialog to appear.
263 *
264 * NOTE:
265 * If the kernel has already created multiple swapfiles by the time the compressor
266 * pool is running low (and it has to play this trick), dynamic pager won't be able to
267 * create a file in user-space and, that too will lead to a similar notification blast
268 * to all of it's clients. So, that behaves as desired too.
269 */
270boolean_t	macx_swapon_allowed = TRUE;
271
272/*
273 *	Routine:	macx_swapon
274 *	Function:
275 *		Syscall interface to add a file to backing store
276 */
277int
278macx_swapon(
279	struct macx_swapon_args *args)
280{
281	int			size = args->size;
282	vnode_t			vp = (vnode_t)NULL;
283	struct nameidata 	nd, *ndp;
284	register int		error;
285	kern_return_t		kr;
286	mach_port_t		backing_store;
287	memory_object_default_t	default_pager;
288	int			i;
289	off_t			file_size;
290	vfs_context_t		ctx = vfs_context_current();
291	struct proc		*p =  current_proc();
292	int			dp_cluster_size;
293
294	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON);
295	AUDIT_ARG(value32, args->priority);
296
297	lck_mtx_lock(macx_lock);
298
299	if (COMPRESSED_PAGER_IS_ACTIVE) {
300		if (macx_swapon_allowed == FALSE) {
301			error = EINVAL;
302			goto swapon_bailout;
303		} else {
304			macx_swapon_allowed = FALSE;
305			error = 0;
306			goto swapon_bailout;
307		}
308	}
309
310	ndp = &nd;
311
312	if ((error = suser(kauth_cred_get(), 0)))
313		goto swapon_bailout;
314
315	/*
316	 * Get a vnode for the paging area.
317	 */
318	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
319	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
320	       (user_addr_t) args->filename, ctx);
321
322	if ((error = namei(ndp)))
323		goto swapon_bailout;
324	nameidone(ndp);
325	vp = ndp->ni_vp;
326
327	if (vp->v_type != VREG) {
328		error = EINVAL;
329		goto swapon_bailout;
330	}
331
332	/* get file size */
333	if ((error = vnode_size(vp, &file_size, ctx)) != 0)
334		goto swapon_bailout;
335#if CONFIG_MACF
336	vnode_lock(vp);
337	error = mac_system_check_swapon(vfs_context_ucred(ctx), vp);
338	vnode_unlock(vp);
339	if (error)
340		goto swapon_bailout;
341#endif
342
343	/* resize to desired size if it's too small */
344	if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0))
345		goto swapon_bailout;
346
347#if CONFIG_PROTECT
348	{
349		/* initialize content protection keys manually */
350		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
351			goto swapon_bailout;
352 		}
353	}
354#endif
355
356
357	if (default_pager_init_flag == 0) {
358		start_def_pager(NULL);
359		default_pager_init_flag = 1;
360	}
361
362	/* add new backing store to list */
363	i = 0;
364	while(bs_port_table[i].vp != 0) {
365		if(i == MAX_BACKING_STORE)
366			break;
367		i++;
368	}
369	if(i == MAX_BACKING_STORE) {
370	   	error = ENOMEM;
371		goto swapon_bailout;
372	}
373
374	/* remember the vnode. This vnode has namei() reference */
375	bs_port_table[i].vp = vp;
376
377	/*
378	 * Look to see if we are already paging to this file.
379	 */
380	/* make certain the copy send of kernel call will work */
381	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
382	kr = host_default_memory_manager(host_priv_self(), &default_pager, 0);
383	if(kr != KERN_SUCCESS) {
384	   error = EAGAIN;
385	   bs_port_table[i].vp = 0;
386	   goto swapon_bailout;
387	}
388
389	if ((dp_isssd = vnode_pager_isSSD(vp)) == TRUE) {
390		/*
391		 * keep the cluster size small since the
392		 * seek cost is effectively 0 which means
393		 * we don't care much about fragmentation
394		 */
395		dp_cluster_size = 2 * PAGE_SIZE;
396	} else {
397		/*
398		 * use the default cluster size
399		 */
400		dp_cluster_size = 0;
401	}
402	kr = default_pager_backing_store_create(default_pager,
403					-1, /* default priority */
404					dp_cluster_size,
405					&backing_store);
406	memory_object_default_deallocate(default_pager);
407
408	if(kr != KERN_SUCCESS) {
409	   error = ENOMEM;
410	   bs_port_table[i].vp = 0;
411	   goto swapon_bailout;
412	}
413
414	/* Mark this vnode as being used for swapfile */
415	vnode_lock_spin(vp);
416	SET(vp->v_flag, VSWAP);
417	vnode_unlock(vp);
418
419	/*
420	 * NOTE: we are able to supply PAGE_SIZE here instead of
421	 *	an actual record size or block number because:
422	 *	a: we do not support offsets from the beginning of the
423	 *		file (allowing for non page size/record modulo offsets.
424	 *	b: because allow paging will be done modulo page size
425	 */
426
427	kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp,
428				PAGE_SIZE, (int)(file_size/PAGE_SIZE));
429	if(kr != KERN_SUCCESS) {
430	   bs_port_table[i].vp = 0;
431	   if(kr == KERN_INVALID_ARGUMENT)
432		error = EINVAL;
433	   else
434		error = ENOMEM;
435
436	   /* This vnode is not to be used for swapfile */
437	   vnode_lock_spin(vp);
438	   CLR(vp->v_flag, VSWAP);
439	   vnode_unlock(vp);
440
441	   goto swapon_bailout;
442	}
443	bs_port_table[i].bs = (void *)backing_store;
444	error = 0;
445
446	ubc_setthreadcred(vp, p, current_thread());
447
448	/*
449	 * take a long term reference on the vnode to keep
450	 * vnreclaim() away from this vnode.
451	 */
452	vnode_ref(vp);
453
454swapon_bailout:
455	if (vp) {
456		vnode_put(vp);
457	}
458	lck_mtx_unlock(macx_lock);
459	AUDIT_MACH_SYSCALL_EXIT(error);
460
461	if (error)
462		printf("macx_swapon FAILED - %d\n", error);
463	else
464		printf("macx_swapon SUCCESS\n");
465
466	return(error);
467}
468
469/*
470 *	Routine:	macx_swapoff
471 *	Function:
472 *		Syscall interface to remove a file from backing store
473 */
474int
475macx_swapoff(
476	struct macx_swapoff_args *args)
477{
478	__unused int	flags = args->flags;
479	kern_return_t	kr;
480	mach_port_t	backing_store;
481
482	struct vnode		*vp = 0;
483	struct nameidata 	nd, *ndp;
484	struct proc		*p =  current_proc();
485	int			i;
486	int			error;
487	vfs_context_t ctx = vfs_context_current();
488	int			orig_iopol_disk;
489
490	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF);
491
492	lck_mtx_lock(macx_lock);
493
494	backing_store = NULL;
495	ndp = &nd;
496
497	if ((error = suser(kauth_cred_get(), 0)))
498		goto swapoff_bailout;
499
500	/*
501	 * Get the vnode for the paging area.
502	 */
503	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
504	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
505	       (user_addr_t) args->filename, ctx);
506
507	if ((error = namei(ndp)))
508		goto swapoff_bailout;
509	nameidone(ndp);
510	vp = ndp->ni_vp;
511
512	if (vp->v_type != VREG) {
513		error = EINVAL;
514		goto swapoff_bailout;
515	}
516#if CONFIG_MACF
517	vnode_lock(vp);
518	error = mac_system_check_swapoff(vfs_context_ucred(ctx), vp);
519	vnode_unlock(vp);
520	if (error)
521		goto swapoff_bailout;
522#endif
523
524	for(i = 0; i < MAX_BACKING_STORE; i++) {
525		if(bs_port_table[i].vp == vp) {
526			break;
527		}
528	}
529	if (i == MAX_BACKING_STORE) {
530		error = EINVAL;
531		goto swapoff_bailout;
532	}
533	backing_store = (mach_port_t)bs_port_table[i].bs;
534
535	orig_iopol_disk = proc_get_task_policy(current_task(), current_thread(),
536	                                       TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL);
537
538	proc_set_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL,
539	                     TASK_POLICY_IOPOL, IOPOL_THROTTLE);
540
541	kr = default_pager_backing_store_delete(backing_store);
542
543	proc_set_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL,
544	                     TASK_POLICY_IOPOL, orig_iopol_disk);
545
546	switch (kr) {
547		case KERN_SUCCESS:
548			error = 0;
549			bs_port_table[i].vp = 0;
550			/* This vnode is no longer used for swapfile */
551			vnode_lock_spin(vp);
552			CLR(vp->v_flag, VSWAP);
553			vnode_unlock(vp);
554
555			/* get rid of macx_swapon() "long term" reference */
556			vnode_rele(vp);
557
558			break;
559		case KERN_FAILURE:
560			error = EAGAIN;
561			break;
562		default:
563			error = EAGAIN;
564			break;
565	}
566
567swapoff_bailout:
568	/* get rid of macx_swapoff() namei() reference */
569	if (vp)
570		vnode_put(vp);
571	lck_mtx_unlock(macx_lock);
572	AUDIT_MACH_SYSCALL_EXIT(error);
573
574	if (error)
575		printf("macx_swapoff FAILED - %d\n", error);
576	else
577		printf("macx_swapoff SUCCESS\n");
578
579	return(error);
580}
581
582/*
583 *	Routine:	macx_swapinfo
584 *	Function:
585 *		Syscall interface to get general swap statistics
586 */
587extern uint64_t vm_swap_get_total_space(void);
588extern uint64_t vm_swap_get_used_space(void);
589extern uint64_t vm_swap_get_free_space(void);
590extern boolean_t vm_swap_up;
591
592int
593macx_swapinfo(
594	memory_object_size_t	*total_p,
595	memory_object_size_t	*avail_p,
596	vm_size_t		*pagesize_p,
597	boolean_t		*encrypted_p)
598{
599	int			error;
600	memory_object_default_t	default_pager;
601	default_pager_info_64_t	dpi64;
602	kern_return_t		kr;
603
604	error = 0;
605	if (COMPRESSED_PAGER_IS_ACTIVE) {
606
607		if (vm_swap_up == TRUE) {
608
609			*total_p = vm_swap_get_total_space();
610			*avail_p = vm_swap_get_free_space();
611			*pagesize_p = (vm_size_t)PAGE_SIZE_64;
612			*encrypted_p = TRUE;
613
614		} else {
615
616			*total_p = 0;
617			*avail_p = 0;
618			*pagesize_p = 0;
619			*encrypted_p = FALSE;
620		}
621	} else {
622
623		/*
624		 * Get a handle on the default pager.
625		 */
626		default_pager = MEMORY_OBJECT_DEFAULT_NULL;
627		kr = host_default_memory_manager(host_priv_self(), &default_pager, 0);
628		if (kr != KERN_SUCCESS) {
629			error = EAGAIN;	/* XXX why EAGAIN ? */
630			goto done;
631		}
632		if (default_pager == MEMORY_OBJECT_DEFAULT_NULL) {
633			/*
634			 * The default pager has not initialized yet,
635			 * so it can't be using any swap space at all.
636			 */
637			*total_p = 0;
638			*avail_p = 0;
639			*pagesize_p = 0;
640			*encrypted_p = FALSE;
641			goto done;
642		}
643
644		/*
645		 * Get swap usage data from default pager.
646		 */
647		kr = default_pager_info_64(default_pager, &dpi64);
648		if (kr != KERN_SUCCESS) {
649			error = ENOTSUP;
650			goto done;
651		}
652
653		/*
654		 * Provide default pager info to caller.
655		 */
656		*total_p = dpi64.dpi_total_space;
657		*avail_p = dpi64.dpi_free_space;
658		*pagesize_p = dpi64.dpi_page_size;
659		if (dpi64.dpi_flags & DPI_ENCRYPTED) {
660			*encrypted_p = TRUE;
661		} else {
662			*encrypted_p = FALSE;
663		}
664
665done:
666		if (default_pager != MEMORY_OBJECT_DEFAULT_NULL) {
667			/* release our handle on default pager */
668			memory_object_default_deallocate(default_pager);
669		}
670	}
671	return error;
672}
673