/*
 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>
#if CONFIG_PROTECT
#include <sys/cprotect.h>
#endif

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct		vnode *vp;
	int			flags;
	int			prot;
	int			err=0;
	vm_map_t		user_map;
	kern_return_t		result;
	vm_map_offset_t		user_addr;
	vm_map_size_t		user_size;
	vm_object_offset_t	pageoff;
	vm_object_offset_t	file_pos;
	int			alloc_flags=0;
	boolean_t		docow;
	vm_prot_t		maxprot;
	void 			*handle;
	memory_object_t		pager = MEMORY_OBJECT_NULL;
	memory_object_control_t	 control;
	int 			mapanon=0;
	int 			fpref=0;
	int error =0;
	int fd = uap->fd;
	int num_retries = 0;

	user_map = current_map();
	user_addr = (vm_map_offset_t)uap->addr;
	user_size = (vm_map_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The VM code does not have prototypes and the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & vm_map_page_mask(user_map));
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;	/* low end... */
	user_size = vm_map_round_page(user_size,
				      vm_map_page_mask(user_map)); /* hi end */

	if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON))){
		return EINVAL;
	}
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & vm_map_page_mask(user_map))
			return (EINVAL);
	}
#ifdef notyet
	/* Do not have APIs to get this info; need to wait until then. */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
					  vm_map_page_mask(user_map)))
		addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
					 vm_map_page_mask(user_map));

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {

		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}
#endif /* MAC */

		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK |
					    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}
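		/*
		 * Illustrative sketch (userland, not part of this file): the
		 * fd argument can carry a Mach VM alias tag for an anonymous
		 * mapping.  VM_MAKE_TAG() and the VM_MEMORY_* constants come
		 * from <mach/vm_statistics.h>; the length here is
		 * hypothetical.
		 *
		 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		 *	    MAP_ANON | MAP_PRIVATE,
		 *	    VM_MAKE_TAG(VM_MEMORY_MALLOC), 0);
		 */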

		handle = NULL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT)
			return EINVAL;

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		switch (FILEGLOB_DTYPE(fp->f_fglob)) {
		case DTYPE_PSXSHM:
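			/*
			 * POSIX shared memory object: re-pack the adjusted
			 * address, size and offset into "uap" and let
			 * pshm_mmap() establish the mapping; the offset
			 * within the first page is passed separately.
			 */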
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		case DTYPE_VNODE:
			break;
		default:
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if(error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (a la
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0)
						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */

#if CONFIG_PROTECT
			{
				error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
				if (error) {
					(void) vnode_put(vp);
					goto bad;
				}
			}
#endif /* CONFIG_PROTECT */


		}
	}

	if (user_size == 0)  {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = vm_map_round_page(user_size,
				      vm_map_page_mask(user_map));

	if (file_pos & vm_map_page_mask(user_map)) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = vm_map_round_page(user_addr,
					      vm_map_page_mask(user_map));
	} else {
		if (user_addr != vm_map_trunc_page(user_addr,
						   vm_map_page_mask(user_map))) {
			if (!mapanon)
				(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}
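	/*
	 * Illustrative sketch (userland, hypothetical names): with MAP_FIXED
	 * the new mapping atomically replaces whatever is already mapped at
	 * that range, which is exactly what VM_FLAGS_OVERWRITE asks of the
	 * Mach VM layer above.
	 *
	 *	void *fixed = mmap(reserved_base, reserved_len, PROT_READ,
	 *	    MAP_FILE | MAP_SHARED | MAP_FIXED, fd, 0);
	 */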

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	if (flags & MAP_JIT){
		alloc_flags |= VM_FLAGS_MAP_JIT;
	}
	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */
map_anon_retry:
		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}

		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 * 	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
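		/*
		 * A private (non-MAP_SHARED) file mapping is entered
		 * copy-on-write ("docow"), so stores by the process never
		 * reach the underlying file.
		 */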
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */
map_file_retry:
		result = vm_map_enter_mem_object_control(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 control, file_pos,
						 docow, prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this file backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error =  ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error =  EACCES;
		break;
	default:
		error =  EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
	return(error);
}
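
/*
 * Illustrative sketch (userland, not part of this file) of the common
 * file-backed path handled above; the path and length are hypothetical.
 *
 *	int fd = open("/tmp/example", O_RDONLY);
 *	void *base = mmap(NULL, 4096, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
 *	if (base == MAP_FAILED)
 *		err(1, "mmap");
 *	...
 *	munmap(base, 4096);
 *	close(fd);
 */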

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags=0;

	user_map = current_map();
	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
	if (addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a
		 * list of all mmaps done.  We cannot use vm_map_entry, as
		 * entries could be split or coalesced by independent
		 * actions.  So instead of returning inaccurate results,
		 * just return an error for the invalid size.
		 */
		return (EINVAL); /* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
		return (EINVAL);

	if (flags & MS_KILLPAGES)
		sync_flags |= VM_SYNC_KILLPAGES;
	if (flags & MS_DEACTIVATE)
		sync_flags |= VM_SYNC_DEACTIVATE;
	if (flags & MS_INVALIDATE)
		sync_flags |= VM_SYNC_INVALIDATE;

	if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC)
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		else
			sync_flags |= VM_SYNC_SYNCHRONOUS;
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;	/* complain if holes */

	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:	/* hole in region being sync'ed */
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}
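
/*
 * Illustrative sketch (userland, not part of this file): writing dirty
 * pages of a shared file mapping back synchronously, using the flag
 * translation above; "base" and the length are hypothetical.
 *
 *	if (msync(base, 4096, MS_SYNC) == -1)
 *		err(1, "msync");
 */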


int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t	user_addr;
	mach_vm_size_t		user_size;
	kern_return_t		result;
	vm_map_t		user_map;

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (user_addr + user_size < user_addr)
		return(EINVAL);

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

	result = mach_vm_deallocate(user_map, user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return(EINVAL);
	}
	return(0);
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	register vm_prot_t prot;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t	user_size;
	kern_return_t	result;
	vm_map_t	user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->prot);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED));

	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* 3936456 */

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
			user_size, prot);
	if (error)
		return (error);
#endif

	if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/* The special protection value VM_PROT_TRUSTED requests that we treat
		 * this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing the
		 * mac_proc_check_mprotect() hook above.  Otherwise, code signing will
		 * be compromised because the check would always succeed and thus any
		 * process could sign dynamically. */
		result = vm_map_sign(
			user_map,
			vm_map_trunc_page(user_addr,
					  vm_map_page_mask(user_map)),
			vm_map_round_page(user_addr+user_size,
					  vm_map_page_mask(user_map)));
		switch (result) {
			case KERN_SUCCESS:
				break;
			case KERN_INVALID_ADDRESS:
				/* UNIX SPEC: for an invalid address range, return ENOMEM */
				return ENOMEM;
			default:
				return EINVAL;
		}
#else
		return ENOTSUP;
#endif
	}
	prot &= ~VM_PROT_TRUSTED;

	result = mach_vm_protect(user_map, user_addr, user_size,
				 FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return (EINVAL);
}
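
/*
 * Illustrative sketch (userland, not part of this file): dropping write
 * permission on a page-aligned region; "base" and the length are
 * hypothetical.
 *
 *	if (mprotect(base, 4096, PROT_READ) == -1)
 *		err(1, "mprotect");
 */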


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	register vm_inherit_t inherit;
	vm_map_t	user_map;
	kern_return_t	result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->inherit);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;

	user_map = current_map();
	result = mach_vm_inherit(user_map, addr, size,
				inherit);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t	result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
		case MADV_RANDOM:
			new_behavior = VM_BEHAVIOR_RANDOM;
			break;
		case MADV_SEQUENTIAL:
			new_behavior = VM_BEHAVIOR_SEQUENTIAL;
			break;
		case MADV_NORMAL:
			new_behavior = VM_BEHAVIOR_DEFAULT;
			break;
		case MADV_WILLNEED:
			new_behavior = VM_BEHAVIOR_WILLNEED;
			break;
		case MADV_DONTNEED:
			new_behavior = VM_BEHAVIOR_DONTNEED;
			break;
		case MADV_FREE:
			new_behavior = VM_BEHAVIOR_FREE;
			break;
		case MADV_ZERO_WIRED_PAGES:
			new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
			break;
		case MADV_FREE_REUSABLE:
			new_behavior = VM_BEHAVIOR_REUSABLE;
			break;
		case MADV_FREE_REUSE:
			new_behavior = VM_BEHAVIOR_REUSE;
			break;
		case MADV_CAN_REUSE:
			new_behavior = VM_BEHAVIOR_CAN_REUSE;
			break;
		default:
			return(EINVAL);
	}

	start = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;

	user_map = current_map();

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
		return EINVAL;
	case KERN_NO_SPACE:
		return ENOMEM;
	}

	return EINVAL;
}

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr, first_addr, end;
	vm_map_t map;
	user_addr_t vec;
	int error;
	int vecindex, lastvecindex;
	int mincoreinfo=0;
	int pqueryinfo;
	kern_return_t	ret;
	int numref;

	char c;

	map = current_map();

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = vm_map_trunc_page(uap->addr,
					      vm_map_page_mask(map));
	end = addr + vm_map_round_page(uap->len,
				       vm_map_page_mask(map));

	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = current_map();

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for( ; addr < end; addr += PAGE_SIZE ) {
		pqueryinfo = 0;
		ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
		if (ret != KERN_SUCCESS)
			pqueryinfo = 0;
		mincoreinfo = 0;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
			mincoreinfo |= MINCORE_INCORE;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
			mincoreinfo |= MINCORE_REFERENCED;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
			mincoreinfo |= MINCORE_MODIFIED;


		/*
		 * calculate index into user supplied byte vector
		 */
		vecindex = (addr - first_addr)>> PAGE_SHIFT;

		/*
		 * If we have skipped map entries, we need to make sure that
		 * the byte vector is zeroed for those skipped entries.
		 */
		while((lastvecindex + 1) < vecindex) {
			c = 0;
			error = copyout(&c, vec + lastvecindex, 1);
			if (error) {
				return (EFAULT);
			}
			++lastvecindex;
		}

		/*
		 * Pass the page information to the user
		 */
		c = (char)mincoreinfo;
		error = copyout(&c, vec + vecindex, 1);
		if (error) {
			return (EFAULT);
		}
		lastvecindex = vecindex;
	}


	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = (end - first_addr) >> PAGE_SHIFT;
	while((lastvecindex + 1) < vecindex) {
		c = 0;
		error = copyout(&c, vec + lastvecindex, 1);
		if (error) {
			return (EFAULT);
		}
		++lastvecindex;
	}

	return (0);
}
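
/*
 * Illustrative sketch (userland, not part of this file): querying residency
 * one byte per page, as produced by the loop above; "base" and the length
 * are hypothetical.
 *
 *	char vec[4];
 *	if (mincore(base, 4 * 4096, vec) == 0 && (vec[0] & MINCORE_INCORE))
 *		printf("first page is resident\n");
 */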

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t	result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (vm_map_offset_t) uap->addr;
	size = (vm_map_size_t)uap->len;

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	user_map = current_map();
	pageoff = (addr & vm_map_page_mask(user_map));
	addr -= pageoff;
	size = vm_map_round_page(size+pageoff, vm_map_page_mask(user_map));

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE)
		return EAGAIN;
	else if (result != KERN_SUCCESS)
		return ENOMEM;

	return 0;	/* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t	result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();

	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
	return (result == KERN_SUCCESS ? 0 : ENOMEM);
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
	return (ENOSYS);
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
	return(ENOSYS);
}

#if CONFIG_CODE_DECRYPTION
int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t	user_addr;
    mach_vm_size_t	user_size;
    kern_return_t	result;
    vm_map_t	user_map;
    uint32_t	cryptid;
    cpu_type_t	cputype;
    cpu_subtype_t	cpusubtype;
    pager_crypt_info_t crypt_info;
    const char * cryptname = 0;
    char *vpath;
    int len, ret;
    struct proc_regioninfo_internal pinfo;
    vnode_t vp;
    uintptr_t vnodeaddr;
    uint32_t vid;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    cryptid = uap->cryptid;
    cputype = uap->cputype;
    cpusubtype = uap->cpusubtype;

    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }

    switch(cryptid) {
        case 0:
            /* not encrypted, just an empty load command */
            return 0;
        case 1:
            cryptname="com.apple.unfree";
            break;
        case 0x10:
            /* some random cryptid that you could manually put into
             * your binary if you want NULL */
            cryptname="com.apple.null";
            break;
        default:
            return EINVAL;
    }

    if (NULL == text_crypter_create) return ENOTSUP;

    ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
    if (ret == 0 || !vnodeaddr) {
        /* No really, this returns 0 if the memory address is not backed by a file */
        return (EINVAL);
    }

    vp = (vnode_t)vnodeaddr;
    if ((vnode_getwithvid(vp, vid)) == 0) {
        MALLOC_ZONE(vpath, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
        if(vpath == NULL) {
            vnode_put(vp);
            return (ENOMEM);
        }

        len = MAXPATHLEN;
        ret = vn_getpath(vp, vpath, &len);
        if(ret) {
            FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
            vnode_put(vp);
            return (ret);
        }

        vnode_put(vp);
    } else {
        return (EINVAL);
    }

#if 0
    kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
            __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

    /* set up decrypter first */
    crypt_file_data_t crypt_data = {
        .filename = vpath,
        .cputype = cputype,
        .cpusubtype = cpusubtype };
    result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
    FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);

    if(result) {
        printf("%s: unable to create decrypter %s, kr=%d\n",
               __FUNCTION__, cryptname, result);
        if (result == kIOReturnNotPrivileged) {
            /* text encryption returned decryption failure */
            return (EPERM);
        } else {
            return (ENOMEM);
        }
    }

    /* now remap using the decrypter */
    result = vm_map_apple_protected(user_map, user_addr, user_addr+user_size, &crypt_info);
    if (result) {
        printf("%s: mapping failed with %d\n", __FUNCTION__, result);
        crypt_info.crypt_end(crypt_info.crypt_ops);
        return (EPERM);
    }

    return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */