/*
 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>
#if CONFIG_PROTECT
#include <sys/cprotect.h>
#endif

#include <sys/syscall.h>
#include <sys/kdebug.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

/* XXX the following function should probably be static */
kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *,
				boolean_t, vm_size_t);

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
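/*
 * mmap() system call handler.
 *
 * Translates the BSD mmap() arguments into a Mach VM mapping request:
 * anonymous requests go through vm_map_enter_mem_object(), file-backed
 * requests obtain the vnode's UBC memory object control and go through
 * vm_map_enter_mem_object_control().  On success, the chosen user address
 * (plus the page offset of the requested file position) is returned via
 * "retval"; Mach kern_return_t results are translated to BSD errnos.
 */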
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct		vnode *vp;
	int			flags;
	int			prot;
	int			err=0;
	vm_map_t		user_map;
	kern_return_t		result;
	vm_map_offset_t		user_addr;
	vm_map_size_t		user_size;
	vm_object_offset_t	pageoff;
	vm_object_offset_t	file_pos;
	int			alloc_flags=0;
	boolean_t		docow;
	vm_prot_t		maxprot;
	void 			*handle;
	memory_object_t		pager = MEMORY_OBJECT_NULL;
	memory_object_control_t	 control;
	int 			mapanon=0;
	int 			fpref=0;
	int error =0;
	int fd = uap->fd;
	int num_retries = 0;

	user_addr = (vm_map_offset_t)uap->addr;
	user_size = (vm_map_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The VM code does not have prototypes, and the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;			/* low end... */
	user_size = mach_vm_round_page(user_size);	/* hi end */
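	/*
	 * For illustration, assuming a hypothetical 4 KiB page size
	 * (PAGE_SIZE 0x1000): a request with pos = 0x12345 and len = 0x100
	 * yields pageoff = 0x345 and file_pos = 0x12000, and user_size is
	 * bumped to 0x445 and then rounded up to 0x1000, so the mapping
	 * covers the whole page containing the requested range.
	 */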

	if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON))){
		return EINVAL;
	}
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
			return (EINVAL);
	}
#ifdef notyet
	/* We do not have APIs to get this info; need to wait until then. */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {

		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}
#endif /* MAC */

		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK |
					    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}
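		/*
		 * For example (a hypothetical caller, not part of this file),
		 * a user-space allocator might tag a purgeable anonymous
		 * region with something like:
		 *
		 *	mmap(NULL, size, PROT_READ | PROT_WRITE,
		 *	     MAP_ANON | MAP_PRIVATE,
		 *	     VM_MAKE_TAG(VM_MEMORY_MALLOC) | VM_FLAGS_PURGABLE, 0);
		 *
		 * Any bit outside the masks accepted above is rejected with
		 * EINVAL by the alloc_flags check.
		 */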

		handle = NULL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT)
			return EINVAL;

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		if(fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if(error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0)
						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */

#if CONFIG_PROTECT
			{
				error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
				if (error) {
					(void) vnode_put(vp);
					goto bad;
				}
			}
#endif /* CONFIG_PROTECT */


		}
	}

	if (user_size == 0)  {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
			if (!mapanon)
				(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	if (flags & MAP_JIT){
		alloc_flags |= VM_FLAGS_MAP_JIT;
	}
	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */
map_anon_retry:
		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = PAGE_SIZE;
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}

		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 * 	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */
map_file_retry:
		result = vm_map_enter_mem_object_control(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 control, file_pos,
						 docow, prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this file backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = PAGE_SIZE;
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error =  ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error =  EACCES;
		break;
	default:
		error =  EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
	return(error);
}
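
/*
 * Usage sketch (user space, not compiled here; assumes a hypothetical
 * regular file descriptor "fd" and a 4 KiB page size):
 *
 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	if (p == MAP_FAILED)
 *		err(1, "mmap");
 *	((char *)p)[0] = 'x';				// dirty the shared page
 *	if (msync(p, 4096, MS_SYNC) == -1)		// push it back to the file
 *		err(1, "msync");
 *	(void)munmap(p, 4096);
 */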

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags=0;

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
	if (addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a list
		 * of all the mmaps that have been done.  We cannot use
		 * vm_map_entry structures, as they could be split or
		 * coalesced by independent actions.  So instead of returning
		 * inaccurate results, just return an error for an invalid
		 * size.
		 */
		return (EINVAL); /* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
		return (EINVAL);

	if (flags & MS_KILLPAGES)
		sync_flags |= VM_SYNC_KILLPAGES;
	if (flags & MS_DEACTIVATE)
		sync_flags |= VM_SYNC_DEACTIVATE;
	if (flags & MS_INVALIDATE)
		sync_flags |= VM_SYNC_INVALIDATE;

	if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC)
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		else
			sync_flags |= VM_SYNC_SYNCHRONOUS;
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;	/* complain if holes */

	user_map = current_map();
	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:	/* hole in region being sync'ed */
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}


int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t	user_addr;
	mach_vm_size_t	user_size;
	kern_return_t	result;

	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (user_addr + user_size < user_addr)
		return(EINVAL);

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

	result = mach_vm_deallocate(current_map(), user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return(EINVAL);
	}
	return(0);
}

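/*
 * mprotect() system call handler.
 *
 * Changes the current protection of the given page-aligned range via
 * mach_vm_protect().  Write or execute requests imply read access (see
 * the radar 3936456 workaround below).  The special VM_PROT_TRUSTED bit,
 * when dynamic code signing is configured, requests that the range be
 * treated as if it carried a valid code signature.
 */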
int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	register vm_prot_t prot;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t	user_size;
	kern_return_t	result;
	vm_map_t	user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->prot);

	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED));

	if (user_addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* 3936456 */

	user_map = current_map();

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    		user_size, prot);
	if (error)
		return (error);
#endif

	if(prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/* The special protection value VM_PROT_TRUSTED requests that we treat
		 * this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing the
		 * mac_proc_check_mprotect() hook above. Otherwise, Codesigning will be
		 * compromised because the check would always succeed and thusly any
		 * process could sign dynamically. */
		result = vm_map_sign(user_map,
				     vm_map_trunc_page(user_addr),
				     vm_map_round_page(user_addr+user_size));
		switch (result) {
			case KERN_SUCCESS:
				break;
			case KERN_INVALID_ADDRESS:
				/* UNIX SPEC: for an invalid address range, return ENOMEM */
				return ENOMEM;
			default:
				return EINVAL;
		}
#else
		return ENOTSUP;
#endif
	}
	prot &= ~VM_PROT_TRUSTED;

	result = mach_vm_protect(user_map, user_addr, user_size,
				 FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return (EINVAL);
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	register vm_inherit_t inherit;
	vm_map_t	user_map;
	kern_return_t	result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->inherit);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;

	user_map = current_map();
	result = mach_vm_inherit(user_map, addr, size,
				inherit);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t	result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
		case MADV_RANDOM:
			new_behavior = VM_BEHAVIOR_RANDOM;
			break;
		case MADV_SEQUENTIAL:
			new_behavior = VM_BEHAVIOR_SEQUENTIAL;
			break;
		case MADV_NORMAL:
			new_behavior = VM_BEHAVIOR_DEFAULT;
			break;
		case MADV_WILLNEED:
			new_behavior = VM_BEHAVIOR_WILLNEED;
			break;
		case MADV_DONTNEED:
			new_behavior = VM_BEHAVIOR_DONTNEED;
			break;
		case MADV_FREE:
			new_behavior = VM_BEHAVIOR_FREE;
			break;
		case MADV_ZERO_WIRED_PAGES:
			new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
			break;
		case MADV_FREE_REUSABLE:
			new_behavior = VM_BEHAVIOR_REUSABLE;
			break;
		case MADV_FREE_REUSE:
			new_behavior = VM_BEHAVIOR_REUSE;
			break;
		case MADV_CAN_REUSE:
			new_behavior = VM_BEHAVIOR_CAN_REUSE;
			break;
		default:
			return(EINVAL);
	}
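
	/*
	 * Note: MADV_FREE_REUSABLE / MADV_FREE_REUSE / MADV_CAN_REUSE are
	 * Darwin-specific hints.  A sketch of the intended pattern (user
	 * space, hypothetical allocator, not compiled here):
	 *
	 *	madvise(buf, len, MADV_FREE_REUSABLE);	// pages may be reclaimed
	 *	...
	 *	madvise(buf, len, MADV_FREE_REUSE);	// about to touch them again
	 */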

	start = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;

	user_map = current_map();

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
		return EINVAL;
	case KERN_NO_SPACE:
		return ENOMEM;
	}

	return EINVAL;
}

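/*
 * mincore() system call handler.
 *
 * For each page in the requested range, queries the VM system with
 * mach_vm_page_query() and copies one status byte out to the user
 * supplied vector "vec": MINCORE_INCORE if the page is resident,
 * plus MINCORE_REFERENCED and/or MINCORE_MODIFIED as reported.
 */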
int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr, first_addr, end;
	vm_map_t map;
	user_addr_t vec;
	int error;
	int vecindex, lastvecindex;
	int mincoreinfo=0;
	int pqueryinfo;
	kern_return_t	ret;
	int numref;

	char c;

	map = current_map();

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = mach_vm_trunc_page(uap->addr);
	end = addr + mach_vm_round_page(uap->len);

	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = current_map();

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for( ; addr < end; addr += PAGE_SIZE ) {
		pqueryinfo = 0;
		ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
		if (ret != KERN_SUCCESS)
			pqueryinfo = 0;
		mincoreinfo = 0;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
			mincoreinfo |= MINCORE_INCORE;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
			mincoreinfo |= MINCORE_REFERENCED;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
			mincoreinfo |= MINCORE_MODIFIED;


		/*
		 * calculate index into user supplied byte vector
		 */
		vecindex = (addr - first_addr)>> PAGE_SHIFT;

		/*
		 * If we have skipped map entries, we need to make sure that
		 * the byte vector is zeroed for those skipped entries.
		 */
		while((lastvecindex + 1) < vecindex) {
			c = 0;
			error = copyout(&c, vec + lastvecindex, 1);
			if (error) {
				return (EFAULT);
			}
			++lastvecindex;
		}

		/*
		 * Pass the page information to the user
		 */
		c = (char)mincoreinfo;
		error = copyout(&c, vec + vecindex, 1);
		if (error) {
			return (EFAULT);
		}
		lastvecindex = vecindex;
	}


	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = (end - first_addr) >> PAGE_SHIFT;
	while((lastvecindex + 1) < vecindex) {
		c = 0;
		error = copyout(&c, vec + lastvecindex, 1);
		if (error) {
			return (EFAULT);
		}
		++lastvecindex;
	}

	return (0);
}

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t	result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (vm_map_offset_t) uap->addr;
	size = (vm_map_size_t)uap->len;

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size = vm_map_round_page(size+pageoff);
	user_map = current_map();

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE)
		return EAGAIN;
	else if (result != KERN_SUCCESS)
		return ENOMEM;

	return 0;	/* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t	result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();

	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
	return (result == KERN_SUCCESS ? 0 : ENOMEM);
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
	return (ENOSYS);
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
	return(ENOSYS);
}

#if		!defined(CONFIG_EMBEDDED)
/* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */
kern_return_t
map_fd(struct map_fd_args *args)
{
	int		fd = args->fd;
	vm_offset_t	offset = args->offset;
	vm_offset_t	*va = args->va;
	boolean_t	findspace = args->findspace;
	vm_size_t	size = args->size;
	kern_return_t ret;

	AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD);
	AUDIT_ARG(addr, CAST_DOWN(user_addr_t, args->va));
	AUDIT_ARG(fd, fd);

	ret = map_fd_funneled( fd, (vm_object_offset_t)offset, va, findspace, size);

	AUDIT_MACH_SYSCALL_EXIT(ret);
	return ret;
}

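/*
 * map_fd_funneled():  legacy helper behind the map_fd() Mach trap.
 *
 * Maps "size" bytes of the regular file open on "fd", starting at the
 * page-aligned "offset", into the current task with vm_map_64().  If
 * "findspace" is true, the kernel-chosen address is copied out to the
 * user address "va"; otherwise the new mapping is copied over the
 * caller-supplied address read from "va".  Returns a kern_return_t,
 * not an errno.
 */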
kern_return_t
map_fd_funneled(
	int			fd,
	vm_object_offset_t	offset,
	vm_offset_t		*va,
	boolean_t		findspace,
	vm_size_t		size)
{
	kern_return_t	result;
	struct fileproc	*fp;
	struct vnode	*vp;
	void *	pager;
	vm_offset_t	map_addr=0;
	vm_size_t	map_size;
	int		err=0;
	vm_prot_t	maxprot = VM_PROT_ALL;
	vm_map_t	my_map;
	proc_t		p = current_proc();
	struct vnode_attr vattr;

	/*
	 *	Find the inode; verify that it's a regular file.
	 */

	err = fp_lookup(p, fd, &fp, 0);
	if (err)
		return(err);

	if (fp->f_fglob->fg_type != DTYPE_VNODE){
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	if (!(fp->f_fglob->fg_flag & FREAD)) {
		err = KERN_PROTECTION_FAILURE;
		goto bad;
	}

	vp = (struct vnode *)fp->f_fglob->fg_data;
	err = vnode_getwithref(vp);
	if(err != 0)
		goto bad;

	if (vp->v_type != VREG) {
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

#if CONFIG_MACF
	err = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
			fp->f_fglob, VM_PROT_DEFAULT, MAP_FILE, &maxprot);
	if (err) {
		(void)vnode_put(vp);
		goto bad;
	}
#endif /* MAC */

#if CONFIG_PROTECT
	/* check for content protection access */
	{
		err = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
		if (err != 0) {
			(void) vnode_put(vp);
			goto bad;
		}
	}
#endif /* CONFIG_PROTECT */

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/*
	 * POSIX: mmap needs to update access time for mapped files
	 */
	if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
		VATTR_INIT(&vattr);
		nanotime(&vattr.va_access_time);
		VATTR_SET_ACTIVE(&vattr, va_access_time);
		vnode_setattr(vp, &vattr, vfs_context_current());
	}

	if (offset & PAGE_MASK_64) {
		printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm);
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}
	map_size = round_page(size);

	/*
	 * Allow user to map in a zero length file.
	 */
	if (size == 0) {
		(void)vnode_put(vp);
		err = KERN_SUCCESS;
		goto bad;
	}
	/*
	 *	Map in the file.
	 */
	pager = (void *)ubc_getpager(vp);
	if (pager == NULL) {
		(void)vnode_put(vp);
		err = KERN_FAILURE;
		goto bad;
	}


	my_map = current_map();

	result = vm_map_64(
			my_map,
			&map_addr, map_size, (vm_offset_t)0,
			VM_FLAGS_ANYWHERE, pager, offset, TRUE,
			VM_PROT_DEFAULT, maxprot,
			VM_INHERIT_DEFAULT);
	if (result != KERN_SUCCESS) {
		(void)vnode_put(vp);
		err = result;
		goto bad;
	}


	if (!findspace) {
		//K64todo fix for 64bit user?
		uint32_t	dst_addr;
		vm_map_copy_t	tmp;

		if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr))	||
					trunc_page(dst_addr) != dst_addr) {
			(void) vm_map_remove(
					my_map,
					map_addr, map_addr + map_size,
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}

		result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
				       (vm_map_size_t)map_size, TRUE, &tmp);
		if (result != KERN_SUCCESS) {

			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}

		result = vm_map_copy_overwrite(my_map,
					(vm_map_address_t)dst_addr, tmp, FALSE);
		if (result != KERN_SUCCESS) {
			vm_map_copy_discard(tmp);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}
	} else {
		// K64todo bug compatible now, should fix for 64bit user
		uint32_t user_map_addr = CAST_DOWN_EXPLICIT(uint32_t, map_addr);
		if (copyout(&user_map_addr, CAST_USER_ADDR_T(va), sizeof (user_map_addr))) {
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}
	}

	ubc_setthreadcred(vp, current_proc(), current_thread());
	(void)vnode_put(vp);
	err = 0;
bad:
	fp_drop(p, fd, fp, 0);
	return (err);
}
#endif		/* !defined(CONFIG_EMBEDDED) */
