1/* 2 * Copyright (c) 2007 Apple Inc. All Rights Reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1988 University of Utah. 30 * Copyright (c) 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * the Systems Programming Group of the University of Utah Computer 35 * Science Department. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ 66 * 67 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95 68 */ 69/* 70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 71 * support for mandatory and extensible security protections. This notice 72 * is included in support of clause 2.2 (b) of the Apple Public License, 73 * Version 2.0. 74 */ 75 76/* 77 * Mapped file (mmap) interface to VM 78 */ 79 80#include <sys/param.h> 81#include <sys/systm.h> 82#include <sys/filedesc.h> 83#include <sys/proc_internal.h> 84#include <sys/kauth.h> 85#include <sys/resourcevar.h> 86#include <sys/vnode_internal.h> 87#include <sys/acct.h> 88#include <sys/wait.h> 89#include <sys/file_internal.h> 90#include <sys/vadvise.h> 91#include <sys/trace.h> 92#include <sys/mman.h> 93#include <sys/conf.h> 94#include <sys/stat.h> 95#include <sys/ubc.h> 96#include <sys/ubc_internal.h> 97#include <sys/sysproto.h> 98#if CONFIG_PROTECT 99#include <sys/cprotect.h> 100#endif 101 102#include <sys/syscall.h> 103#include <sys/kdebug.h> 104 105#include <security/audit/audit.h> 106#include <bsm/audit_kevents.h> 107 108#include <mach/mach_types.h> 109#include <mach/mach_traps.h> 110#include <mach/vm_sync.h> 111#include <mach/vm_behavior.h> 112#include <mach/vm_inherit.h> 113#include <mach/vm_statistics.h> 114#include <mach/mach_vm.h> 115#include <mach/vm_map.h> 116#include <mach/host_priv.h> 117 118#include <machine/machine_routines.h> 119 120#include <kern/cpu_number.h> 121#include <kern/host.h> 122#include <kern/task.h> 123 124#include <vm/vm_map.h> 125#include <vm/vm_kern.h> 126#include <vm/vm_pager.h> 127#include <vm/vm_protos.h> 128 129/* XXX the following function should probably be static */ 130kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *, 131 boolean_t, vm_size_t); 132 133/* 134 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct 135 * XXX usage is PROT_* from an interface perspective. Thus the values of 136 * XXX VM_PROT_* and PROT_* need to correspond. 137 */ 138int 139mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) 140{ 141 /* 142 * Map in special device (must be SHARED) or file 143 */ 144 struct fileproc *fp; 145 register struct vnode *vp; 146 int flags; 147 int prot; 148 int err=0; 149 vm_map_t user_map; 150 kern_return_t result; 151 vm_map_offset_t user_addr; 152 vm_map_size_t user_size; 153 vm_object_offset_t pageoff; 154 vm_object_offset_t file_pos; 155 int alloc_flags=0; 156 boolean_t docow; 157 vm_prot_t maxprot; 158 void *handle; 159 memory_object_t pager = MEMORY_OBJECT_NULL; 160 memory_object_control_t control; 161 int mapanon=0; 162 int fpref=0; 163 int error =0; 164 int fd = uap->fd; 165 int num_retries = 0; 166 167 user_addr = (vm_map_offset_t)uap->addr; 168 user_size = (vm_map_size_t) uap->len; 169 170 AUDIT_ARG(addr, user_addr); 171 AUDIT_ARG(len, user_size); 172 AUDIT_ARG(fd, uap->fd); 173 174 prot = (uap->prot & VM_PROT_ALL); 175#if 3777787 176 /* 177 * Since the hardware currently does not support writing without 178 * read-before-write, or execution-without-read, if the request is 179 * for write or execute access, we must imply read access as well; 180 * otherwise programs expecting this to work will fail to operate. 181 */ 182 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) 183 prot |= VM_PROT_READ; 184#endif /* radar 3777787 */ 185 186 flags = uap->flags; 187 vp = NULLVP; 188 189 /* 190 * The vm code does not have prototypes & compiler doesn't do the' 191 * the right thing when you cast 64bit value and pass it in function 192 * call. So here it is. 193 */ 194 file_pos = (vm_object_offset_t)uap->pos; 195 196 197 /* make sure mapping fits into numeric range etc */ 198 if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64) 199 return (EINVAL); 200 201 /* 202 * Align the file position to a page boundary, 203 * and save its page offset component. 204 */ 205 pageoff = (file_pos & PAGE_MASK); 206 file_pos -= (vm_object_offset_t)pageoff; 207 208 209 /* Adjust size for rounding (on both ends). */ 210 user_size += pageoff; /* low end... */ 211 user_size = mach_vm_round_page(user_size); /* hi end */ 212 213 if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON))){ 214 return EINVAL; 215 } 216 /* 217 * Check for illegal addresses. Watch out for address wrap... Note 218 * that VM_*_ADDRESS are not constants due to casts (argh). 219 */ 220 if (flags & MAP_FIXED) { 221 /* 222 * The specified address must have the same remainder 223 * as the file offset taken modulo PAGE_SIZE, so it 224 * should be aligned after adjustment by pageoff. 225 */ 226 user_addr -= pageoff; 227 if (user_addr & PAGE_MASK) 228 return (EINVAL); 229 } 230#ifdef notyet 231 /* DO not have apis to get this info, need to wait till then*/ 232 /* 233 * XXX for non-fixed mappings where no hint is provided or 234 * the hint would fall in the potential heap space, 235 * place it after the end of the largest possible heap. 236 * 237 * There should really be a pmap call to determine a reasonable 238 * location. 239 */ 240 else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ)) 241 addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ); 242 243#endif 244 245 alloc_flags = 0; 246 247 if (flags & MAP_ANON) { 248 249 maxprot = VM_PROT_ALL; 250#if CONFIG_MACF 251 /* 252 * Entitlement check. 253 */ 254 error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot); 255 if (error) { 256 return EINVAL; 257 } 258#endif /* MAC */ 259 260 /* 261 * Mapping blank space is trivial. Use positive fds as the alias 262 * value for memory tracking. 263 */ 264 if (fd != -1) { 265 /* 266 * Use "fd" to pass (some) Mach VM allocation flags, 267 * (see the VM_FLAGS_* definitions). 268 */ 269 alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK | 270 VM_FLAGS_PURGABLE); 271 if (alloc_flags != fd) { 272 /* reject if there are any extra flags */ 273 return EINVAL; 274 } 275 } 276 277 handle = NULL; 278 file_pos = 0; 279 mapanon = 1; 280 } else { 281 struct vnode_attr va; 282 vfs_context_t ctx = vfs_context_current(); 283 284 if (flags & MAP_JIT) 285 return EINVAL; 286 287 /* 288 * Mapping file, get fp for validation. Obtain vnode and make 289 * sure it is of appropriate type. 290 */ 291 err = fp_lookup(p, fd, &fp, 0); 292 if (err) 293 return(err); 294 fpref = 1; 295 if(fp->f_fglob->fg_type == DTYPE_PSXSHM) { 296 uap->addr = (user_addr_t)user_addr; 297 uap->len = (user_size_t)user_size; 298 uap->prot = prot; 299 uap->flags = flags; 300 uap->pos = file_pos; 301 error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff); 302 goto bad; 303 } 304 305 if (fp->f_fglob->fg_type != DTYPE_VNODE) { 306 error = EINVAL; 307 goto bad; 308 } 309 vp = (struct vnode *)fp->f_fglob->fg_data; 310 error = vnode_getwithref(vp); 311 if(error != 0) 312 goto bad; 313 314 if (vp->v_type != VREG && vp->v_type != VCHR) { 315 (void)vnode_put(vp); 316 error = EINVAL; 317 goto bad; 318 } 319 320 AUDIT_ARG(vnpath, vp, ARG_VNODE1); 321 322 /* 323 * POSIX: mmap needs to update access time for mapped files 324 */ 325 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { 326 VATTR_INIT(&va); 327 nanotime(&va.va_access_time); 328 VATTR_SET_ACTIVE(&va, va_access_time); 329 vnode_setattr(vp, &va, ctx); 330 } 331 332 /* 333 * XXX hack to handle use of /dev/zero to map anon memory (ala 334 * SunOS). 335 */ 336 if (vp->v_type == VCHR || vp->v_type == VSTR) { 337 (void)vnode_put(vp); 338 error = ENODEV; 339 goto bad; 340 } else { 341 /* 342 * Ensure that file and memory protections are 343 * compatible. Note that we only worry about 344 * writability if mapping is shared; in this case, 345 * current and max prot are dictated by the open file. 346 * XXX use the vnode instead? Problem is: what 347 * credentials do we use for determination? What if 348 * proc does a setuid? 349 */ 350 maxprot = VM_PROT_EXECUTE; /* ??? */ 351 if (fp->f_fglob->fg_flag & FREAD) 352 maxprot |= VM_PROT_READ; 353 else if (prot & PROT_READ) { 354 (void)vnode_put(vp); 355 error = EACCES; 356 goto bad; 357 } 358 /* 359 * If we are sharing potential changes (either via 360 * MAP_SHARED or via the implicit sharing of character 361 * device mappings), and we are trying to get write 362 * permission although we opened it without asking 363 * for it, bail out. 364 */ 365 366 if ((flags & MAP_SHARED) != 0) { 367 if ((fp->f_fglob->fg_flag & FWRITE) != 0 && 368 /* 369 * Do not allow writable mappings of 370 * swap files (see vm_swapfile_pager.c). 371 */ 372 !vnode_isswap(vp)) { 373 /* 374 * check for write access 375 * 376 * Note that we already made this check when granting FWRITE 377 * against the file, so it seems redundant here. 378 */ 379 error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx); 380 381 /* if not granted for any reason, but we wanted it, bad */ 382 if ((prot & PROT_WRITE) && (error != 0)) { 383 vnode_put(vp); 384 goto bad; 385 } 386 387 /* if writable, remember */ 388 if (error == 0) 389 maxprot |= VM_PROT_WRITE; 390 391 } else if ((prot & PROT_WRITE) != 0) { 392 (void)vnode_put(vp); 393 error = EACCES; 394 goto bad; 395 } 396 } else 397 maxprot |= VM_PROT_WRITE; 398 399 handle = (void *)vp; 400#if CONFIG_MACF 401 error = mac_file_check_mmap(vfs_context_ucred(ctx), 402 fp->f_fglob, prot, flags, &maxprot); 403 if (error) { 404 (void)vnode_put(vp); 405 goto bad; 406 } 407#endif /* MAC */ 408 409#if CONFIG_PROTECT 410 { 411 error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0); 412 if (error) { 413 (void) vnode_put(vp); 414 goto bad; 415 } 416 } 417#endif /* CONFIG_PROTECT */ 418 419 420 } 421 } 422 423 if (user_size == 0) { 424 if (!mapanon) 425 (void)vnode_put(vp); 426 error = 0; 427 goto bad; 428 } 429 430 /* 431 * We bend a little - round the start and end addresses 432 * to the nearest page boundary. 433 */ 434 user_size = mach_vm_round_page(user_size); 435 436 if (file_pos & PAGE_MASK_64) { 437 if (!mapanon) 438 (void)vnode_put(vp); 439 error = EINVAL; 440 goto bad; 441 } 442 443 user_map = current_map(); 444 445 if ((flags & MAP_FIXED) == 0) { 446 alloc_flags |= VM_FLAGS_ANYWHERE; 447 user_addr = mach_vm_round_page(user_addr); 448 } else { 449 if (user_addr != mach_vm_trunc_page(user_addr)) { 450 if (!mapanon) 451 (void)vnode_put(vp); 452 error = EINVAL; 453 goto bad; 454 } 455 /* 456 * mmap(MAP_FIXED) will replace any existing mappings in the 457 * specified range, if the new mapping is successful. 458 * If we just deallocate the specified address range here, 459 * another thread might jump in and allocate memory in that 460 * range before we get a chance to establish the new mapping, 461 * and we won't have a chance to restore the old mappings. 462 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it 463 * has to deallocate the existing mappings and establish the 464 * new ones atomically. 465 */ 466 alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; 467 } 468 469 if (flags & MAP_NOCACHE) 470 alloc_flags |= VM_FLAGS_NO_CACHE; 471 472 if (flags & MAP_JIT){ 473 alloc_flags |= VM_FLAGS_MAP_JIT; 474 } 475 /* 476 * Lookup/allocate object. 477 */ 478 if (handle == NULL) { 479 control = NULL; 480#ifdef notyet 481/* Hmm .. */ 482#if defined(VM_PROT_READ_IS_EXEC) 483 if (prot & VM_PROT_READ) 484 prot |= VM_PROT_EXECUTE; 485 if (maxprot & VM_PROT_READ) 486 maxprot |= VM_PROT_EXECUTE; 487#endif 488#endif 489 490#if 3777787 491 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) 492 prot |= VM_PROT_READ; 493 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) 494 maxprot |= VM_PROT_READ; 495#endif /* radar 3777787 */ 496map_anon_retry: 497 result = vm_map_enter_mem_object(user_map, 498 &user_addr, user_size, 499 0, alloc_flags, 500 IPC_PORT_NULL, 0, FALSE, 501 prot, maxprot, 502 (flags & MAP_SHARED) ? 503 VM_INHERIT_SHARE : 504 VM_INHERIT_DEFAULT); 505 506 /* If a non-binding address was specified for this anonymous 507 * mapping, retry the mapping with a zero base 508 * in the event the mapping operation failed due to 509 * lack of space between the address and the map's maximum. 510 */ 511 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) { 512 user_addr = PAGE_SIZE; 513 goto map_anon_retry; 514 } 515 } else { 516 if (vnode_isswap(vp)) { 517 /* 518 * Map swap files with a special pager 519 * that returns obfuscated contents. 520 */ 521 control = NULL; 522 pager = swapfile_pager_setup(vp); 523 if (pager != MEMORY_OBJECT_NULL) { 524 control = swapfile_pager_control(pager); 525 } 526 } else { 527 control = ubc_getobject(vp, UBC_FLAGS_NONE); 528 } 529 530 if (control == NULL) { 531 (void)vnode_put(vp); 532 error = ENOMEM; 533 goto bad; 534 } 535 536 /* 537 * Set credentials: 538 * FIXME: if we're writing the file we need a way to 539 * ensure that someone doesn't replace our R/W creds 540 * with ones that only work for read. 541 */ 542 543 ubc_setthreadcred(vp, p, current_thread()); 544 docow = FALSE; 545 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) { 546 docow = TRUE; 547 } 548 549#ifdef notyet 550/* Hmm .. */ 551#if defined(VM_PROT_READ_IS_EXEC) 552 if (prot & VM_PROT_READ) 553 prot |= VM_PROT_EXECUTE; 554 if (maxprot & VM_PROT_READ) 555 maxprot |= VM_PROT_EXECUTE; 556#endif 557#endif /* notyet */ 558 559#if 3777787 560 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) 561 prot |= VM_PROT_READ; 562 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) 563 maxprot |= VM_PROT_READ; 564#endif /* radar 3777787 */ 565map_file_retry: 566 result = vm_map_enter_mem_object_control(user_map, 567 &user_addr, user_size, 568 0, alloc_flags, 569 control, file_pos, 570 docow, prot, maxprot, 571 (flags & MAP_SHARED) ? 572 VM_INHERIT_SHARE : 573 VM_INHERIT_DEFAULT); 574 575 /* If a non-binding address was specified for this file backed 576 * mapping, retry the mapping with a zero base 577 * in the event the mapping operation failed due to 578 * lack of space between the address and the map's maximum. 579 */ 580 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) { 581 user_addr = PAGE_SIZE; 582 goto map_file_retry; 583 } 584 } 585 586 if (!mapanon) { 587 (void)vnode_put(vp); 588 } 589 590 switch (result) { 591 case KERN_SUCCESS: 592 *retval = user_addr + pageoff; 593 error = 0; 594 break; 595 case KERN_INVALID_ADDRESS: 596 case KERN_NO_SPACE: 597 error = ENOMEM; 598 break; 599 case KERN_PROTECTION_FAILURE: 600 error = EACCES; 601 break; 602 default: 603 error = EINVAL; 604 break; 605 } 606bad: 607 if (pager != MEMORY_OBJECT_NULL) { 608 /* 609 * Release the reference on the pager. 610 * If the mapping was successful, it now holds 611 * an extra reference. 612 */ 613 memory_object_deallocate(pager); 614 } 615 if (fpref) 616 fp_drop(p, fd, fp, 0); 617 618 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0); 619 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32), 620 (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0); 621 return(error); 622} 623 624int 625msync(__unused proc_t p, struct msync_args *uap, int32_t *retval) 626{ 627 __pthread_testcancel(1); 628 return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval)); 629} 630 631int 632msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval) 633{ 634 mach_vm_offset_t addr; 635 mach_vm_size_t size; 636 int flags; 637 vm_map_t user_map; 638 int rv; 639 vm_sync_t sync_flags=0; 640 641 addr = (mach_vm_offset_t) uap->addr; 642 size = (mach_vm_size_t)uap->len; 643 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0); 644 if (addr & PAGE_MASK_64) { 645 /* UNIX SPEC: user address is not page-aligned, return EINVAL */ 646 return EINVAL; 647 } 648 if (size == 0) { 649 /* 650 * We cannot support this properly without maintaining 651 * list all mmaps done. Cannot use vm_map_entry as they could be 652 * split or coalesced by indepenedant actions. So instead of 653 * inaccurate results, lets just return error as invalid size 654 * specified 655 */ 656 return (EINVAL); /* XXX breaks posix apps */ 657 } 658 659 flags = uap->flags; 660 /* disallow contradictory flags */ 661 if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) 662 return (EINVAL); 663 664 if (flags & MS_KILLPAGES) 665 sync_flags |= VM_SYNC_KILLPAGES; 666 if (flags & MS_DEACTIVATE) 667 sync_flags |= VM_SYNC_DEACTIVATE; 668 if (flags & MS_INVALIDATE) 669 sync_flags |= VM_SYNC_INVALIDATE; 670 671 if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) { 672 if (flags & MS_ASYNC) 673 sync_flags |= VM_SYNC_ASYNCHRONOUS; 674 else 675 sync_flags |= VM_SYNC_SYNCHRONOUS; 676 } 677 678 sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */ 679 680 user_map = current_map(); 681 rv = mach_vm_msync(user_map, addr, size, sync_flags); 682 683 switch (rv) { 684 case KERN_SUCCESS: 685 break; 686 case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */ 687 return (ENOMEM); 688 case KERN_FAILURE: 689 return (EIO); 690 default: 691 return (EINVAL); 692 } 693 return (0); 694} 695 696 697int 698munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval) 699{ 700 mach_vm_offset_t user_addr; 701 mach_vm_size_t user_size; 702 kern_return_t result; 703 704 user_addr = (mach_vm_offset_t) uap->addr; 705 user_size = (mach_vm_size_t) uap->len; 706 707 AUDIT_ARG(addr, user_addr); 708 AUDIT_ARG(len, user_size); 709 710 if (user_addr & PAGE_MASK_64) { 711 /* UNIX SPEC: user address is not page-aligned, return EINVAL */ 712 return EINVAL; 713 } 714 715 if (user_addr + user_size < user_addr) 716 return(EINVAL); 717 718 if (user_size == 0) { 719 /* UNIX SPEC: size is 0, return EINVAL */ 720 return EINVAL; 721 } 722 723 result = mach_vm_deallocate(current_map(), user_addr, user_size); 724 if (result != KERN_SUCCESS) { 725 return(EINVAL); 726 } 727 return(0); 728} 729 730int 731mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval) 732{ 733 register vm_prot_t prot; 734 mach_vm_offset_t user_addr; 735 mach_vm_size_t user_size; 736 kern_return_t result; 737 vm_map_t user_map; 738#if CONFIG_MACF 739 int error; 740#endif 741 742 AUDIT_ARG(addr, uap->addr); 743 AUDIT_ARG(len, uap->len); 744 AUDIT_ARG(value32, uap->prot); 745 746 user_addr = (mach_vm_offset_t) uap->addr; 747 user_size = (mach_vm_size_t) uap->len; 748 prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED)); 749 750 if (user_addr & PAGE_MASK_64) { 751 /* UNIX SPEC: user address is not page-aligned, return EINVAL */ 752 return EINVAL; 753 } 754 755#ifdef notyet 756/* Hmm .. */ 757#if defined(VM_PROT_READ_IS_EXEC) 758 if (prot & VM_PROT_READ) 759 prot |= VM_PROT_EXECUTE; 760#endif 761#endif /* notyet */ 762 763#if 3936456 764 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) 765 prot |= VM_PROT_READ; 766#endif /* 3936456 */ 767 768 user_map = current_map(); 769 770#if CONFIG_MACF 771 /* 772 * The MAC check for mprotect is of limited use for 2 reasons: 773 * Without mmap revocation, the caller could have asked for the max 774 * protections initially instead of a reduced set, so a mprotect 775 * check would offer no new security. 776 * It is not possible to extract the vnode from the pager object(s) 777 * of the target memory range. 778 * However, the MAC check may be used to prevent a process from, 779 * e.g., making the stack executable. 780 */ 781 error = mac_proc_check_mprotect(p, user_addr, 782 user_size, prot); 783 if (error) 784 return (error); 785#endif 786 787 if(prot & VM_PROT_TRUSTED) { 788#if CONFIG_DYNAMIC_CODE_SIGNING 789 /* CODE SIGNING ENFORCEMENT - JIT support */ 790 /* The special protection value VM_PROT_TRUSTED requests that we treat 791 * this page as if it had a valid code signature. 792 * If this is enabled, there MUST be a MAC policy implementing the 793 * mac_proc_check_mprotect() hook above. Otherwise, Codesigning will be 794 * compromised because the check would always succeed and thusly any 795 * process could sign dynamically. */ 796 result = vm_map_sign(user_map, 797 vm_map_trunc_page(user_addr), 798 vm_map_round_page(user_addr+user_size)); 799 switch (result) { 800 case KERN_SUCCESS: 801 break; 802 case KERN_INVALID_ADDRESS: 803 /* UNIX SPEC: for an invalid address range, return ENOMEM */ 804 return ENOMEM; 805 default: 806 return EINVAL; 807 } 808#else 809 return ENOTSUP; 810#endif 811 } 812 prot &= ~VM_PROT_TRUSTED; 813 814 result = mach_vm_protect(user_map, user_addr, user_size, 815 FALSE, prot); 816 switch (result) { 817 case KERN_SUCCESS: 818 return (0); 819 case KERN_PROTECTION_FAILURE: 820 return (EACCES); 821 case KERN_INVALID_ADDRESS: 822 /* UNIX SPEC: for an invalid address range, return ENOMEM */ 823 return ENOMEM; 824 } 825 return (EINVAL); 826} 827 828 829int 830minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval) 831{ 832 mach_vm_offset_t addr; 833 mach_vm_size_t size; 834 register vm_inherit_t inherit; 835 vm_map_t user_map; 836 kern_return_t result; 837 838 AUDIT_ARG(addr, uap->addr); 839 AUDIT_ARG(len, uap->len); 840 AUDIT_ARG(value32, uap->inherit); 841 842 addr = (mach_vm_offset_t)uap->addr; 843 size = (mach_vm_size_t)uap->len; 844 inherit = uap->inherit; 845 846 user_map = current_map(); 847 result = mach_vm_inherit(user_map, addr, size, 848 inherit); 849 switch (result) { 850 case KERN_SUCCESS: 851 return (0); 852 case KERN_PROTECTION_FAILURE: 853 return (EACCES); 854 } 855 return (EINVAL); 856} 857 858int 859madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval) 860{ 861 vm_map_t user_map; 862 mach_vm_offset_t start; 863 mach_vm_size_t size; 864 vm_behavior_t new_behavior; 865 kern_return_t result; 866 867 /* 868 * Since this routine is only advisory, we default to conservative 869 * behavior. 870 */ 871 switch (uap->behav) { 872 case MADV_RANDOM: 873 new_behavior = VM_BEHAVIOR_RANDOM; 874 break; 875 case MADV_SEQUENTIAL: 876 new_behavior = VM_BEHAVIOR_SEQUENTIAL; 877 break; 878 case MADV_NORMAL: 879 new_behavior = VM_BEHAVIOR_DEFAULT; 880 break; 881 case MADV_WILLNEED: 882 new_behavior = VM_BEHAVIOR_WILLNEED; 883 break; 884 case MADV_DONTNEED: 885 new_behavior = VM_BEHAVIOR_DONTNEED; 886 break; 887 case MADV_FREE: 888 new_behavior = VM_BEHAVIOR_FREE; 889 break; 890 case MADV_ZERO_WIRED_PAGES: 891 new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES; 892 break; 893 case MADV_FREE_REUSABLE: 894 new_behavior = VM_BEHAVIOR_REUSABLE; 895 break; 896 case MADV_FREE_REUSE: 897 new_behavior = VM_BEHAVIOR_REUSE; 898 break; 899 case MADV_CAN_REUSE: 900 new_behavior = VM_BEHAVIOR_CAN_REUSE; 901 break; 902 default: 903 return(EINVAL); 904 } 905 906 start = (mach_vm_offset_t) uap->addr; 907 size = (mach_vm_size_t) uap->len; 908 909 user_map = current_map(); 910 911 result = mach_vm_behavior_set(user_map, start, size, new_behavior); 912 switch (result) { 913 case KERN_SUCCESS: 914 return 0; 915 case KERN_INVALID_ADDRESS: 916 return EINVAL; 917 case KERN_NO_SPACE: 918 return ENOMEM; 919 } 920 921 return EINVAL; 922} 923 924int 925mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval) 926{ 927 mach_vm_offset_t addr, first_addr, end; 928 vm_map_t map; 929 user_addr_t vec; 930 int error; 931 int vecindex, lastvecindex; 932 int mincoreinfo=0; 933 int pqueryinfo; 934 kern_return_t ret; 935 int numref; 936 937 char c; 938 939 map = current_map(); 940 941 /* 942 * Make sure that the addresses presented are valid for user 943 * mode. 944 */ 945 first_addr = addr = mach_vm_trunc_page(uap->addr); 946 end = addr + mach_vm_round_page(uap->len); 947 948 if (end < addr) 949 return (EINVAL); 950 951 /* 952 * Address of byte vector 953 */ 954 vec = uap->vec; 955 956 map = current_map(); 957 958 /* 959 * Do this on a map entry basis so that if the pages are not 960 * in the current processes address space, we can easily look 961 * up the pages elsewhere. 962 */ 963 lastvecindex = -1; 964 for( ; addr < end; addr += PAGE_SIZE ) { 965 pqueryinfo = 0; 966 ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref); 967 if (ret != KERN_SUCCESS) 968 pqueryinfo = 0; 969 mincoreinfo = 0; 970 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) 971 mincoreinfo |= MINCORE_INCORE; 972 if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) 973 mincoreinfo |= MINCORE_REFERENCED; 974 if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) 975 mincoreinfo |= MINCORE_MODIFIED; 976 977 978 /* 979 * calculate index into user supplied byte vector 980 */ 981 vecindex = (addr - first_addr)>> PAGE_SHIFT; 982 983 /* 984 * If we have skipped map entries, we need to make sure that 985 * the byte vector is zeroed for those skipped entries. 986 */ 987 while((lastvecindex + 1) < vecindex) { 988 c = 0; 989 error = copyout(&c, vec + lastvecindex, 1); 990 if (error) { 991 return (EFAULT); 992 } 993 ++lastvecindex; 994 } 995 996 /* 997 * Pass the page information to the user 998 */ 999 c = (char)mincoreinfo; 1000 error = copyout(&c, vec + vecindex, 1); 1001 if (error) { 1002 return (EFAULT); 1003 } 1004 lastvecindex = vecindex; 1005 } 1006 1007 1008 /* 1009 * Zero the last entries in the byte vector. 1010 */ 1011 vecindex = (end - first_addr) >> PAGE_SHIFT; 1012 while((lastvecindex + 1) < vecindex) { 1013 c = 0; 1014 error = copyout(&c, vec + lastvecindex, 1); 1015 if (error) { 1016 return (EFAULT); 1017 } 1018 ++lastvecindex; 1019 } 1020 1021 return (0); 1022} 1023 1024int 1025mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval) 1026{ 1027 vm_map_t user_map; 1028 vm_map_offset_t addr; 1029 vm_map_size_t size, pageoff; 1030 kern_return_t result; 1031 1032 AUDIT_ARG(addr, uap->addr); 1033 AUDIT_ARG(len, uap->len); 1034 1035 addr = (vm_map_offset_t) uap->addr; 1036 size = (vm_map_size_t)uap->len; 1037 1038 /* disable wrap around */ 1039 if (addr + size < addr) 1040 return (EINVAL); 1041 1042 if (size == 0) 1043 return (0); 1044 1045 pageoff = (addr & PAGE_MASK); 1046 addr -= pageoff; 1047 size = vm_map_round_page(size+pageoff); 1048 user_map = current_map(); 1049 1050 /* have to call vm_map_wire directly to pass "I don't know" protections */ 1051 result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE); 1052 1053 if (result == KERN_RESOURCE_SHORTAGE) 1054 return EAGAIN; 1055 else if (result != KERN_SUCCESS) 1056 return ENOMEM; 1057 1058 return 0; /* KERN_SUCCESS */ 1059} 1060 1061int 1062munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval) 1063{ 1064 mach_vm_offset_t addr; 1065 mach_vm_size_t size; 1066 vm_map_t user_map; 1067 kern_return_t result; 1068 1069 AUDIT_ARG(addr, uap->addr); 1070 AUDIT_ARG(addr, uap->len); 1071 1072 addr = (mach_vm_offset_t) uap->addr; 1073 size = (mach_vm_size_t)uap->len; 1074 user_map = current_map(); 1075 1076 /* JMM - need to remove all wirings by spec - this just removes one */ 1077 result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE); 1078 return (result == KERN_SUCCESS ? 0 : ENOMEM); 1079} 1080 1081 1082int 1083mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval) 1084{ 1085 return (ENOSYS); 1086} 1087 1088int 1089munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval) 1090{ 1091 return(ENOSYS); 1092} 1093 1094#if !defined(CONFIG_EMBEDDED) 1095/* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */ 1096kern_return_t 1097map_fd(struct map_fd_args *args) 1098{ 1099 int fd = args->fd; 1100 vm_offset_t offset = args->offset; 1101 vm_offset_t *va = args->va; 1102 boolean_t findspace = args->findspace; 1103 vm_size_t size = args->size; 1104 kern_return_t ret; 1105 1106 AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD); 1107 AUDIT_ARG(addr, CAST_DOWN(user_addr_t, args->va)); 1108 AUDIT_ARG(fd, fd); 1109 1110 ret = map_fd_funneled( fd, (vm_object_offset_t)offset, va, findspace, size); 1111 1112 AUDIT_MACH_SYSCALL_EXIT(ret); 1113 return ret; 1114} 1115 1116kern_return_t 1117map_fd_funneled( 1118 int fd, 1119 vm_object_offset_t offset, 1120 vm_offset_t *va, 1121 boolean_t findspace, 1122 vm_size_t size) 1123{ 1124 kern_return_t result; 1125 struct fileproc *fp; 1126 struct vnode *vp; 1127 void * pager; 1128 vm_offset_t map_addr=0; 1129 vm_size_t map_size; 1130 int err=0; 1131 vm_prot_t maxprot = VM_PROT_ALL; 1132 vm_map_t my_map; 1133 proc_t p = current_proc(); 1134 struct vnode_attr vattr; 1135 1136 /* 1137 * Find the inode; verify that it's a regular file. 1138 */ 1139 1140 err = fp_lookup(p, fd, &fp, 0); 1141 if (err) 1142 return(err); 1143 1144 if (fp->f_fglob->fg_type != DTYPE_VNODE){ 1145 err = KERN_INVALID_ARGUMENT; 1146 goto bad; 1147 } 1148 1149 if (!(fp->f_fglob->fg_flag & FREAD)) { 1150 err = KERN_PROTECTION_FAILURE; 1151 goto bad; 1152 } 1153 1154 vp = (struct vnode *)fp->f_fglob->fg_data; 1155 err = vnode_getwithref(vp); 1156 if(err != 0) 1157 goto bad; 1158 1159 if (vp->v_type != VREG) { 1160 (void)vnode_put(vp); 1161 err = KERN_INVALID_ARGUMENT; 1162 goto bad; 1163 } 1164 1165#if CONFIG_MACF 1166 err = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), 1167 fp->f_fglob, VM_PROT_DEFAULT, MAP_FILE, &maxprot); 1168 if (err) { 1169 (void)vnode_put(vp); 1170 goto bad; 1171 } 1172#endif /* MAC */ 1173 1174#if CONFIG_PROTECT 1175 /* check for content protection access */ 1176 { 1177 err = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0); 1178 if (err != 0) { 1179 (void) vnode_put(vp); 1180 goto bad; 1181 } 1182 } 1183#endif /* CONFIG_PROTECT */ 1184 1185 AUDIT_ARG(vnpath, vp, ARG_VNODE1); 1186 1187 /* 1188 * POSIX: mmap needs to update access time for mapped files 1189 */ 1190 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { 1191 VATTR_INIT(&vattr); 1192 nanotime(&vattr.va_access_time); 1193 VATTR_SET_ACTIVE(&vattr, va_access_time); 1194 vnode_setattr(vp, &vattr, vfs_context_current()); 1195 } 1196 1197 if (offset & PAGE_MASK_64) { 1198 printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm); 1199 (void)vnode_put(vp); 1200 err = KERN_INVALID_ARGUMENT; 1201 goto bad; 1202 } 1203 map_size = round_page(size); 1204 1205 /* 1206 * Allow user to map in a zero length file. 1207 */ 1208 if (size == 0) { 1209 (void)vnode_put(vp); 1210 err = KERN_SUCCESS; 1211 goto bad; 1212 } 1213 /* 1214 * Map in the file. 1215 */ 1216 pager = (void *)ubc_getpager(vp); 1217 if (pager == NULL) { 1218 (void)vnode_put(vp); 1219 err = KERN_FAILURE; 1220 goto bad; 1221 } 1222 1223 1224 my_map = current_map(); 1225 1226 result = vm_map_64( 1227 my_map, 1228 &map_addr, map_size, (vm_offset_t)0, 1229 VM_FLAGS_ANYWHERE, pager, offset, TRUE, 1230 VM_PROT_DEFAULT, maxprot, 1231 VM_INHERIT_DEFAULT); 1232 if (result != KERN_SUCCESS) { 1233 (void)vnode_put(vp); 1234 err = result; 1235 goto bad; 1236 } 1237 1238 1239 if (!findspace) { 1240 //K64todo fix for 64bit user? 1241 uint32_t dst_addr; 1242 vm_map_copy_t tmp; 1243 1244 if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) || 1245 trunc_page(dst_addr) != dst_addr) { 1246 (void) vm_map_remove( 1247 my_map, 1248 map_addr, map_addr + map_size, 1249 VM_MAP_NO_FLAGS); 1250 (void)vnode_put(vp); 1251 err = KERN_INVALID_ADDRESS; 1252 goto bad; 1253 } 1254 1255 result = vm_map_copyin(my_map, (vm_map_address_t)map_addr, 1256 (vm_map_size_t)map_size, TRUE, &tmp); 1257 if (result != KERN_SUCCESS) { 1258 1259 (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), 1260 vm_map_round_page(map_addr + map_size), 1261 VM_MAP_NO_FLAGS); 1262 (void)vnode_put(vp); 1263 err = result; 1264 goto bad; 1265 } 1266 1267 result = vm_map_copy_overwrite(my_map, 1268 (vm_map_address_t)dst_addr, tmp, FALSE); 1269 if (result != KERN_SUCCESS) { 1270 vm_map_copy_discard(tmp); 1271 (void)vnode_put(vp); 1272 err = result; 1273 goto bad; 1274 } 1275 } else { 1276 // K64todo bug compatible now, should fix for 64bit user 1277 uint32_t user_map_addr = CAST_DOWN_EXPLICIT(uint32_t, map_addr); 1278 if (copyout(&user_map_addr, CAST_USER_ADDR_T(va), sizeof (user_map_addr))) { 1279 (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), 1280 vm_map_round_page(map_addr + map_size), 1281 VM_MAP_NO_FLAGS); 1282 (void)vnode_put(vp); 1283 err = KERN_INVALID_ADDRESS; 1284 goto bad; 1285 } 1286 } 1287 1288 ubc_setthreadcred(vp, current_proc(), current_thread()); 1289 (void)vnode_put(vp); 1290 err = 0; 1291bad: 1292 fp_drop(p, fd, fp, 0); 1293 return (err); 1294} 1295#endif /* !defined(CONFIG_EMBEDDED) */ 1296 1297