/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/vm/vm_mmap.c 356634 2020-01-11 15:06:06Z kevans $");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
static int mincore_mapped = 1;
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0,
    "mincore reports mappings, not residency");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(struct thread *td, struct getpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
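
/*
 * Illustrative sketch (userland usage, not part of the kernel source
 * proper): because of the offset rounding described above, mapping at a
 * non-page-aligned file offset yields a pointer adjusted up by the page
 * offset.  The offset value below is made up for the example.
 *
 *	off_t pos = 0x1234;		// not page aligned
 *	char *p = mmap(NULL, 100, PROT_READ, MAP_SHARED, fd, pos);
 *	// The mapping starts at the page boundary trunc_page(pos);
 *	// p points (pos & PAGE_MASK) bytes into it, so *p is the
 *	// byte at file offset 0x1234.
 */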

#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
    int fd, off_t pos)
{

	return (kern_mmap_fpcheck(td, addr0, size, prot, flags, fd, pos, NULL));
}

/*
 * When mmap'ing a file, check_fp_fn may be used for the caller to do any
 * last-minute validation based on the referenced file in a non-racy way.
 */
int
kern_mmap_fpcheck(struct thread *td, uintptr_t addr0, size_t size, int prot,
    int flags, int fd, off_t pos, mmap_check_fp_fn check_fp_fn)
{
	struct vmspace *vms;
	struct file *fp;
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_prot_t cap_maxprot;
	int align, error;
	cap_rights_t rights;

	vms = td->td_proc->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);
	addr = addr0;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((size == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
	    MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);
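
	/*
	 * Illustrative note (a sketch; the macro values are assumed from
	 * sys/mman.h rather than restated from this file): the alignment
	 * request is the log2 of the desired boundary, encoded in the
	 * high bits of the flags word.  A caller wanting a 2MB-aligned
	 * anonymous mapping would pass MAP_ALIGNED(21):
	 *
	 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	 *	    MAP_ANON | MAP_ALIGNED(21), -1, 0);
	 *
	 * The check above rejects log2 values smaller than PAGE_SHIFT or
	 * too large to describe a pointer-sized address; MAP_ALIGNED_SUPER
	 * requests superpage alignment without naming an explicit size.
	 */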

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (size == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}
		if (check_fp_fn != NULL) {
			error = check_fp_fn(fp, prot, cap_maxprot, flags);
			if (error != 0)
				goto done;
		}
		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}
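
/*
 * Illustrative sketch (userland, assuming the Capsicum API described in
 * capsicum(4); not part of this file): the rights computed above mean a
 * descriptor must carry the matching CAP_MMAP_* rights in capability
 * mode.  A descriptor limited to read-only mapping:
 *
 *	cap_rights_t rights;
 *
 *	cap_rights_limit(fd, cap_rights_init(&rights, CAP_MMAP_R));
 *	// mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0) may succeed,
 *	// while a PROT_WRITE request would be refused by fget_mmap().
 */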

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif				/* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}
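
/*
 * Illustrative sketch (userland, not part of this file): the check above
 * makes MS_ASYNC and MS_INVALIDATE mutually exclusive, so a synchronous
 * flush of a shared file mapping looks like:
 *
 *	if (msync(p, len, MS_SYNC) == -1)
 *		warn("msync");	// ENOMEM if part of the range is unmapped
 */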
493 */ 494 rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0, 495 (flags & MS_INVALIDATE) != 0); 496 switch (rv) { 497 case KERN_SUCCESS: 498 return (0); 499 case KERN_INVALID_ADDRESS: 500 return (ENOMEM); 501 case KERN_INVALID_ARGUMENT: 502 return (EBUSY); 503 case KERN_FAILURE: 504 return (EIO); 505 default: 506 return (EINVAL); 507 } 508} 509 510#ifndef _SYS_SYSPROTO_H_ 511struct munmap_args { 512 void *addr; 513 size_t len; 514}; 515#endif 516int 517sys_munmap(struct thread *td, struct munmap_args *uap) 518{ 519 520 return (kern_munmap(td, (uintptr_t)uap->addr, uap->len)); 521} 522 523int 524kern_munmap(struct thread *td, uintptr_t addr0, size_t size) 525{ 526#ifdef HWPMC_HOOKS 527 struct pmckern_map_out pkm; 528 vm_map_entry_t entry; 529 bool pmc_handled; 530#endif 531 vm_offset_t addr; 532 vm_size_t pageoff; 533 vm_map_t map; 534 535 if (size == 0) 536 return (EINVAL); 537 538 addr = addr0; 539 pageoff = (addr & PAGE_MASK); 540 addr -= pageoff; 541 size += pageoff; 542 size = (vm_size_t) round_page(size); 543 if (addr + size < addr) 544 return (EINVAL); 545 546 /* 547 * Check for illegal addresses. Watch out for address wrap... 548 */ 549 map = &td->td_proc->p_vmspace->vm_map; 550 if (addr < vm_map_min(map) || addr + size > vm_map_max(map)) 551 return (EINVAL); 552 vm_map_lock(map); 553#ifdef HWPMC_HOOKS 554 pmc_handled = false; 555 if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) { 556 pmc_handled = true; 557 /* 558 * Inform hwpmc if the address range being unmapped contains 559 * an executable region. 560 */ 561 pkm.pm_address = (uintptr_t) NULL; 562 if (vm_map_lookup_entry(map, addr, &entry)) { 563 for (; entry->start < addr + size; 564 entry = entry->next) { 565 if (vm_map_check_protection(map, entry->start, 566 entry->end, VM_PROT_EXECUTE) == TRUE) { 567 pkm.pm_address = (uintptr_t) addr; 568 pkm.pm_size = (size_t) size; 569 break; 570 } 571 } 572 } 573 } 574#endif 575 vm_map_delete(map, addr, addr + size); 576 577#ifdef HWPMC_HOOKS 578 if (__predict_false(pmc_handled)) { 579 /* downgrade the lock to prevent a LOR with the pmc-sx lock */ 580 vm_map_lock_downgrade(map); 581 if (pkm.pm_address != (uintptr_t) NULL) 582 PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm); 583 vm_map_unlock_read(map); 584 } else 585#endif 586 vm_map_unlock(map); 587 588 /* vm_map_delete returns nothing but KERN_SUCCESS anyway */ 589 return (0); 590} 591 592#ifndef _SYS_SYSPROTO_H_ 593struct mprotect_args { 594 const void *addr; 595 size_t len; 596 int prot; 597}; 598#endif 599int 600sys_mprotect(struct thread *td, struct mprotect_args *uap) 601{ 602 603 return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot)); 604} 605 606int 607kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot) 608{ 609 vm_offset_t addr; 610 vm_size_t pageoff; 611 612 addr = addr0; 613 prot = (prot & VM_PROT_ALL); 614 pageoff = (addr & PAGE_MASK); 615 addr -= pageoff; 616 size += pageoff; 617 size = (vm_size_t) round_page(size); 618#ifdef COMPAT_FREEBSD32 619 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 620 if (((addr + size) & 0xffffffff) < addr) 621 return (EINVAL); 622 } else 623#endif 624 if (addr + size < addr) 625 return (EINVAL); 626 627 switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, 628 addr + size, prot, FALSE)) { 629 case KERN_SUCCESS: 630 return (0); 631 case KERN_PROTECTION_FAILURE: 632 return (EACCES); 633 case KERN_RESOURCE_SHORTAGE: 634 return (ENOMEM); 635 } 636 return (EINVAL); 637} 638 639#ifndef _SYS_SYSPROTO_H_ 640struct minherit_args { 641 void *addr; 642 size_t 

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal behavior.
	 */
	if (behav < 0 || behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (addr < vm_map_min(map) || addr + len > vm_map_max(map))
		return (EINVAL);
	if ((addr + len) < addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	if (vm_map_madvise(map, start, end, behav))
		return (EINVAL);
	return (0);
}
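
/*
 * Illustrative sketch (userland, not part of this file): apart from the
 * MADV_PROTECT special case, the advice is handed to vm_map_madvise()
 * unchanged, e.g. for a region that is scanned once and then discarded:
 *
 *	madvise(p, len, MADV_SEQUENTIAL);
 *	// ... scan the region ...
 *	madvise(p, len, MADV_DONTNEED);
 */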
793 */ 794 lastvecindex = -1; 795 for (current = entry; current->start < end; current = current->next) { 796 797 /* 798 * check for contiguity 799 */ 800 if (current->end < end && current->next->start > current->end) { 801 vm_map_unlock_read(map); 802 return (ENOMEM); 803 } 804 805 /* 806 * ignore submaps (for now) or null objects 807 */ 808 if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || 809 current->object.vm_object == NULL) 810 continue; 811 812 /* 813 * limit this scan to the current map entry and the 814 * limits for the mincore call 815 */ 816 if (addr < current->start) 817 addr = current->start; 818 cend = current->end; 819 if (cend > end) 820 cend = end; 821 822 /* 823 * scan this entry one page at a time 824 */ 825 while (addr < cend) { 826 /* 827 * Check pmap first, it is likely faster, also 828 * it can provide info as to whether we are the 829 * one referencing or modifying the page. 830 */ 831 object = NULL; 832 locked_pa = 0; 833 retry: 834 m = NULL; 835 mincoreinfo = pmap_mincore(pmap, addr, &locked_pa); 836 if (mincore_mapped) { 837 /* 838 * We only care about this pmap's 839 * mapping of the page, if any. 840 */ 841 if (locked_pa != 0) { 842 vm_page_unlock(PHYS_TO_VM_PAGE( 843 locked_pa)); 844 } 845 } else if (locked_pa != 0) { 846 /* 847 * The page is mapped by this process but not 848 * both accessed and modified. It is also 849 * managed. Acquire the object lock so that 850 * other mappings might be examined. 851 */ 852 m = PHYS_TO_VM_PAGE(locked_pa); 853 if (m->object != object) { 854 if (object != NULL) 855 VM_OBJECT_WUNLOCK(object); 856 object = m->object; 857 locked = VM_OBJECT_TRYWLOCK(object); 858 vm_page_unlock(m); 859 if (!locked) { 860 VM_OBJECT_WLOCK(object); 861 vm_page_lock(m); 862 goto retry; 863 } 864 } else 865 vm_page_unlock(m); 866 KASSERT(m->valid == VM_PAGE_BITS_ALL, 867 ("mincore: page %p is mapped but invalid", 868 m)); 869 } else if (mincoreinfo == 0) { 870 /* 871 * The page is not mapped by this process. If 872 * the object implements managed pages, then 873 * determine if the page is resident so that 874 * the mappings might be examined. 875 */ 876 if (current->object.vm_object != object) { 877 if (object != NULL) 878 VM_OBJECT_WUNLOCK(object); 879 object = current->object.vm_object; 880 VM_OBJECT_WLOCK(object); 881 } 882 if (object->type == OBJT_DEFAULT || 883 object->type == OBJT_SWAP || 884 object->type == OBJT_VNODE) { 885 pindex = OFF_TO_IDX(current->offset + 886 (addr - current->start)); 887 m = vm_page_lookup(object, pindex); 888 if (m != NULL && m->valid == 0) 889 m = NULL; 890 if (m != NULL) 891 mincoreinfo = MINCORE_INCORE; 892 } 893 } 894 if (m != NULL) { 895 /* Examine other mappings to the page. */ 896 if (m->dirty == 0 && pmap_is_modified(m)) 897 vm_page_dirty(m); 898 if (m->dirty != 0) 899 mincoreinfo |= MINCORE_MODIFIED_OTHER; 900 /* 901 * The first test for PGA_REFERENCED is an 902 * optimization. The second test is 903 * required because a concurrent pmap 904 * operation could clear the last reference 905 * and set PGA_REFERENCED before the call to 906 * pmap_is_referenced(). 907 */ 908 if ((m->aflags & PGA_REFERENCED) != 0 || 909 pmap_is_referenced(m) || 910 (m->aflags & PGA_REFERENCED) != 0) 911 mincoreinfo |= MINCORE_REFERENCED_OTHER; 912 } 913 if (object != NULL) 914 VM_OBJECT_WUNLOCK(object); 915 916 /* 917 * subyte may page fault. In case it needs to modify 918 * the map, we release the lock. 
919 */ 920 vm_map_unlock_read(map); 921 922 /* 923 * calculate index into user supplied byte vector 924 */ 925 vecindex = atop(addr - first_addr); 926 927 /* 928 * If we have skipped map entries, we need to make sure that 929 * the byte vector is zeroed for those skipped entries. 930 */ 931 while ((lastvecindex + 1) < vecindex) { 932 ++lastvecindex; 933 error = subyte(vec + lastvecindex, 0); 934 if (error) { 935 error = EFAULT; 936 goto done2; 937 } 938 } 939 940 /* 941 * Pass the page information to the user 942 */ 943 error = subyte(vec + vecindex, mincoreinfo); 944 if (error) { 945 error = EFAULT; 946 goto done2; 947 } 948 949 /* 950 * If the map has changed, due to the subyte, the previous 951 * output may be invalid. 952 */ 953 vm_map_lock_read(map); 954 if (timestamp != map->timestamp) 955 goto RestartScan; 956 957 lastvecindex = vecindex; 958 addr += PAGE_SIZE; 959 } 960 } 961 962 /* 963 * subyte may page fault. In case it needs to modify 964 * the map, we release the lock. 965 */ 966 vm_map_unlock_read(map); 967 968 /* 969 * Zero the last entries in the byte vector. 970 */ 971 vecindex = atop(end - first_addr); 972 while ((lastvecindex + 1) < vecindex) { 973 ++lastvecindex; 974 error = subyte(vec + lastvecindex, 0); 975 if (error) { 976 error = EFAULT; 977 goto done2; 978 } 979 } 980 981 /* 982 * If the map has changed, due to the subyte, the previous 983 * output may be invalid. 984 */ 985 vm_map_lock_read(map); 986 if (timestamp != map->timestamp) 987 goto RestartScan; 988 vm_map_unlock_read(map); 989done2: 990 return (error); 991} 992 993#ifndef _SYS_SYSPROTO_H_ 994struct mlock_args { 995 const void *addr; 996 size_t len; 997}; 998#endif 999int 1000sys_mlock(struct thread *td, struct mlock_args *uap) 1001{ 1002 1003 return (kern_mlock(td->td_proc, td->td_ucred, 1004 __DECONST(uintptr_t, uap->addr), uap->len)); 1005} 1006 1007int 1008kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len) 1009{ 1010 vm_offset_t addr, end, last, start; 1011 vm_size_t npages, size; 1012 vm_map_t map; 1013 unsigned long nsize; 1014 int error; 1015 1016 error = priv_check_cred(cred, PRIV_VM_MLOCK, 0); 1017 if (error) 1018 return (error); 1019 addr = addr0; 1020 size = len; 1021 last = addr + size; 1022 start = trunc_page(addr); 1023 end = round_page(last); 1024 if (last < addr || end < addr) 1025 return (EINVAL); 1026 npages = atop(end - start); 1027 if (npages > vm_page_max_wired) 1028 return (ENOMEM); 1029 map = &proc->p_vmspace->vm_map; 1030 PROC_LOCK(proc); 1031 nsize = ptoa(npages + pmap_wired_count(map->pmap)); 1032 if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) { 1033 PROC_UNLOCK(proc); 1034 return (ENOMEM); 1035 } 1036 PROC_UNLOCK(proc); 1037 if (npages + vm_cnt.v_wire_count > vm_page_max_wired) 1038 return (EAGAIN); 1039#ifdef RACCT 1040 if (racct_enable) { 1041 PROC_LOCK(proc); 1042 error = racct_set(proc, RACCT_MEMLOCK, nsize); 1043 PROC_UNLOCK(proc); 1044 if (error != 0) 1045 return (ENOMEM); 1046 } 1047#endif 1048 error = vm_map_wire(map, start, end, 1049 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1050#ifdef RACCT 1051 if (racct_enable && error != KERN_SUCCESS) { 1052 PROC_LOCK(proc); 1053 racct_set(proc, RACCT_MEMLOCK, 1054 ptoa(pmap_wired_count(map->pmap))); 1055 PROC_UNLOCK(proc); 1056 } 1057#endif 1058 return (error == KERN_SUCCESS ? 

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		PROC_LOCK(td->td_proc);
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}
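
/*
 * Illustrative sketch (userland, not part of this file): MCL_FUTURE only
 * sets MAP_WIREFUTURE on the map, so pages are wired as later mappings
 * are created, while MCL_CURRENT wires what is mapped right now.  A
 * latency-sensitive process usually requests both:
 *
 *	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
 *		err(1, "mlockall");	// EAGAIN if wiring failed
 */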

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{

	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
{
	vm_offset_t addr, end, last, start;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = addr0;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform sanity checks specific to mmap
 * operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_ooffset_t foff;
	struct ucred *cred;
	int error, flags, locktype;

	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	if ((error = vget(vp, locktype, td)) != 0)
		return (error);
	AUDIT_ARG_VNODE1(vp);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, locktype, td);
			if (error != 0)
				return (error);
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	/* This relies on VM_PROT_* matching PROT_*. */
	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & VM_PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE) {
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
		if (obj == NULL) {
			error = ENOMEM;
			goto done;
		}
	} else {
		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
		    ("wrong object type"));
		VM_OBJECT_WLOCK(obj);
		vm_object_reference_locked(obj);
#if VM_NRESERVLEVEL > 0
		vm_object_color(obj, 0);
#endif
		VM_OBJECT_WUNLOCK(obj);
	}
	*objp = obj;
	*flagsp = flags;

	vfs_mark_atime(vp, cred);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vnode_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * Helper function for vm_mmap.  Perform sanity checks specific to mmap
 * operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	int error, flags;

	flags = *flagsp;

	if (dsw->d_flags & D_MMAP_ANON) {
		*objp = NULL;
		*foff = 0;
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if (flags & (MAP_PRIVATE|MAP_COPY))
		return (EINVAL);
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
	if (error != 0)
		return (error);
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}
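
/*
 * Illustrative sketch (hypothetical driver, not part of this file): a
 * cdev that wants control over the backing object implements
 * d_mmap_single() and returns a referenced object; returning ENODEV
 * instead selects the device-pager fallback above.  The names
 * mydev_mmap_single and mydev_obj are made up.
 *
 *	static int
 *	mydev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, vm_object_t *object, int nprot)
 *	{
 *
 *		vm_object_reference(mydev_obj);	// hand out a reference
 *		*object = mydev_obj;
 *		return (0);
 *	}
 */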

/*
 * vm_mmap()
 *
 * Internal version of mmap used by exec, sys5 shared memory, and
 * various device drivers.  Handle is either a vnode pointer, a
 * character device, or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	vm_object_t object;
	struct thread *td = curthread;
	int error;
	boolean_t writecounted;

	if (size == 0)
		return (EINVAL);

	size = round_page(size);
	object = NULL;
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE: {
		struct cdevsw *dsw;
		struct cdev *cdev;
		int ref;

		cdev = handle;
		dsw = dev_refthread(cdev, &ref);
		if (dsw == NULL)
			return (ENXIO);
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
		    dsw, &foff, &object);
		dev_relthread(cdev, ref);
		break;
	}
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, writecounted, td);
	if (error != 0 && object != NULL) {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		vm_object_deallocate(object);
	}
	return (error);
}
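
/*
 * Illustrative sketch (hypothetical in-kernel caller, not part of this
 * file): a subsystem mapping anonymous memory into a target map would
 * pass OBJT_DEFAULT with a NULL handle and let vm_mmap_object() choose
 * the address:
 *
 *	vm_offset_t va = 0;
 *	int error = vm_mmap(map, &va, PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, MAP_ANON,
 *	    OBJT_DEFAULT, NULL, 0);
 */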

/*
 * Internal version of mmap that maps a specific VM object into a
 * map.  Called by sys_mmap() for MAP_ANON, and by vm_mmap(), shm_mmap(),
 * and vn_mmap().
 */
int
vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
    boolean_t writecounted, struct thread *td)
{
	boolean_t curmap, fitit;
	vm_offset_t max_addr;
	int docow, error, findspace, rv;

	curmap = map == &td->td_proc->p_vmspace->vm_map;
	if (curmap) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur_proc(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(pmap_wired_count(map->pmap)) + size >
			    lim_cur_proc(td->td_proc, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The mmap() system call already enforces this by subtracting
	 * the page offset from the file offset, but checking here
	 * catches errors in device drivers (e.g. d_mmap_single()
	 * callbacks) and other internal mapping requests (such as in
	 * exec).
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}

	if (flags & MAP_ANON) {
		if (object != NULL || foff != 0)
			return (EINVAL);
		docow = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;
	if (flags & MAP_STACK) {
		if (object != NULL)
			return (EINVAL);
		docow |= MAP_STACK_GROWS_DOWN;
	}
	if ((flags & MAP_EXCL) != 0)
		docow |= MAP_CHECK_EXCL;
	if ((flags & MAP_GUARD) != 0)
		docow |= MAP_CREATE_GUARD;

	if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		max_addr = 0;
#ifdef MAP_32BIT
		if ((flags & MAP_32BIT) != 0)
			max_addr = MAP_32BIT_MAX_ADDR;
#endif
		if (curmap) {
			rv = vm_map_find_min(map, object, foff, addr, size,
			    round_page((vm_offset_t)td->td_proc->p_vmspace->
			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
			    findspace, prot, maxprot, docow);
		} else {
			rv = vm_map_find(map, object, foff, addr, size,
			    max_addr, findspace, prot, maxprot, docow);
		}
	} else {
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);
	}

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}