1/* 2 * Copyright (c) 1999-2008 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * File: ubc_subr.c 30 * Author: Umesh Vaishampayan [umeshv@apple.com] 31 * 05-Aug-1999 umeshv Created. 32 * 33 * Functions related to Unified Buffer cache. 34 * 35 * Caller of UBC functions MUST have a valid reference on the vnode. 
36 * 37 */ 38 39#include <sys/types.h> 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/lock.h> 43#include <sys/mman.h> 44#include <sys/mount_internal.h> 45#include <sys/vnode_internal.h> 46#include <sys/ubc_internal.h> 47#include <sys/ucred.h> 48#include <sys/proc_internal.h> 49#include <sys/kauth.h> 50#include <sys/buf.h> 51#include <sys/user.h> 52#include <sys/codesign.h> 53 54#include <mach/mach_types.h> 55#include <mach/memory_object_types.h> 56#include <mach/memory_object_control.h> 57#include <mach/vm_map.h> 58#include <mach/upl.h> 59 60#include <kern/kern_types.h> 61#include <kern/kalloc.h> 62#include <kern/zalloc.h> 63#include <kern/thread.h> 64#include <vm/vm_kern.h> 65#include <vm/vm_protos.h> /* last */ 66 67#include <libkern/crypto/sha1.h> 68 69#include <security/mac_framework.h> 70 71/* XXX These should be in a BSD accessible Mach header, but aren't. */ 72extern kern_return_t memory_object_pages_resident(memory_object_control_t, 73 boolean_t *); 74extern kern_return_t memory_object_signed(memory_object_control_t control, 75 boolean_t is_signed); 76extern void Debugger(const char *message); 77 78 79/* XXX no one uses this interface! */ 80kern_return_t ubc_page_op_with_control( 81 memory_object_control_t control, 82 off_t f_offset, 83 int ops, 84 ppnum_t *phys_entryp, 85 int *flagsp); 86 87 88#if DIAGNOSTIC 89#if defined(assert) 90#undef assert() 91#endif 92#define assert(cond) \ 93 ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond))) 94#else 95#include <kern/assert.h> 96#endif /* DIAGNOSTIC */ 97 98static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize); 99static int ubc_umcallback(vnode_t, void *); 100static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *); 101static void ubc_cs_free(struct ubc_info *uip); 102 103struct zone *ubc_info_zone; 104 105 106/* 107 * CODESIGNING 108 * Routines to navigate code signing data structures in the kernel... 
109 */ 110static boolean_t 111cs_valid_range( 112 const void *start, 113 const void *end, 114 const void *lower_bound, 115 const void *upper_bound) 116{ 117 if (upper_bound < lower_bound || 118 end < start) { 119 return FALSE; 120 } 121 122 if (start < lower_bound || 123 end > upper_bound) { 124 return FALSE; 125 } 126 127 return TRUE; 128} 129 130/* 131 * Magic numbers used by Code Signing 132 */ 133enum { 134 CSMAGIC_REQUIREMENT = 0xfade0c00, /* single Requirement blob */ 135 CSMAGIC_REQUIREMENTS = 0xfade0c01, /* Requirements vector (internal requirements) */ 136 CSMAGIC_CODEDIRECTORY = 0xfade0c02, /* CodeDirectory blob */ 137 CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0, /* embedded form of signature data */ 138 CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02, /* XXX */ 139 CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1, /* multi-arch collection of embedded signatures */ 140 141 CSSLOT_CODEDIRECTORY = 0, /* slot index for CodeDirectory */ 142}; 143 144 145/* 146 * Structure of an embedded-signature SuperBlob 147 */ 148typedef struct __BlobIndex { 149 uint32_t type; /* type of entry */ 150 uint32_t offset; /* offset of entry */ 151} CS_BlobIndex; 152 153typedef struct __SuperBlob { 154 uint32_t magic; /* magic number */ 155 uint32_t length; /* total length of SuperBlob */ 156 uint32_t count; /* number of index entries following */ 157 CS_BlobIndex index[]; /* (count) entries */ 158 /* followed by Blobs in no particular order as indicated by offsets in index */ 159} CS_SuperBlob; 160 161 162/* 163 * C form of a CodeDirectory. 
164 */ 165typedef struct __CodeDirectory { 166 uint32_t magic; /* magic number (CSMAGIC_CODEDIRECTORY) */ 167 uint32_t length; /* total length of CodeDirectory blob */ 168 uint32_t version; /* compatibility version */ 169 uint32_t flags; /* setup and mode flags */ 170 uint32_t hashOffset; /* offset of hash slot element at index zero */ 171 uint32_t identOffset; /* offset of identifier string */ 172 uint32_t nSpecialSlots; /* number of special hash slots */ 173 uint32_t nCodeSlots; /* number of ordinary (code) hash slots */ 174 uint32_t codeLimit; /* limit to main image signature range */ 175 uint8_t hashSize; /* size of each hash in bytes */ 176 uint8_t hashType; /* type of hash (cdHashType* constants) */ 177 uint8_t spare1; /* unused (must be zero) */ 178 uint8_t pageSize; /* log2(page size in bytes); 0 => infinite */ 179 uint32_t spare2; /* unused (must be zero) */ 180 /* followed by dynamic content as located by offset fields above */ 181} CS_CodeDirectory; 182 183 184/* 185 * Locate the CodeDirectory from an embedded signature blob 186 */ 187static const 188CS_CodeDirectory *findCodeDirectory( 189 const CS_SuperBlob *embedded, 190 char *lower_bound, 191 char *upper_bound) 192{ 193 const CS_CodeDirectory *cd = NULL; 194 195 if (embedded && 196 cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) && 197 ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) { 198 const CS_BlobIndex *limit; 199 const CS_BlobIndex *p; 200 201 limit = &embedded->index[ntohl(embedded->count)]; 202 if (!cs_valid_range(&embedded->index[0], limit, 203 lower_bound, upper_bound)) { 204 return NULL; 205 } 206 for (p = embedded->index; p < limit; ++p) { 207 if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) { 208 const unsigned char *base; 209 210 base = (const unsigned char *)embedded; 211 cd = (const CS_CodeDirectory *)(base + ntohl(p->offset)); 212 break; 213 } 214 } 215 } else { 216 /* 217 * Detached signatures come as a bare CS_CodeDirectory, 218 * without a blob. 
219 */ 220 cd = (const CS_CodeDirectory *) embedded; 221 } 222 if (cd && 223 cs_valid_range(cd, cd + 1, lower_bound, upper_bound) && 224 cs_valid_range(cd, (const char *) cd + ntohl(cd->length), 225 lower_bound, upper_bound) && 226 cs_valid_range(cd, (const char *) cd + ntohl(cd->hashOffset), 227 lower_bound, upper_bound) && 228 cs_valid_range(cd, (const char *) cd + 229 ntohl(cd->hashOffset) + 230 (ntohl(cd->nCodeSlots) * SHA1_RESULTLEN), 231 lower_bound, upper_bound) && 232 233 ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) { 234 return cd; 235 } 236 237 // not found or not a valid code directory 238 return NULL; 239} 240 241 242/* 243 * Locating a page hash 244 */ 245static const unsigned char * 246hashes( 247 const CS_CodeDirectory *cd, 248 unsigned page, 249 char *lower_bound, 250 char *upper_bound) 251{ 252 const unsigned char *base, *top, *hash; 253 uint32_t nCodeSlots; 254 255 assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound)); 256 257 base = (const unsigned char *)cd + ntohl(cd->hashOffset); 258 nCodeSlots = ntohl(cd->nCodeSlots); 259 top = base + nCodeSlots * SHA1_RESULTLEN; 260 if (!cs_valid_range(base, top, 261 lower_bound, upper_bound) || 262 page > nCodeSlots) { 263 return NULL; 264 } 265 assert(page < nCodeSlots); 266 267 hash = base + page * SHA1_RESULTLEN; 268 if (!cs_valid_range(hash, hash + SHA1_RESULTLEN, 269 lower_bound, upper_bound)) { 270 hash = NULL; 271 } 272 273 return hash; 274} 275/* 276 * CODESIGNING 277 * End of routines to navigate code signing data structures in the kernel. 278 */ 279 280 281/* 282 * ubc_init 283 * 284 * Initialization of the zone for Unified Buffer Cache. 
285 * 286 * Parameters: (void) 287 * 288 * Returns: (void) 289 * 290 * Implicit returns: 291 * ubc_info_zone(global) initialized for subsequent allocations 292 */ 293__private_extern__ void 294ubc_init(void) 295{ 296 int i; 297 298 i = (vm_size_t) sizeof (struct ubc_info); 299 300 ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone"); 301} 302 303 304/* 305 * ubc_info_init 306 * 307 * Allocate and attach an empty ubc_info structure to a vnode 308 * 309 * Parameters: vp Pointer to the vnode 310 * 311 * Returns: 0 Success 312 * vnode_size:ENOMEM Not enough space 313 * vnode_size:??? Other error from vnode_getattr 314 * 315 */ 316int 317ubc_info_init(struct vnode *vp) 318{ 319 return(ubc_info_init_internal(vp, 0, 0)); 320} 321 322 323/* 324 * ubc_info_init_withsize 325 * 326 * Allocate and attach a sized ubc_info structure to a vnode 327 * 328 * Parameters: vp Pointer to the vnode 329 * filesize The size of the file 330 * 331 * Returns: 0 Success 332 * vnode_size:ENOMEM Not enough space 333 * vnode_size:??? Other error from vnode_getattr 334 */ 335int 336ubc_info_init_withsize(struct vnode *vp, off_t filesize) 337{ 338 return(ubc_info_init_internal(vp, 1, filesize)); 339} 340 341 342/* 343 * ubc_info_init_internal 344 * 345 * Allocate and attach a ubc_info structure to a vnode 346 * 347 * Parameters: vp Pointer to the vnode 348 * withfsize{0,1} Zero if the size should be obtained 349 * from the vnode; otherwise, use filesize 350 * filesize The size of the file, if withfsize == 1 351 * 352 * Returns: 0 Success 353 * vnode_size:ENOMEM Not enough space 354 * vnode_size:??? Other error from vnode_getattr 355 * 356 * Notes: We call a blocking zalloc(), and the zone was created as an 357 * expandable and collectable zone, so if no memory is available, 358 * it is possible for zalloc() to block indefinitely. zalloc() 359 * may also panic if the zone of zones is exhausted, since it's 360 * NOT expandable. 
 *
 *		We unconditionally call vnode_pager_setup(), even if this is
 *		a reuse of a ubc_info; in that case, we should probably assert
 *		that it does not already have a pager association, but do not.
 *
 *		Since memory_object_create_named() can only fail from receiving
 *		an invalid pager argument, the explicit check and panic is
 *		merely precautionary.
 *
 *		If vnode_size() fails, its error is returned to the caller,
 *		but the ubc_info stays attached to the vnode with ui_size set
 *		to zero and VNAMED_UBC still set.
 */
static int
ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
{
	register	struct ubc_info	*uip;
	void *  pager;
	int error = 0;
	kern_return_t kret;
	memory_object_control_t control;

	uip = vp->v_ubcinfo;

	/*
	 * If there is not already a ubc_info attached to the vnode, we
	 * attach one; otherwise, we will reuse the one that's there.
	 */
	if (uip == UBC_INFO_NULL) {

		/* the result of zalloc() is used unchecked; see Notes above */
		uip = (struct ubc_info *) zalloc(ubc_info_zone);
		bzero((char *)uip, sizeof(struct ubc_info));

		uip->ui_vnode = vp;
		uip->ui_flags = UI_INITED;
		uip->ui_ucred = NOCRED;
	}
	/* holds for a fresh structure by construction; checks a reused one */
	assert(uip->ui_flags != UI_NONE);
	assert(uip->ui_vnode == vp);

	/* now set this ubc_info in the vnode */
	vp->v_ubcinfo = uip;

	/*
	 * Allocate a pager object for this vnode
	 *
	 * XXX The value of the pager parameter is currently ignored.
	 * XXX Presumably, this API changed to avoid the race between
	 * XXX setting the pager and the UI_HASPAGER flag.
	 */
	pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
	assert(pager);

	/*
	 * Explicitly set the pager into the ubc_info, after setting the
	 * UI_HASPAGER flag.
	 */
	SET(uip->ui_flags, UI_HASPAGER);
	uip->ui_pager = pager;

	/*
	 * Note:  We can not use VNOP_GETATTR() to get accurate
	 * value of ui_size because this may be an NFS vnode, and
	 * nfs_getattr() can call vinvalbuf(); if this happens,
	 * ubc_info is not set up to deal with that event.
	 * So use bogus size.
	 *
	 * For a freshly allocated ubc_info the bzero() above leaves ui_size
	 * at zero; for a reused one it is whatever value was already there.
	 */

	/*
	 * create a vnode - vm_object association
	 * memory_object_create_named() creates a "named" reference on the
	 * memory object; we hold this reference as long as the vnode is
	 * "alive."  Since memory_object_create_named() took its own reference
	 * on the vnode pager we passed it, we can drop the reference
	 * vnode_pager_setup() returned here.
	 */
	kret = memory_object_create_named(pager,
		(memory_object_size_t)uip->ui_size, &control);
	vnode_pager_deallocate(pager); 
	if (kret != KERN_SUCCESS)
		panic("ubc_info_init: memory_object_create_named returned %d", kret);

	assert(control);
	uip->ui_control = control;	/* cache the value of the mo control */
	SET(uip->ui_flags, UI_HASOBJREF);	/* with a named reference */

	/* only now is the real size recorded, from the vnode or the caller */
	if (withfsize == 0) {
		/* initialize the size */
		error = vnode_size(vp, &uip->ui_size, vfs_context_current());
		if (error)
			uip->ui_size = 0;
	} else {
		uip->ui_size = filesize;
	}
	vp->v_lflag |= VNAMED_UBC;	/* vnode has a named ubc reference */

	return (error);
}


/*
 * ubc_info_free
 *
 * Free a ubc_info structure
 *
 * Parameters:	uip			A pointer to the ubc_info to free
 *
 * Returns:	(void)
 *
 * Notes:	If there is a credential that has subsequently been associated
 *		with the ubc_info via a call to ubc_setcred(), the reference
 *		to the credential is dropped.
 *
 *		It's actually impossible for a ubc_info.ui_control to take the
 *		value MEMORY_OBJECT_CONTROL_NULL.
472 */ 473static void 474ubc_info_free(struct ubc_info *uip) 475{ 476 if (IS_VALID_CRED(uip->ui_ucred)) { 477 kauth_cred_unref(&uip->ui_ucred); 478 } 479 480 if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL) 481 memory_object_control_deallocate(uip->ui_control); 482 483 cluster_release(uip); 484 ubc_cs_free(uip); 485 486 zfree(ubc_info_zone, uip); 487 return; 488} 489 490 491void 492ubc_info_deallocate(struct ubc_info *uip) 493{ 494 ubc_info_free(uip); 495} 496 497 498/* 499 * ubc_setsize 500 * 501 * Tell the VM that the the size of the file represented by the vnode has 502 * changed 503 * 504 * Parameters: vp The vp whose backing file size is 505 * being changed 506 * nsize The new size of the backing file 507 * 508 * Returns: 1 Success 509 * 0 Failure 510 * 511 * Notes: This function will indicate failure if the new size that's 512 * being attempted to be set is negative. 513 * 514 * This function will fail if there is no ubc_info currently 515 * associated with the vnode. 516 * 517 * This function will indicate success it the new size is the 518 * same or larger than the old size (in this case, the remainder 519 * of the file will require modification or use of an existing upl 520 * to access successfully). 521 * 522 * This function will fail if the new file size is smaller, and 523 * the memory region being invalidated was unable to actually be 524 * invalidated and/or the last page could not be flushed, if the 525 * new size is not aligned to a page boundary. This is usually 526 * indicative of an I/O error. 
 */
int
ubc_setsize(struct vnode *vp, off_t nsize)
{
	off_t osize;	/* ui_size before change */
	off_t lastpg, olastpgend, lastoff;
	struct ubc_info *uip;
	memory_object_control_t control;
	kern_return_t kret = KERN_SUCCESS;

	/* a negative size is rejected outright */
	if (nsize < (off_t)0)
		return (0);

	if (!UBCINFOEXISTS(vp))
		return (0);

	uip = vp->v_ubcinfo;
	osize = uip->ui_size;
	/*
	 * Update the size before flushing the VM
	 */
	uip->ui_size = nsize;

	if (nsize >= osize)	/* Nothing more to do */
		return (1);		/* return success */

	/*
	 * When the file shrinks, invalidate the pages beyond the
	 * new size. Also get rid of garbage beyond nsize on the
	 * last page. The ui_size already has the nsize, so any
	 * subsequent page-in will zero-fill the tail properly
	 */
	lastpg = trunc_page_64(nsize);		/* start of the page holding the new EOF */
	olastpgend = round_page_64(osize);	/* end of the page holding the old EOF */
	control = uip->ui_control;
	assert(control);
	lastoff = (nsize & PAGE_MASK_64);	/* offset of the new EOF within its page */

	if (lastoff) {
		upl_t		upl;
		upl_page_info_t	*pl;


	        /*
		 * new EOF ends up in the middle of a page
		 * zero the tail of this page if it's currently
		 * present in the cache
		 */
	        kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);
		
		if (kret != KERN_SUCCESS)
		        panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);

		if (upl_valid_page(pl, 0))
		        cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);

		ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);

		/* the partial page is kept; invalidation starts at the next page */
		lastpg += PAGE_SIZE_64;
	}
	if (olastpgend > lastpg) {
	        /*
		 * invalidate the pages beyond the new EOF page
		 *
		 */
	        kret = memory_object_lock_request(control,
						  (memory_object_offset_t)lastpg,
						  (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
						  MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH,
						  VM_PROT_NO_CHANGE);
		/* a failure here is logged and reported through the return value */
		if (kret != KERN_SUCCESS)
		        printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
	}
	return ((kret == KERN_SUCCESS) ? 1 : 0);
}


/*
 * ubc_getsize
 *
 * Get the size of the file associated with the specified vnode
 *
 * Parameters:	vp			The vnode whose size is of interest
 *
 * Returns:	0			There is no ubc_info associated with
 *					this vnode, or the size is zero
 *		!0			The size of the file
 *
 * Notes:	Using this routine, it is not possible for a caller to
 *		successfully distinguish between a vnode associated with a zero
 *		length file, and a vnode with no associated ubc_info.  The
 *		caller therefore needs to not care, or needs to ensure that
 *		they have previously successfully called ubc_info_init() or
 *		ubc_info_init_withsize().
 */
off_t
ubc_getsize(struct vnode *vp)
{
	/* people depend on the side effect of this working this way
	 * as they call this for directory 
	 */
	if (!UBCINFOEXISTS(vp))
		return ((off_t)0);
	return (vp->v_ubcinfo->ui_size);
}


/*
 * ubc_umount
 *
 * Call ubc_msync(vp, 0, EOF, NULL, UBC_PUSHALL) on all the vnodes for this
 * mount point
 *
 * Parameters:	mp			The mount point
 *
 * Returns:	0			Success
 *
 * Notes:	There is no failure indication for this function.
 *
 *		This function is used in the unmount path; since it may block
 *		I/O indefinitely, it should not be used in the forced unmount
 *		path, since a device unavailability could also block that
 *		indefinitely.
 *
 *		Because there is no device ejection interlock on USB, FireWire,
 *		or similar devices, it's possible that an ejection that begins
 *		subsequent to the vnode_iterate() completing, either on one of
 *		those devices, or a network mount for which the server quits
 *		responding, etc., may cause the caller to block indefinitely.
656 */ 657__private_extern__ int 658ubc_umount(struct mount *mp) 659{ 660 vnode_iterate(mp, 0, ubc_umcallback, 0); 661 return(0); 662} 663 664 665/* 666 * ubc_umcallback 667 * 668 * Used by ubc_umount() as an internal implementation detail; see ubc_umount() 669 * and vnode_iterate() for details of implementation. 670 */ 671static int 672ubc_umcallback(vnode_t vp, __unused void * args) 673{ 674 675 if (UBCINFOEXISTS(vp)) { 676 677 (void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL); 678 } 679 return (VNODE_RETURNED); 680} 681 682 683/* 684 * ubc_getcred 685 * 686 * Get the credentials currently active for the ubc_info associated with the 687 * vnode. 688 * 689 * Parameters: vp The vnode whose ubc_info credentials 690 * are to be retrieved 691 * 692 * Returns: !NOCRED The credentials 693 * NOCRED If there is no ubc_info for the vnode, 694 * or if there is one, but it has not had 695 * any credentials associated with it via 696 * a call to ubc_setcred() 697 */ 698kauth_cred_t 699ubc_getcred(struct vnode *vp) 700{ 701 if (UBCINFOEXISTS(vp)) 702 return (vp->v_ubcinfo->ui_ucred); 703 704 return (NOCRED); 705} 706 707 708/* 709 * ubc_setthreadcred 710 * 711 * If they are not already set, set the credentials of the ubc_info structure 712 * associated with the vnode to those of the supplied thread; otherwise leave 713 * them alone. 714 * 715 * Parameters: vp The vnode whose ubc_info creds are to 716 * be set 717 * p The process whose credentials are to 718 * be used, if not running on an assumed 719 * credential 720 * thread The thread whose credentials are to 721 * be used 722 * 723 * Returns: 1 This vnode has no associated ubc_info 724 * 0 Success 725 * 726 * Notes: This function takes a proc parameter to account for bootstrap 727 * issues where a task or thread may call this routine, either 728 * before credentials have been initialized by bsd_init(), or if 729 * there is no BSD info asscoiate with a mach thread yet. 
This 730 * is known to happen in both the initial swap and memory mapping 731 * calls. 732 * 733 * This function is generally used only in the following cases: 734 * 735 * o a memory mapped file via the mmap() system call 736 * o a memory mapped file via the deprecated map_fd() call 737 * o a swap store backing file 738 * o subsequent to a successful write via vn_write() 739 * 740 * The information is then used by the NFS client in order to 741 * cons up a wire message in either the page-in or page-out path. 742 * 743 * There are two potential problems with the use of this API: 744 * 745 * o Because the write path only set it on a successful 746 * write, there is a race window between setting the 747 * credential and its use to evict the pages to the 748 * remote file server 749 * 750 * o Because a page-in may occur prior to a write, the 751 * credential may not be set at this time, if the page-in 752 * is not the result of a mapping established via mmap() 753 * or map_fd(). 754 * 755 * In both these cases, this will be triggered from the paging 756 * path, which will instead use the credential of the current 757 * process, which in this case is either the dynamic_pager or 758 * the kernel task, both of which utilize "root" credentials. 759 * 760 * This may potentially permit operations to occur which should 761 * be denied, or it may cause to be denied operations which 762 * should be permitted, depending on the configuration of the NFS 763 * server. 
764 */ 765int 766ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread) 767{ 768 struct ubc_info *uip; 769 kauth_cred_t credp; 770 struct uthread *uthread = get_bsdthread_info(thread); 771 772 if (!UBCINFOEXISTS(vp)) 773 return (1); 774 775 vnode_lock(vp); 776 777 uip = vp->v_ubcinfo; 778 credp = uip->ui_ucred; 779 780 if (!IS_VALID_CRED(credp)) { 781 /* use per-thread cred, if assumed identity, else proc cred */ 782 if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) { 783 uip->ui_ucred = kauth_cred_proc_ref(p); 784 } else { 785 uip->ui_ucred = uthread->uu_ucred; 786 kauth_cred_ref(uip->ui_ucred); 787 } 788 } 789 vnode_unlock(vp); 790 791 return (0); 792} 793 794 795/* 796 * ubc_setcred 797 * 798 * If they are not already set, set the credentials of the ubc_info structure 799 * associated with the vnode to those of the process; otherwise leave them 800 * alone. 801 * 802 * Parameters: vp The vnode whose ubc_info creds are to 803 * be set 804 * p The process whose credentials are to 805 * be used 806 * 807 * Returns: 0 This vnode has no associated ubc_info 808 * 1 Success 809 * 810 * Notes: The return values for this function are inverted from nearly 811 * all other uses in the kernel. 812 * 813 * See also ubc_setthreadcred(), above. 814 * 815 * This function is considered deprecated, and generally should 816 * not be used, as it is incompatible with per-thread credentials; 817 * it exists for legacy KPI reasons. 818 * 819 * DEPRECATION: ubc_setcred() is being deprecated. Please use 820 * ubc_setthreadcred() instead. 821 */ 822int 823ubc_setcred(struct vnode *vp, proc_t p) 824{ 825 struct ubc_info *uip; 826 kauth_cred_t credp; 827 828 /* If there is no ubc_info, deny the operation */ 829 if ( !UBCINFOEXISTS(vp)) 830 return (0); 831 832 /* 833 * Check to see if there is already a credential reference in the 834 * ubc_info; if there is not, take one on the supplied credential. 
835 */ 836 vnode_lock(vp); 837 uip = vp->v_ubcinfo; 838 credp = uip->ui_ucred; 839 if (!IS_VALID_CRED(credp)) { 840 uip->ui_ucred = kauth_cred_proc_ref(p); 841 } 842 vnode_unlock(vp); 843 844 return (1); 845} 846 847 848/* 849 * ubc_getpager 850 * 851 * Get the pager associated with the ubc_info associated with the vnode. 852 * 853 * Parameters: vp The vnode to obtain the pager from 854 * 855 * Returns: !VNODE_PAGER_NULL The memory_object_t for the pager 856 * VNODE_PAGER_NULL There is no ubc_info for this vnode 857 * 858 * Notes: For each vnode that has a ubc_info associated with it, that 859 * ubc_info SHALL have a pager associated with it, so in the 860 * normal case, it's impossible to return VNODE_PAGER_NULL for 861 * a vnode with an associated ubc_info. 862 */ 863__private_extern__ memory_object_t 864ubc_getpager(struct vnode *vp) 865{ 866 if (UBCINFOEXISTS(vp)) 867 return (vp->v_ubcinfo->ui_pager); 868 869 return (0); 870} 871 872 873/* 874 * ubc_getobject 875 * 876 * Get the memory object control associated with the ubc_info associated with 877 * the vnode 878 * 879 * Parameters: vp The vnode to obtain the memory object 880 * from 881 * flags DEPRECATED 882 * 883 * Returns: !MEMORY_OBJECT_CONTROL_NULL 884 * MEMORY_OBJECT_CONTROL_NULL 885 * 886 * Notes: Historically, if the flags were not "do not reactivate", this 887 * function would look up the memory object using the pager if 888 * it did not exist (this could be the case if the vnode had 889 * been previously reactivated). The flags would also permit a 890 * hold to be requested, which would have created an object 891 * reference, if one had not already existed. This usage is 892 * deprecated, as it would permit a race between finding and 893 * taking the reference vs. a single reference being dropped in 894 * another thread. 
895 */ 896memory_object_control_t 897ubc_getobject(struct vnode *vp, __unused int flags) 898{ 899 if (UBCINFOEXISTS(vp)) 900 return((vp->v_ubcinfo->ui_control)); 901 902 return (MEMORY_OBJECT_CONTROL_NULL); 903} 904 905 906/* 907 * ubc_blktooff 908 * 909 * Convert a given block number to a memory backing object (file) offset for a 910 * given vnode 911 * 912 * Parameters: vp The vnode in which the block is located 913 * blkno The block number to convert 914 * 915 * Returns: !-1 The offset into the backing object 916 * -1 There is no ubc_info associated with 917 * the vnode 918 * -1 An error occurred in the underlying VFS 919 * while translating the block to an 920 * offset; the most likely cause is that 921 * the caller specified a block past the 922 * end of the file, but this could also be 923 * any other error from VNOP_BLKTOOFF(). 924 * 925 * Note: Representing the error in band loses some information, but does 926 * not occlude a valid offset, since an off_t of -1 is normally 927 * used to represent EOF. If we had a more reliable constant in 928 * our header files for it (i.e. explicitly cast to an off_t), we 929 * would use it here instead. 
930 */ 931off_t 932ubc_blktooff(vnode_t vp, daddr64_t blkno) 933{ 934 off_t file_offset = -1; 935 int error; 936 937 if (UBCINFOEXISTS(vp)) { 938 error = VNOP_BLKTOOFF(vp, blkno, &file_offset); 939 if (error) 940 file_offset = -1; 941 } 942 943 return (file_offset); 944} 945 946 947/* 948 * ubc_offtoblk 949 * 950 * Convert a given offset in a memory backing object into a block number for a 951 * given vnode 952 * 953 * Parameters: vp The vnode in which the offset is 954 * located 955 * offset The offset into the backing object 956 * 957 * Returns: !-1 The returned block number 958 * -1 There is no ubc_info associated with 959 * the vnode 960 * -1 An error occurred in the underlying VFS 961 * while translating the block to an 962 * offset; the most likely cause is that 963 * the caller specified a block past the 964 * end of the file, but this could also be 965 * any other error from VNOP_OFFTOBLK(). 966 * 967 * Note: Representing the error in band loses some information, but does 968 * not occlude a valid block number, since block numbers exceed 969 * the valid range for offsets, due to their relative sizes. If 970 * we had a more reliable constant than -1 in our header files 971 * for it (i.e. explicitly cast to an daddr64_t), we would use it 972 * here instead. 
973 */ 974daddr64_t 975ubc_offtoblk(vnode_t vp, off_t offset) 976{ 977 daddr64_t blkno = -1; 978 int error = 0; 979 980 if (UBCINFOEXISTS(vp)) { 981 error = VNOP_OFFTOBLK(vp, offset, &blkno); 982 if (error) 983 blkno = -1; 984 } 985 986 return (blkno); 987} 988 989 990/* 991 * ubc_pages_resident 992 * 993 * Determine whether or not a given vnode has pages resident via the memory 994 * object control associated with the ubc_info associated with the vnode 995 * 996 * Parameters: vp The vnode we want to know about 997 * 998 * Returns: 1 Yes 999 * 0 No 1000 */ 1001int 1002ubc_pages_resident(vnode_t vp) 1003{ 1004 kern_return_t kret; 1005 boolean_t has_pages_resident; 1006 1007 if (!UBCINFOEXISTS(vp)) 1008 return (0); 1009 1010 /* 1011 * The following call may fail if an invalid ui_control is specified, 1012 * or if there is no VM object associated with the control object. In 1013 * either case, reacting to it as if there were no pages resident will 1014 * result in correct behavior. 1015 */ 1016 kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident); 1017 1018 if (kret != KERN_SUCCESS) 1019 return (0); 1020 1021 if (has_pages_resident == TRUE) 1022 return (1); 1023 1024 return (0); 1025} 1026 1027 1028/* 1029 * ubc_sync_range 1030 * 1031 * Clean and/or invalidate a range in the memory object that backs this vnode 1032 * 1033 * Parameters: vp The vnode whose associated ubc_info's 1034 * associated memory object is to have a 1035 * range invalidated within it 1036 * beg_off The start of the range, as an offset 1037 * end_off The end of the range, as an offset 1038 * flags See ubc_msync_internal() 1039 * 1040 * Returns: 1 Success 1041 * 0 Failure 1042 * 1043 * Notes: see ubc_msync_internal() for more detailed information. 1044 * 1045 * DEPRECATED: This interface is obsolete due to a failure to return error 1046 * information needed in order to correct failures. The currently 1047 * recommended interface is ubc_msync(). 
1048 */ 1049int 1050ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags) 1051{ 1052 return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL)); 1053} 1054 1055 1056/* 1057 * ubc_msync 1058 * 1059 * Clean and/or invalidate a range in the memory object that backs this vnode 1060 * 1061 * Parameters: vp The vnode whose associated ubc_info's 1062 * associated memory object is to have a 1063 * range invalidated within it 1064 * beg_off The start of the range, as an offset 1065 * end_off The end of the range, as an offset 1066 * resid_off The address of an off_t supplied by the 1067 * caller; may be set to NULL to ignore 1068 * flags See ubc_msync_internal() 1069 * 1070 * Returns: 0 Success 1071 * !0 Failure; an errno is returned 1072 * 1073 * Implicit Returns: 1074 * *resid_off, modified If non-NULL, the contents are ALWAYS 1075 * modified; they are initialized to the 1076 * beg_off, and in case of an I/O error, 1077 * the difference between beg_off and the 1078 * current value will reflect what was 1079 * able to be written before the error 1080 * occurred. If no error is returned, the 1081 * value of the resid_off is undefined; do 1082 * NOT use it in place of end_off if you 1083 * intend to increment from the end of the 1084 * last call and call iteratively. 1085 * 1086 * Notes: see ubc_msync_internal() for more detailed information. 
1087 * 1088 */ 1089errno_t 1090ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags) 1091{ 1092 int retval; 1093 int io_errno = 0; 1094 1095 if (resid_off) 1096 *resid_off = beg_off; 1097 1098 retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno); 1099 1100 if (retval == 0 && io_errno == 0) 1101 return (EINVAL); 1102 return (io_errno); 1103} 1104 1105 1106/* 1107 * Clean and/or invalidate a range in the memory object that backs this vnode 1108 * 1109 * Parameters: vp The vnode whose associated ubc_info's 1110 * associated memory object is to have a 1111 * range invalidated within it 1112 * beg_off The start of the range, as an offset 1113 * end_off The end of the range, as an offset 1114 * resid_off The address of an off_t supplied by the 1115 * caller; may be set to NULL to ignore 1116 * flags MUST contain at least one of the flags 1117 * UBC_INVALIDATE, UBC_PUSHDIRTY, or 1118 * UBC_PUSHALL; if UBC_PUSHDIRTY is used, 1119 * UBC_SYNC may also be specified to cause 1120 * this function to block until the 1121 * operation is complete. The behavior 1122 * of UBC_SYNC is otherwise undefined. 1123 * io_errno The address of an int to contain the 1124 * errno from a failed I/O operation, if 1125 * one occurs; may be set to NULL to 1126 * ignore 1127 * 1128 * Returns: 1 Success 1129 * 0 Failure 1130 * 1131 * Implicit Returns: 1132 * *resid_off, modified The contents of this offset MAY be 1133 * modified; in case of an I/O error, the 1134 * difference between beg_off and the 1135 * current value will reflect what was 1136 * able to be written before the error 1137 * occurred. 
 *		*io_errno, modified	The contents of this int are set to
 *					an errno, if an error occurs; if the
 *					caller supplies an io_errno parameter,
 *					they should be careful to initialize it
 *					to 0 before calling this function to
 *					enable them to distinguish an error
 *					with a valid *resid_off from an invalid
 *					one, and to avoid potentially falsely
 *					reporting an error, depending on use.
 *
 * Notes:	If there is no ubc_info associated with the vnode supplied,
 *		this function immediately returns failure (0).
 *
 *		If the value of end_off is less than or equal to beg_off, this
 *		function immediately returns success; that is, end_off is NOT
 *		inclusive.
 *
 *		IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
 *		UBC_PUSHALL MUST be specified; that is, it is NOT possible to
 *		attempt to block on in-progress I/O by calling this function
 *		with UBC_PUSHDIRTY, and then later call it with just UBC_SYNC
 *		in order to block pending on the I/O already in progress.
 *
 *		The start offset is truncated to the page boundary and the
 *		size is adjusted to include the last page in the range; that
 *		is, end_off on exactly a page boundary will not change if it
 *		is rounded, and the range of bytes written will be from the
 *		truncated beg_off to the rounded (end_off - 1).
1166 */ 1167static int 1168ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno) 1169{ 1170 memory_object_size_t tsize; 1171 kern_return_t kret; 1172 int request_flags = 0; 1173 int flush_flags = MEMORY_OBJECT_RETURN_NONE; 1174 1175 if ( !UBCINFOEXISTS(vp)) 1176 return (0); 1177 if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0) 1178 return (0); 1179 if (end_off <= beg_off) 1180 return (1); 1181 1182 if (flags & UBC_INVALIDATE) 1183 /* 1184 * discard the resident pages 1185 */ 1186 request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE); 1187 1188 if (flags & UBC_SYNC) 1189 /* 1190 * wait for all the I/O to complete before returning 1191 */ 1192 request_flags |= MEMORY_OBJECT_IO_SYNC; 1193 1194 if (flags & UBC_PUSHDIRTY) 1195 /* 1196 * we only return the dirty pages in the range 1197 */ 1198 flush_flags = MEMORY_OBJECT_RETURN_DIRTY; 1199 1200 if (flags & UBC_PUSHALL) 1201 /* 1202 * then return all the interesting pages in the range (both 1203 * dirty and precious) to the pager 1204 */ 1205 flush_flags = MEMORY_OBJECT_RETURN_ALL; 1206 1207 beg_off = trunc_page_64(beg_off); 1208 end_off = round_page_64(end_off); 1209 tsize = (memory_object_size_t)end_off - beg_off; 1210 1211 /* flush and/or invalidate pages in the range requested */ 1212 kret = memory_object_lock_request(vp->v_ubcinfo->ui_control, 1213 beg_off, tsize, 1214 (memory_object_offset_t *)resid_off, 1215 io_errno, flush_flags, request_flags, 1216 VM_PROT_NO_CHANGE); 1217 1218 return ((kret == KERN_SUCCESS) ? 
1 : 0); 1219} 1220 1221 1222/* 1223 * ubc_msync_internal 1224 * 1225 * Explicitly map a vnode that has an associate ubc_info, and add a reference 1226 * to it for the ubc system, if there isn't one already, so it will not be 1227 * recycled while it's in use, and set flags on the ubc_info to indicate that 1228 * we have done this 1229 * 1230 * Parameters: vp The vnode to map 1231 * flags The mapping flags for the vnode; this 1232 * will be a combination of one or more of 1233 * PROT_READ, PROT_WRITE, and PROT_EXEC 1234 * 1235 * Returns: 0 Success 1236 * EPERM Permission was denied 1237 * 1238 * Notes: An I/O reference on the vnode must already be held on entry 1239 * 1240 * If there is no ubc_info associated with the vnode, this function 1241 * will return success. 1242 * 1243 * If a permission error occurs, this function will return 1244 * failure; all other failures will cause this function to return 1245 * success. 1246 * 1247 * IMPORTANT: This is an internal use function, and its symbols 1248 * are not exported, hence its error checking is not very robust. 1249 * It is primarily used by: 1250 * 1251 * o mmap(), when mapping a file 1252 * o The deprecated map_fd() interface, when mapping a file 1253 * o When mapping a shared file (a shared library in the 1254 * shared segment region) 1255 * o When loading a program image during the exec process 1256 * 1257 * ...all of these uses ignore the return code, and any fault that 1258 * results later because of a failure is handled in the fix-up path 1259 * of the fault handler. The interface exists primarily as a 1260 * performance hint. 1261 * 1262 * Given that third party implementation of the type of interfaces 1263 * that would use this function, such as alternative executable 1264 * formats, etc., are unsupported, this function is not exported 1265 * for general use. 
1266 * 1267 * The extra reference is held until the VM system unmaps the 1268 * vnode from its own context to maintain a vnode reference in 1269 * cases like open()/mmap()/close(), which leave the backing 1270 * object referenced by a mapped memory region in a process 1271 * address space. 1272 */ 1273__private_extern__ int 1274ubc_map(vnode_t vp, int flags) 1275{ 1276 struct ubc_info *uip; 1277 int error = 0; 1278 int need_ref = 0; 1279 int need_wakeup = 0; 1280 1281 if (UBCINFOEXISTS(vp)) { 1282 1283 vnode_lock(vp); 1284 uip = vp->v_ubcinfo; 1285 1286 while (ISSET(uip->ui_flags, UI_MAPBUSY)) { 1287 SET(uip->ui_flags, UI_MAPWAITING); 1288 (void) msleep(&uip->ui_flags, &vp->v_lock, 1289 PRIBIO, "ubc_map", NULL); 1290 } 1291 SET(uip->ui_flags, UI_MAPBUSY); 1292 vnode_unlock(vp); 1293 1294 error = VNOP_MMAP(vp, flags, vfs_context_current()); 1295 1296 if (error != EPERM) 1297 error = 0; 1298 1299 vnode_lock_spin(vp); 1300 1301 if (error == 0) { 1302 if ( !ISSET(uip->ui_flags, UI_ISMAPPED)) 1303 need_ref = 1; 1304 SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED)); 1305 } 1306 CLR(uip->ui_flags, UI_MAPBUSY); 1307 1308 if (ISSET(uip->ui_flags, UI_MAPWAITING)) { 1309 CLR(uip->ui_flags, UI_MAPWAITING); 1310 need_wakeup = 1; 1311 } 1312 vnode_unlock(vp); 1313 1314 if (need_wakeup) 1315 wakeup(&uip->ui_flags); 1316 1317 if (need_ref) 1318 vnode_ref(vp); 1319 } 1320 return (error); 1321} 1322 1323 1324/* 1325 * ubc_destroy_named 1326 * 1327 * Destroy the named memory object associated with the ubc_info control object 1328 * associated with the designated vnode, if there is a ubc_info associated 1329 * with the vnode, and a control object is associated with it 1330 * 1331 * Parameters: vp The designated vnode 1332 * 1333 * Returns: (void) 1334 * 1335 * Notes: This function is called on vnode termination for all vnodes, 1336 * and must therefore not assume that there is a ubc_info that is 1337 * associated with the vnode, nor that there is a control object 1338 * associated 
with the ubc_info. 1339 * 1340 * If all the conditions necessary are present, this function 1341 * calls memory_object_destory(), which will in turn end up 1342 * calling ubc_unmap() to release any vnode references that were 1343 * established via ubc_map(). 1344 * 1345 * IMPORTANT: This is an internal use function that is used 1346 * exclusively by the internal use function vclean(). 1347 */ 1348__private_extern__ void 1349ubc_destroy_named(vnode_t vp) 1350{ 1351 memory_object_control_t control; 1352 struct ubc_info *uip; 1353 kern_return_t kret; 1354 1355 if (UBCINFOEXISTS(vp)) { 1356 uip = vp->v_ubcinfo; 1357 1358 /* Terminate the memory object */ 1359 control = ubc_getobject(vp, UBC_HOLDOBJECT); 1360 if (control != MEMORY_OBJECT_CONTROL_NULL) { 1361 kret = memory_object_destroy(control, 0); 1362 if (kret != KERN_SUCCESS) 1363 panic("ubc_destroy_named: memory_object_destroy failed"); 1364 } 1365 } 1366} 1367 1368 1369/* 1370 * ubc_isinuse 1371 * 1372 * Determine whether or not a vnode is currently in use by ubc at a level in 1373 * excess of the requested busycount 1374 * 1375 * Parameters: vp The vnode to check 1376 * busycount The threshold busy count, used to bias 1377 * the count usually already held by the 1378 * caller to avoid races 1379 * 1380 * Returns: 1 The vnode is in use over the threshold 1381 * 0 The vnode is not in use over the 1382 * threshold 1383 * 1384 * Notes: Because the vnode is only held locked while actually asking 1385 * the use count, this function only represents a snapshot of the 1386 * current state of the vnode. If more accurate information is 1387 * required, an additional busycount should be held by the caller 1388 * and a non-zero busycount used. 1389 * 1390 * If there is no ubc_info associated with the vnode, this 1391 * function will report that the vnode is not in use by ubc. 
1392 */ 1393int 1394ubc_isinuse(struct vnode *vp, int busycount) 1395{ 1396 if ( !UBCINFOEXISTS(vp)) 1397 return (0); 1398 return(ubc_isinuse_locked(vp, busycount, 0)); 1399} 1400 1401 1402/* 1403 * ubc_isinuse_locked 1404 * 1405 * Determine whether or not a vnode is currently in use by ubc at a level in 1406 * excess of the requested busycount 1407 * 1408 * Parameters: vp The vnode to check 1409 * busycount The threshold busy count, used to bias 1410 * the count usually already held by the 1411 * caller to avoid races 1412 * locked True if the vnode is already locked by 1413 * the caller 1414 * 1415 * Returns: 1 The vnode is in use over the threshold 1416 * 0 The vnode is not in use over the 1417 * threshold 1418 * 1419 * Notes: If the vnode is not locked on entry, it is locked while 1420 * actually asking the use count. If this is the case, this 1421 * function only represents a snapshot of the current state of 1422 * the vnode. If more accurate information is required, the 1423 * vnode lock should be held by the caller, otherwise an 1424 * additional busycount should be held by the caller and a 1425 * non-zero busycount used. 1426 * 1427 * If there is no ubc_info associated with the vnode, this 1428 * function will report that the vnode is not in use by ubc. 1429 */ 1430int 1431ubc_isinuse_locked(struct vnode *vp, int busycount, int locked) 1432{ 1433 int retval = 0; 1434 1435 1436 if (!locked) 1437 vnode_lock(vp); 1438 1439 if ((vp->v_usecount - vp->v_kusecount) > busycount) 1440 retval = 1; 1441 1442 if (!locked) 1443 vnode_unlock(vp); 1444 return (retval); 1445} 1446 1447 1448/* 1449 * ubc_unmap 1450 * 1451 * Reverse the effects of a ubc_map() call for a given vnode 1452 * 1453 * Parameters: vp vnode to unmap from ubc 1454 * 1455 * Returns: (void) 1456 * 1457 * Notes: This is an internal use function used by vnode_pager_unmap(). 
1458 * It will attempt to obtain a reference on the supplied vnode, 1459 * and if it can do so, and there is an associated ubc_info, and 1460 * the flags indicate that it was mapped via ubc_map(), then the 1461 * flag is cleared, the mapping removed, and the reference taken 1462 * by ubc_map() is released. 1463 * 1464 * IMPORTANT: This MUST only be called by the VM 1465 * to prevent race conditions. 1466 */ 1467__private_extern__ void 1468ubc_unmap(struct vnode *vp) 1469{ 1470 struct ubc_info *uip; 1471 int need_rele = 0; 1472 int need_wakeup = 0; 1473 1474 if (vnode_getwithref(vp)) 1475 return; 1476 1477 if (UBCINFOEXISTS(vp)) { 1478 vnode_lock(vp); 1479 uip = vp->v_ubcinfo; 1480 1481 while (ISSET(uip->ui_flags, UI_MAPBUSY)) { 1482 SET(uip->ui_flags, UI_MAPWAITING); 1483 (void) msleep(&uip->ui_flags, &vp->v_lock, 1484 PRIBIO, "ubc_unmap", NULL); 1485 } 1486 SET(uip->ui_flags, UI_MAPBUSY); 1487 1488 if (ISSET(uip->ui_flags, UI_ISMAPPED)) { 1489 CLR(uip->ui_flags, UI_ISMAPPED); 1490 need_rele = 1; 1491 } 1492 vnode_unlock(vp); 1493 1494 if (need_rele) { 1495 (void) VNOP_MNOMAP(vp, vfs_context_current()); 1496 vnode_rele(vp); 1497 } 1498 1499 vnode_lock_spin(vp); 1500 1501 CLR(uip->ui_flags, UI_MAPBUSY); 1502 if (ISSET(uip->ui_flags, UI_MAPWAITING)) { 1503 CLR(uip->ui_flags, UI_MAPWAITING); 1504 need_wakeup = 1; 1505 } 1506 vnode_unlock(vp); 1507 1508 if (need_wakeup) 1509 wakeup(&uip->ui_flags); 1510 1511 } 1512 /* 1513 * the drop of the vnode ref will cleanup 1514 */ 1515 vnode_put(vp); 1516} 1517 1518 1519/* 1520 * ubc_page_op 1521 * 1522 * Manipulate individual page state for a vnode with an associated ubc_info 1523 * with an associated memory object control. 
 *
 * Parameters:	vp			The vnode backing the page
 *		f_offset		A file offset interior to the page
 *		ops			The operations to perform, as a bitmap
 *					(see below for more information)
 *		phys_entryp		The address of a ppnum_t; may be NULL
 *					to ignore
 *		flagsp			A pointer to an int to contain flags;
 *					may be NULL to ignore
 *
 * Returns:	KERN_SUCCESS		Success
 *		KERN_INVALID_ARGUMENT	If the memory object control has no VM
 *					object associated
 *		KERN_INVALID_OBJECT	If UPL_POP_PHYSICAL and the object is
 *					not physically contiguous
 *		KERN_INVALID_OBJECT	If !UPL_POP_PHYSICAL and the object is
 *					physically contiguous
 *		KERN_FAILURE		If the page cannot be looked up
 *
 * Implicit Returns:
 *		*phys_entryp (modified)	If phys_entryp is non-NULL and
 *					UPL_POP_PHYSICAL
 *		*flagsp (modified)	If flagsp is non-NULL and there was
 *					!UPL_POP_PHYSICAL and a KERN_SUCCESS
 *
 * Notes:	For object boundaries, it is considerably more efficient to
 *		ensure that f_offset is in fact on a page boundary, as this
 *		will avoid internal use of the hash table to identify the
 *		page, and would therefore skip a number of early optimizations.
 *		Since this is a page operation anyway, the caller should try
 *		to pass only a page aligned offset because of this.
 *
 *		*flagsp may be modified even if this function fails.  If it is
 *		modified, it will contain the condition of the page before the
 *		requested operation was attempted; these will only include the
 *		bitmap flags, and not the UPL_POP_PHYSICAL, UPL_POP_DUMP,
 *		UPL_POP_SET, or UPL_POP_CLR bits.
 *
 *		The flags field may contain a specific operation, such as
 *		UPL_POP_PHYSICAL or UPL_POP_DUMP:
 *
 *		o	UPL_POP_PHYSICAL	Fail if not contiguous; if
 *						*phys_entryp and successful, set
 *						*phys_entryp
 *		o	UPL_POP_DUMP		Dump the specified page
 *
 *		Otherwise, it is treated as a bitmap of one or more page
 *		operations to perform on the final memory object; allowable
 *		bit values are:
 *
 *		o	UPL_POP_DIRTY		The page is dirty
 *		o	UPL_POP_PAGEOUT		The page is paged out
 *		o	UPL_POP_PRECIOUS	The page is precious
 *		o	UPL_POP_ABSENT		The page is absent
 *		o	UPL_POP_BUSY		The page is busy
 *
 *		If the page status is only being queried and not modified, then
 *		no other bits should be specified.  However, if it is being
 *		modified, exactly ONE of the following bits should be set:
 *
 *		o	UPL_POP_SET		Set the current bitmap bits
 *		o	UPL_POP_CLR		Clear the current bitmap bits
 *
 *		Thus to effect a combination of setting and clearing, it may be
 *		necessary to call this function twice.  If this is done, the
 *		set should be used before the clear, since clearing may trigger
 *		a wakeup on the destination page, and if the page is backed by
 *		an encrypted swap file, setting will trigger the decryption
 *		needed before the wakeup occurs.
1593 */ 1594kern_return_t 1595ubc_page_op( 1596 struct vnode *vp, 1597 off_t f_offset, 1598 int ops, 1599 ppnum_t *phys_entryp, 1600 int *flagsp) 1601{ 1602 memory_object_control_t control; 1603 1604 control = ubc_getobject(vp, UBC_FLAGS_NONE); 1605 if (control == MEMORY_OBJECT_CONTROL_NULL) 1606 return KERN_INVALID_ARGUMENT; 1607 1608 return (memory_object_page_op(control, 1609 (memory_object_offset_t)f_offset, 1610 ops, 1611 phys_entryp, 1612 flagsp)); 1613} 1614 1615 1616/* 1617 * ubc_range_op 1618 * 1619 * Manipulate page state for a range of memory for a vnode with an associated 1620 * ubc_info with an associated memory object control, when page level state is 1621 * not required to be returned from the call (i.e. there are no phys_entryp or 1622 * flagsp parameters to this call, and it takes a range which may contain 1623 * multiple pages, rather than an offset interior to a single page). 1624 * 1625 * Parameters: vp The vnode backing the page 1626 * f_offset_beg A file offset interior to the start page 1627 * f_offset_end A file offset interior to the end page 1628 * ops The operations to perform, as a bitmap 1629 * (see below for more information) 1630 * range The address of an int; may be NULL to 1631 * ignore 1632 * 1633 * Returns: KERN_SUCCESS Success 1634 * KERN_INVALID_ARGUMENT If the memory object control has no VM 1635 * object associated 1636 * KERN_INVALID_OBJECT If the object is physically contiguous 1637 * 1638 * Implicit Returns: 1639 * *range (modified) If range is non-NULL, its contents will 1640 * be modified to contain the number of 1641 * bytes successfully operated upon. 1642 * 1643 * Notes: IMPORTANT: This function cannot be used on a range that 1644 * consists of physically contiguous pages. 
1645 * 1646 * For object boundaries, it is considerably more efficient to 1647 * ensure that f_offset_beg and f_offset_end are in fact on page 1648 * boundaries, as this will avoid internal use of the hash table 1649 * to identify the page, and would therefore skip a number of 1650 * early optimizations. Since this is an operation on a set of 1651 * pages anyway, the caller should try to pass only a page aligned 1652 * offsets because of this. 1653 * 1654 * *range will be modified only if this function succeeds. 1655 * 1656 * The flags field MUST contain a specific operation; allowable 1657 * values are: 1658 * 1659 * o UPL_ROP_ABSENT Returns the extent of the range 1660 * presented which is absent, starting 1661 * with the start address presented 1662 * 1663 * o UPL_ROP_PRESENT Returns the extent of the range 1664 * presented which is present (resident), 1665 * starting with the start address 1666 * presented 1667 * o UPL_ROP_DUMP Dump the pages which are found in the 1668 * target object for the target range. 1669 * 1670 * IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT; if there are 1671 * multiple regions in the range, only the first matching region 1672 * is returned. 1673 */ 1674kern_return_t 1675ubc_range_op( 1676 struct vnode *vp, 1677 off_t f_offset_beg, 1678 off_t f_offset_end, 1679 int ops, 1680 int *range) 1681{ 1682 memory_object_control_t control; 1683 1684 control = ubc_getobject(vp, UBC_FLAGS_NONE); 1685 if (control == MEMORY_OBJECT_CONTROL_NULL) 1686 return KERN_INVALID_ARGUMENT; 1687 1688 return (memory_object_range_op(control, 1689 (memory_object_offset_t)f_offset_beg, 1690 (memory_object_offset_t)f_offset_end, 1691 ops, 1692 range)); 1693} 1694 1695 1696/* 1697 * ubc_create_upl 1698 * 1699 * Given a vnode, cause the population of a portion of the vm_object; based on 1700 * the nature of the request, the pages returned may contain valid data, or 1701 * they may be uninitialized. 
1702 * 1703 * Parameters: vp The vnode from which to create the upl 1704 * f_offset The start offset into the backing store 1705 * represented by the vnode 1706 * bufsize The size of the upl to create 1707 * uplp Pointer to the upl_t to receive the 1708 * created upl; MUST NOT be NULL 1709 * plp Pointer to receive the internal page 1710 * list for the created upl; MAY be NULL 1711 * to ignore 1712 * 1713 * Returns: KERN_SUCCESS The requested upl has been created 1714 * KERN_INVALID_ARGUMENT The bufsize argument is not an even 1715 * multiple of the page size 1716 * KERN_INVALID_ARGUMENT There is no ubc_info associated with 1717 * the vnode, or there is no memory object 1718 * control associated with the ubc_info 1719 * memory_object_upl_request:KERN_INVALID_VALUE 1720 * The supplied upl_flags argument is 1721 * invalid 1722 * Implicit Returns: 1723 * *uplp (modified) 1724 * *plp (modified) If non-NULL, the value of *plp will be 1725 * modified to point to the internal page 1726 * list; this modification may occur even 1727 * if this function is unsuccessful, in 1728 * which case the contents may be invalid 1729 * 1730 * Note: If successful, the returned *uplp MUST subsequently be freed 1731 * via a call to ubc_upl_commit(), ubc_upl_commit_range(), 1732 * ubc_upl_abort(), or ubc_upl_abort_range(). 
1733 */ 1734kern_return_t 1735ubc_create_upl( 1736 struct vnode *vp, 1737 off_t f_offset, 1738 long bufsize, 1739 upl_t *uplp, 1740 upl_page_info_t **plp, 1741 int uplflags) 1742{ 1743 memory_object_control_t control; 1744 mach_msg_type_number_t count; 1745 int ubcflags; 1746 kern_return_t kr; 1747 1748 if (bufsize & 0xfff) 1749 return KERN_INVALID_ARGUMENT; 1750 1751 if (uplflags & UPL_FOR_PAGEOUT) { 1752 uplflags &= ~UPL_FOR_PAGEOUT; 1753 ubcflags = UBC_FOR_PAGEOUT; 1754 } else 1755 ubcflags = UBC_FLAGS_NONE; 1756 1757 control = ubc_getobject(vp, ubcflags); 1758 if (control == MEMORY_OBJECT_CONTROL_NULL) 1759 return KERN_INVALID_ARGUMENT; 1760 1761 if (uplflags & UPL_WILL_BE_DUMPED) { 1762 uplflags &= ~UPL_WILL_BE_DUMPED; 1763 uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL); 1764 } else 1765 uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL); 1766 count = 0; 1767 1768 kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, &count, uplflags); 1769 if (plp != NULL) 1770 *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp); 1771 return kr; 1772} 1773 1774 1775/* 1776 * ubc_upl_maxbufsize 1777 * 1778 * Return the maximum bufsize ubc_create_upl( ) will take. 1779 * 1780 * Parameters: none 1781 * 1782 * Returns: maximum size buffer (in bytes) ubc_create_upl( ) will take. 
1783 */ 1784upl_size_t 1785ubc_upl_maxbufsize( 1786 void) 1787{ 1788 return(MAX_UPL_SIZE * PAGE_SIZE); 1789} 1790 1791/* 1792 * ubc_upl_map 1793 * 1794 * Map the page list assocated with the supplied upl into the kernel virtual 1795 * address space at the virtual address indicated by the dst_addr argument; 1796 * the entire upl is mapped 1797 * 1798 * Parameters: upl The upl to map 1799 * dst_addr The address at which to map the upl 1800 * 1801 * Returns: KERN_SUCCESS The upl has been mapped 1802 * KERN_INVALID_ARGUMENT The upl is UPL_NULL 1803 * KERN_FAILURE The upl is already mapped 1804 * vm_map_enter:KERN_INVALID_ARGUMENT 1805 * A failure code from vm_map_enter() due 1806 * to an invalid argument 1807 */ 1808kern_return_t 1809ubc_upl_map( 1810 upl_t upl, 1811 vm_offset_t *dst_addr) 1812{ 1813 return (vm_upl_map(kernel_map, upl, dst_addr)); 1814} 1815 1816 1817/* 1818 * ubc_upl_unmap 1819 * 1820 * Unmap the page list assocated with the supplied upl from the kernel virtual 1821 * address space; the entire upl is unmapped. 
1822 * 1823 * Parameters: upl The upl to unmap 1824 * 1825 * Returns: KERN_SUCCESS The upl has been unmapped 1826 * KERN_FAILURE The upl is not currently mapped 1827 * KERN_INVALID_ARGUMENT If the upl is UPL_NULL 1828 */ 1829kern_return_t 1830ubc_upl_unmap( 1831 upl_t upl) 1832{ 1833 return(vm_upl_unmap(kernel_map, upl)); 1834} 1835 1836 1837/* 1838 * ubc_upl_commit 1839 * 1840 * Commit the contents of the upl to the backing store 1841 * 1842 * Parameters: upl The upl to commit 1843 * 1844 * Returns: KERN_SUCCESS The upl has been committed 1845 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL 1846 * KERN_FAILURE The supplied upl does not represent 1847 * device memory, and the offset plus the 1848 * size would exceed the actual size of 1849 * the upl 1850 * 1851 * Notes: In practice, the only return value for this function should be 1852 * KERN_SUCCESS, unless there has been data structure corruption; 1853 * since the upl is deallocated regardless of success or failure, 1854 * there's really nothing to do about this other than panic. 1855 * 1856 * IMPORTANT: Use of this function should not be mixed with use of 1857 * ubc_upl_commit_range(), due to the unconditional deallocation 1858 * by this function. 
1859 */ 1860kern_return_t 1861ubc_upl_commit( 1862 upl_t upl) 1863{ 1864 upl_page_info_t *pl; 1865 kern_return_t kr; 1866 1867 pl = UPL_GET_INTERNAL_PAGE_LIST(upl); 1868 kr = upl_commit(upl, pl, MAX_UPL_SIZE); 1869 upl_deallocate(upl); 1870 return kr; 1871} 1872 1873 1874/* 1875 * ubc_upl_commit 1876 * 1877 * Commit the contents of the specified range of the upl to the backing store 1878 * 1879 * Parameters: upl The upl to commit 1880 * offset The offset into the upl 1881 * size The size of the region to be committed, 1882 * starting at the specified offset 1883 * flags commit type (see below) 1884 * 1885 * Returns: KERN_SUCCESS The range has been committed 1886 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL 1887 * KERN_FAILURE The supplied upl does not represent 1888 * device memory, and the offset plus the 1889 * size would exceed the actual size of 1890 * the upl 1891 * 1892 * Notes: IMPORTANT: If the commit is successful, and the object is now 1893 * empty, the upl will be deallocated. Since the caller cannot 1894 * check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag 1895 * should generally only be used when the offset is 0 and the size 1896 * is equal to the upl size. 
 *
 *		The flags argument is a bitmap of flags on the range of pages in
 *		the upl to be committed; allowable flags are:
 *
 *		o	UPL_COMMIT_FREE_ON_EMPTY	Free the upl when it is
 *							both empty and has been
 *							successfully committed
 *		o	UPL_COMMIT_CLEAR_DIRTY		Clear each page's dirty
 *							bit; will prevent a
 *							later pageout
 *		o	UPL_COMMIT_SET_DIRTY		Set each page's dirty
 *							bit; will cause a later
 *							pageout
 *		o	UPL_COMMIT_INACTIVATE		Clear each page's
 *							reference bit; the page
 *							will not be accessed
 *		o	UPL_COMMIT_ALLOW_ACCESS		Unbusy each page; pages
 *							become busy when an
 *							IOMemoryDescriptor is
 *							mapped or redirected,
 *							and we have to wait for
 *							an IOKit driver
 *
 *		The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
 *		not be specified by the caller.
 *
 *		The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
 *		mutually exclusive, and should not be combined.
1925 */ 1926kern_return_t 1927ubc_upl_commit_range( 1928 upl_t upl, 1929 vm_offset_t offset, 1930 vm_size_t size, 1931 int flags) 1932{ 1933 upl_page_info_t *pl; 1934 boolean_t empty; 1935 kern_return_t kr; 1936 1937 if (flags & UPL_COMMIT_FREE_ON_EMPTY) 1938 flags |= UPL_COMMIT_NOTIFY_EMPTY; 1939 1940 if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) { 1941 return KERN_INVALID_ARGUMENT; 1942 } 1943 1944 pl = UPL_GET_INTERNAL_PAGE_LIST(upl); 1945 1946 kr = upl_commit_range(upl, offset, size, flags, 1947 pl, MAX_UPL_SIZE, &empty); 1948 1949 if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty) 1950 upl_deallocate(upl); 1951 1952 return kr; 1953} 1954 1955 1956/* 1957 * ubc_upl_abort_range 1958 * 1959 * Abort the contents of the specified range of the specified upl 1960 * 1961 * Parameters: upl The upl to abort 1962 * offset The offset into the upl 1963 * size The size of the region to be aborted, 1964 * starting at the specified offset 1965 * abort_flags abort type (see below) 1966 * 1967 * Returns: KERN_SUCCESS The range has been aborted 1968 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL 1969 * KERN_FAILURE The supplied upl does not represent 1970 * device memory, and the offset plus the 1971 * size would exceed the actual size of 1972 * the upl 1973 * 1974 * Notes: IMPORTANT: If the abort is successful, and the object is now 1975 * empty, the upl will be deallocated. Since the caller cannot 1976 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag 1977 * should generally only be used when the offset is 0 and the size 1978 * is equal to the upl size. 
1979 * 1980 * The abort_flags argument is a bitmap of flags on the range of 1981 * pages in the upl to be aborted; allowable flags are: 1982 * 1983 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both 1984 * empty and has been successfully 1985 * aborted 1986 * o UPL_ABORT_RESTART The operation must be restarted 1987 * o UPL_ABORT_UNAVAILABLE The pages are unavailable 1988 * o UPL_ABORT_ERROR An I/O error occurred 1989 * o UPL_ABORT_DUMP_PAGES Just free the pages 1990 * o UPL_ABORT_NOTIFY_EMPTY RESERVED 1991 * o UPL_ABORT_ALLOW_ACCESS RESERVED 1992 * 1993 * The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should 1994 * not be specified by the caller. It is intended to fulfill the 1995 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function 1996 * ubc_upl_commit_range(), but is never referenced internally. 1997 * 1998 * The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor 1999 * referenced; do not use it. 2000 */ 2001kern_return_t 2002ubc_upl_abort_range( 2003 upl_t upl, 2004 vm_offset_t offset, 2005 vm_size_t size, 2006 int abort_flags) 2007{ 2008 kern_return_t kr; 2009 boolean_t empty = FALSE; 2010 2011 if (abort_flags & UPL_ABORT_FREE_ON_EMPTY) 2012 abort_flags |= UPL_ABORT_NOTIFY_EMPTY; 2013 2014 kr = upl_abort_range(upl, offset, size, abort_flags, &empty); 2015 2016 if((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty) 2017 upl_deallocate(upl); 2018 2019 return kr; 2020} 2021 2022 2023/* 2024 * ubc_upl_abort 2025 * 2026 * Abort the contents of the specified upl 2027 * 2028 * Parameters: upl The upl to abort 2029 * abort_type abort type (see below) 2030 * 2031 * Returns: KERN_SUCCESS The range has been aborted 2032 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL 2033 * KERN_FAILURE The supplied upl does not represent 2034 * device memory, and the offset plus the 2035 * size would exceed the actual size of 2036 * the upl 2037 * 2038 * Notes: IMPORTANT: If the abort is successful, and the object is now 2039 * empty, the upl will be 
deallocated. Since the caller cannot 2040 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag 2041 * should generally only be used when the offset is 0 and the size 2042 * is equal to the upl size. 2043 * 2044 * The abort_type is a bitmap of flags on the range of 2045 * pages in the upl to be aborted; allowable flags are: 2046 * 2047 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both 2048 * empty and has been successfully 2049 * aborted 2050 * o UPL_ABORT_RESTART The operation must be restarted 2051 * o UPL_ABORT_UNAVAILABLE The pages are unavailable 2052 * o UPL_ABORT_ERROR An I/O error occurred 2053 * o UPL_ABORT_DUMP_PAGES Just free the pages 2054 * o UPL_ABORT_NOTIFY_EMPTY RESERVED 2055 * o UPL_ABORT_ALLOW_ACCESS RESERVED 2056 * 2057 * The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should 2058 * not be specified by the caller. It is intended to fulfill the 2059 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function 2060 * ubc_upl_commit_range(), but is never referenced internally. 2061 * 2062 * The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor 2063 * referenced; do not use it. 2064 */ 2065kern_return_t 2066ubc_upl_abort( 2067 upl_t upl, 2068 int abort_type) 2069{ 2070 kern_return_t kr; 2071 2072 kr = upl_abort(upl, abort_type); 2073 upl_deallocate(upl); 2074 return kr; 2075} 2076 2077 2078/* 2079 * ubc_upl_pageinfo 2080 * 2081 * Retrieve the internal page list for the specified upl 2082 * 2083 * Parameters: upl The upl to obtain the page list from 2084 * 2085 * Returns: !NULL The (upl_page_info_t *) for the page 2086 * list internal to the upl 2087 * NULL Error/no page list associated 2088 * 2089 * Notes: IMPORTANT: The function is only valid on internal objects 2090 * where the list request was made with the UPL_INTERNAL flag. 2091 * 2092 * This function is a utility helper function, since some callers 2093 * may not have direct access to the header defining the macro, 2094 * due to abstraction layering constraints. 
2095 */ 2096upl_page_info_t * 2097ubc_upl_pageinfo( 2098 upl_t upl) 2099{ 2100 return (UPL_GET_INTERNAL_PAGE_LIST(upl)); 2101} 2102 2103 2104int 2105UBCINFOEXISTS(struct vnode * vp) 2106{ 2107 return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL)); 2108} 2109 2110 2111/* 2112 * CODE SIGNING 2113 */ 2114#define CS_BLOB_PAGEABLE 0 2115static volatile SInt32 cs_blob_size = 0; 2116static volatile SInt32 cs_blob_count = 0; 2117static SInt32 cs_blob_size_peak = 0; 2118static UInt32 cs_blob_size_max = 0; 2119static SInt32 cs_blob_count_peak = 0; 2120extern int cs_debug; 2121 2122int cs_validation = 1; 2123 2124SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW, &cs_validation, 0, "Do validate code signatures"); 2125SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD, &cs_blob_count, 0, "Current number of code signature blobs"); 2126SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD, &cs_blob_size, 0, "Current size of all code signature blobs"); 2127SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD, &cs_blob_count_peak, 0, "Peak number of code signature blobs"); 2128SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD, &cs_blob_size_peak, 0, "Peak size of code signature blobs"); 2129SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD, &cs_blob_size_max, 0, "Size of biggest code signature blob"); 2130 2131kern_return_t 2132ubc_cs_blob_allocate( 2133 vm_offset_t *blob_addr_p, 2134 vm_size_t *blob_size_p) 2135{ 2136 kern_return_t kr; 2137 2138#if CS_BLOB_PAGEABLE 2139 *blob_size_p = round_page(*blob_size_p); 2140 kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p); 2141#else /* CS_BLOB_PAGEABLE */ 2142 *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p); 2143 if (*blob_addr_p == 0) { 2144 kr = KERN_NO_SPACE; 2145 } else { 2146 kr = KERN_SUCCESS; 2147 } 2148#endif /* CS_BLOB_PAGEABLE */ 2149 return kr; 2150} 2151 2152void 2153ubc_cs_blob_deallocate( 2154 vm_offset_t blob_addr, 2155 vm_size_t blob_size) 2156{ 2157#if CS_BLOB_PAGEABLE 2158 
kmem_free(kernel_map, blob_addr, blob_size); 2159#else /* CS_BLOB_PAGEABLE */ 2160 kfree((void *) blob_addr, blob_size); 2161#endif /* CS_BLOB_PAGEABLE */ 2162} 2163 2164int 2165ubc_cs_blob_add( 2166 struct vnode *vp, 2167 cpu_type_t cputype, 2168 off_t base_offset, 2169 vm_address_t addr, 2170 vm_size_t size) 2171{ 2172 kern_return_t kr; 2173 struct ubc_info *uip; 2174 struct cs_blob *blob, *oblob; 2175 int error; 2176 ipc_port_t blob_handle; 2177 memory_object_size_t blob_size; 2178 const CS_CodeDirectory *cd; 2179 off_t blob_start_offset, blob_end_offset; 2180 SHA1_CTX sha1ctxt; 2181 2182 blob_handle = IPC_PORT_NULL; 2183 2184 blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob)); 2185 if (blob == NULL) { 2186 return ENOMEM; 2187 } 2188 2189#if CS_BLOB_PAGEABLE 2190 /* get a memory entry on the blob */ 2191 blob_size = (memory_object_size_t) size; 2192 kr = mach_make_memory_entry_64(kernel_map, 2193 &blob_size, 2194 addr, 2195 VM_PROT_READ, 2196 &blob_handle, 2197 IPC_PORT_NULL); 2198 if (kr != KERN_SUCCESS) { 2199 error = ENOMEM; 2200 goto out; 2201 } 2202 if (memory_object_round_page(blob_size) != 2203 (memory_object_size_t) round_page(size)) { 2204 printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%x !?\n", 2205 blob_size, size); 2206 panic("XXX FBDP size mismatch 0x%llx 0x%x\n", blob_size, size); 2207 error = EINVAL; 2208 goto out; 2209 } 2210#else 2211 blob_size = (memory_object_size_t) size; 2212 blob_handle = IPC_PORT_NULL; 2213#endif 2214 2215 /* fill in the new blob */ 2216 blob->csb_cpu_type = cputype; 2217 blob->csb_base_offset = base_offset; 2218 blob->csb_mem_size = size; 2219 blob->csb_mem_offset = 0; 2220 blob->csb_mem_handle = blob_handle; 2221 blob->csb_mem_kaddr = addr; 2222 2223 /* 2224 * Validate the blob's contents 2225 */ 2226 cd = findCodeDirectory( 2227 (const CS_SuperBlob *) addr, 2228 (char *) addr, 2229 (char *) addr + blob->csb_mem_size); 2230 if (cd == NULL) { 2231 /* no code directory => useless blob ! 
*/ 2232 blob->csb_flags = 0; 2233 blob->csb_start_offset = 0; 2234 blob->csb_end_offset = 0; 2235 } else { 2236 unsigned char *sha1_base; 2237 int sha1_size; 2238 2239 blob->csb_flags = ntohl(cd->flags) | CS_VALID; 2240 blob->csb_end_offset = round_page(ntohl(cd->codeLimit)); 2241 blob->csb_start_offset = (blob->csb_end_offset - 2242 (ntohl(cd->nCodeSlots) * PAGE_SIZE)); 2243 /* compute the blob's SHA1 hash */ 2244 sha1_base = (const unsigned char *) cd; 2245 sha1_size = ntohl(cd->length); 2246 SHA1Init(&sha1ctxt); 2247 SHA1Update(&sha1ctxt, sha1_base, sha1_size); 2248 SHA1Final(blob->csb_sha1, &sha1ctxt); 2249 } 2250 2251 /* 2252 * Let policy module check whether the blob's signature is accepted. 2253 */ 2254#if CONFIG_MACF 2255 error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size); 2256 if (error) 2257 goto out; 2258#endif 2259 2260 /* 2261 * Validate the blob's coverage 2262 */ 2263 blob_start_offset = blob->csb_base_offset + blob->csb_start_offset; 2264 blob_end_offset = blob->csb_base_offset + blob->csb_end_offset; 2265 2266 if (blob_start_offset >= blob_end_offset || 2267 blob_start_offset < 0 || 2268 blob_end_offset <= 0) { 2269 /* reject empty or backwards blob */ 2270 error = EINVAL; 2271 goto out; 2272 } 2273 2274 vnode_lock(vp); 2275 if (! UBCINFOEXISTS(vp)) { 2276 vnode_unlock(vp); 2277 error = ENOENT; 2278 goto out; 2279 } 2280 uip = vp->v_ubcinfo; 2281 2282 /* check if this new blob overlaps with an existing blob */ 2283 for (oblob = uip->cs_blobs; 2284 oblob != NULL; 2285 oblob = oblob->csb_next) { 2286 off_t oblob_start_offset, oblob_end_offset; 2287 2288 oblob_start_offset = (oblob->csb_base_offset + 2289 oblob->csb_start_offset); 2290 oblob_end_offset = (oblob->csb_base_offset + 2291 oblob->csb_end_offset); 2292 if (blob_start_offset >= oblob_end_offset || 2293 blob_end_offset <= oblob_start_offset) { 2294 /* no conflict with this existing blob */ 2295 } else { 2296 /* conflict ! 
*/ 2297 if (blob_start_offset == oblob_start_offset && 2298 blob_end_offset == oblob_end_offset && 2299 blob->csb_mem_size == oblob->csb_mem_size && 2300 blob->csb_flags == oblob->csb_flags && 2301 (blob->csb_cpu_type == CPU_TYPE_ANY || 2302 oblob->csb_cpu_type == CPU_TYPE_ANY || 2303 blob->csb_cpu_type == oblob->csb_cpu_type) && 2304 !bcmp(blob->csb_sha1, 2305 oblob->csb_sha1, 2306 SHA1_RESULTLEN)) { 2307 /* 2308 * We already have this blob: 2309 * we'll return success but 2310 * throw away the new blob. 2311 */ 2312 if (oblob->csb_cpu_type == CPU_TYPE_ANY) { 2313 /* 2314 * The old blob matches this one 2315 * but doesn't have any CPU type. 2316 * Update it with whatever the caller 2317 * provided this time. 2318 */ 2319 oblob->csb_cpu_type = cputype; 2320 } 2321 vnode_unlock(vp); 2322 error = EAGAIN; 2323 goto out; 2324 } else { 2325 /* different blob: reject the new one */ 2326 vnode_unlock(vp); 2327 error = EALREADY; 2328 goto out; 2329 } 2330 } 2331 2332 } 2333 2334 2335 /* mark this vnode's VM object as having "signed pages" */ 2336 kr = memory_object_signed(uip->ui_control, TRUE); 2337 if (kr != KERN_SUCCESS) { 2338 vnode_unlock(vp); 2339 error = ENOENT; 2340 goto out; 2341 } 2342 2343 /* 2344 * Add this blob to the list of blobs for this vnode. 2345 * We always add at the front of the list and we never remove a 2346 * blob from the list, so ubc_cs_get_blobs() can return whatever 2347 * the top of the list was and that list will remain valid 2348 * while we validate a page, even after we release the vnode's lock. 2349 */ 2350 blob->csb_next = uip->cs_blobs; 2351 uip->cs_blobs = blob; 2352 2353 OSAddAtomic(+1, &cs_blob_count); 2354 if (cs_blob_count > cs_blob_count_peak) { 2355 cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */ 2356 } 2357 OSAddAtomic(+blob->csb_mem_size, &cs_blob_size); 2358 if (cs_blob_size > cs_blob_size_peak) { 2359 cs_blob_size_peak = cs_blob_size; /* XXX atomic ? 
*/ 2360 } 2361 if (blob->csb_mem_size > cs_blob_size_max) { 2362 cs_blob_size_max = blob->csb_mem_size; 2363 } 2364 2365 if (cs_debug) { 2366 proc_t p; 2367 2368 p = current_proc(); 2369 printf("CODE SIGNING: proc %d(%s) " 2370 "loaded %s signatures for file (%s) " 2371 "range 0x%llx:0x%llx flags 0x%x\n", 2372 p->p_pid, p->p_comm, 2373 blob->csb_cpu_type == -1 ? "detached" : "embedded", 2374 vnode_name(vp), 2375 blob->csb_base_offset + blob->csb_start_offset, 2376 blob->csb_base_offset + blob->csb_end_offset, 2377 blob->csb_flags); 2378 } 2379 2380 vnode_unlock(vp); 2381 2382 error = 0; /* success ! */ 2383 2384out: 2385 if (error) { 2386 /* we failed; release what we allocated */ 2387 if (blob) { 2388 kfree(blob, sizeof (*blob)); 2389 blob = NULL; 2390 } 2391 if (blob_handle != IPC_PORT_NULL) { 2392 mach_memory_entry_port_release(blob_handle); 2393 blob_handle = IPC_PORT_NULL; 2394 } 2395 } 2396 2397 if (error == EAGAIN) { 2398 /* 2399 * See above: error is EAGAIN if we were asked 2400 * to add an existing blob again. We cleaned the new 2401 * blob and we want to return success. 2402 */ 2403 error = 0; 2404 /* 2405 * Since we're not failing, consume the data we received. 2406 */ 2407 ubc_cs_blob_deallocate(addr, size); 2408 } 2409 2410 return error; 2411} 2412 2413 2414struct cs_blob * 2415ubc_cs_blob_get( 2416 struct vnode *vp, 2417 cpu_type_t cputype, 2418 off_t offset) 2419{ 2420 struct ubc_info *uip; 2421 struct cs_blob *blob; 2422 off_t offset_in_blob; 2423 2424 vnode_lock_spin(vp); 2425 2426 if (! 
UBCINFOEXISTS(vp)) { 2427 blob = NULL; 2428 goto out; 2429 } 2430 2431 uip = vp->v_ubcinfo; 2432 for (blob = uip->cs_blobs; 2433 blob != NULL; 2434 blob = blob->csb_next) { 2435 if (cputype != -1 && blob->csb_cpu_type == cputype) { 2436 break; 2437 } 2438 if (offset != -1) { 2439 offset_in_blob = offset - blob->csb_base_offset; 2440 if (offset_in_blob >= blob->csb_start_offset && 2441 offset_in_blob < blob->csb_end_offset) { 2442 /* our offset is covered by this blob */ 2443 break; 2444 } 2445 } 2446 } 2447 2448out: 2449 vnode_unlock(vp); 2450 2451 return blob; 2452} 2453 2454static void 2455ubc_cs_free( 2456 struct ubc_info *uip) 2457{ 2458 struct cs_blob *blob, *next_blob; 2459 2460 for (blob = uip->cs_blobs; 2461 blob != NULL; 2462 blob = next_blob) { 2463 next_blob = blob->csb_next; 2464 if (blob->csb_mem_kaddr != 0) { 2465 ubc_cs_blob_deallocate(blob->csb_mem_kaddr, 2466 blob->csb_mem_size); 2467 blob->csb_mem_kaddr = 0; 2468 } 2469 if (blob->csb_mem_handle != IPC_PORT_NULL) { 2470 mach_memory_entry_port_release(blob->csb_mem_handle); 2471 } 2472 blob->csb_mem_handle = IPC_PORT_NULL; 2473 OSAddAtomic(-1, &cs_blob_count); 2474 OSAddAtomic(-blob->csb_mem_size, &cs_blob_size); 2475 kfree(blob, sizeof (*blob)); 2476 } 2477 uip->cs_blobs = NULL; 2478} 2479 2480struct cs_blob * 2481ubc_get_cs_blobs( 2482 struct vnode *vp) 2483{ 2484 struct ubc_info *uip; 2485 struct cs_blob *blobs; 2486 2487 vnode_lock_spin(vp); 2488 2489 if (! 
UBCINFOEXISTS(vp)) { 2490 blobs = NULL; 2491 goto out; 2492 } 2493 2494 uip = vp->v_ubcinfo; 2495 blobs = uip->cs_blobs; 2496 2497out: 2498 vnode_unlock(vp); 2499 2500 return blobs; 2501} 2502 2503unsigned long cs_validate_page_no_hash = 0; 2504unsigned long cs_validate_page_bad_hash = 0; 2505boolean_t 2506cs_validate_page( 2507 void *_blobs, 2508 memory_object_offset_t page_offset, 2509 const void *data, 2510 boolean_t *tainted) 2511{ 2512 SHA1_CTX sha1ctxt; 2513 unsigned char actual_hash[SHA1_RESULTLEN]; 2514 unsigned char expected_hash[SHA1_RESULTLEN]; 2515 boolean_t found_hash; 2516 struct cs_blob *blobs, *blob; 2517 const CS_CodeDirectory *cd; 2518 const CS_SuperBlob *embedded; 2519 off_t start_offset, end_offset; 2520 const unsigned char *hash; 2521 boolean_t validated; 2522 off_t offset; /* page offset in the file */ 2523 size_t size; 2524 off_t codeLimit = 0; 2525 char *lower_bound, *upper_bound; 2526 vm_offset_t kaddr, blob_addr; 2527 vm_size_t ksize; 2528 kern_return_t kr; 2529 2530 offset = page_offset; 2531 2532 /* retrieve the expected hash */ 2533 found_hash = FALSE; 2534 blobs = (struct cs_blob *) _blobs; 2535 2536 for (blob = blobs; 2537 blob != NULL; 2538 blob = blob->csb_next) { 2539 offset = page_offset - blob->csb_base_offset; 2540 if (offset < blob->csb_start_offset || 2541 offset >= blob->csb_end_offset) { 2542 /* our page is not covered by this blob */ 2543 continue; 2544 } 2545 2546 /* map the blob in the kernel address space */ 2547 kaddr = blob->csb_mem_kaddr; 2548 if (kaddr == 0) { 2549 ksize = (vm_size_t) (blob->csb_mem_size + 2550 blob->csb_mem_offset); 2551 kr = vm_map(kernel_map, 2552 &kaddr, 2553 ksize, 2554 0, 2555 VM_FLAGS_ANYWHERE, 2556 blob->csb_mem_handle, 2557 0, 2558 TRUE, 2559 VM_PROT_READ, 2560 VM_PROT_READ, 2561 VM_INHERIT_NONE); 2562 if (kr != KERN_SUCCESS) { 2563 /* XXX FBDP what to do !? 
*/ 2564 printf("cs_validate_page: failed to map blob, " 2565 "size=0x%x kr=0x%x\n", 2566 blob->csb_mem_size, kr); 2567 break; 2568 } 2569 } 2570 blob_addr = kaddr + blob->csb_mem_offset; 2571 2572 lower_bound = CAST_DOWN(char *, blob_addr); 2573 upper_bound = lower_bound + blob->csb_mem_size; 2574 2575 embedded = (const CS_SuperBlob *) blob_addr; 2576 cd = findCodeDirectory(embedded, lower_bound, upper_bound); 2577 if (cd != NULL) { 2578 if (cd->pageSize != PAGE_SHIFT || 2579 cd->hashType != 0x1 || 2580 cd->hashSize != SHA1_RESULTLEN) { 2581 /* bogus blob ? */ 2582 continue; 2583 } 2584 2585 end_offset = round_page(ntohl(cd->codeLimit)); 2586 start_offset = end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE); 2587 offset = page_offset - blob->csb_base_offset; 2588 if (offset < start_offset || 2589 offset >= end_offset) { 2590 /* our page is not covered by this blob */ 2591 continue; 2592 } 2593 2594 codeLimit = ntohl(cd->codeLimit); 2595 hash = hashes(cd, atop(offset), 2596 lower_bound, upper_bound); 2597 if (hash != NULL) { 2598 bcopy(hash, expected_hash, 2599 sizeof (expected_hash)); 2600 found_hash = TRUE; 2601 } 2602 2603 break; 2604 } 2605 } 2606 2607 if (found_hash == FALSE) { 2608 /* 2609 * We can't verify this page because there is no signature 2610 * for it (yet). It's possible that this part of the object 2611 * is not signed, or that signatures for that part have not 2612 * been loaded yet. 2613 * Report that the page has not been validated and let the 2614 * caller decide if it wants to accept it or not. 
2615 */ 2616 cs_validate_page_no_hash++; 2617 if (cs_debug > 1) { 2618 printf("CODE SIGNING: cs_validate_page: " 2619 "off 0x%llx: no hash to validate !?\n", 2620 page_offset); 2621 } 2622 validated = FALSE; 2623 *tainted = FALSE; 2624 } else { 2625 2626 size = PAGE_SIZE; 2627 const uint32_t *asha1, *esha1; 2628 if (offset + size > codeLimit) { 2629 /* partial page at end of segment */ 2630 assert(offset < codeLimit); 2631 size = codeLimit & PAGE_MASK; 2632 } 2633 /* compute the actual page's SHA1 hash */ 2634 SHA1Init(&sha1ctxt); 2635 SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size); 2636 SHA1Final(actual_hash, &sha1ctxt); 2637 2638 asha1 = (const uint32_t *) actual_hash; 2639 esha1 = (const uint32_t *) expected_hash; 2640 2641 if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) { 2642 if (cs_debug) { 2643 printf("CODE SIGNING: cs_validate_page: " 2644 "off 0x%llx size 0x%lx: " 2645 "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != " 2646 "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n", 2647 page_offset, size, 2648 asha1[0], asha1[1], asha1[2], 2649 asha1[3], asha1[4], 2650 esha1[0], esha1[1], esha1[2], 2651 esha1[3], esha1[4]); 2652 } 2653 cs_validate_page_bad_hash++; 2654 *tainted = TRUE; 2655 } else { 2656 if (cs_debug > 1) { 2657 printf("CODE SIGNING: cs_validate_page: " 2658 "off 0x%llx size 0x%lx: SHA1 OK\n", 2659 page_offset, size); 2660 } 2661 *tainted = FALSE; 2662 } 2663 validated = TRUE; 2664 } 2665 2666 return validated; 2667} 2668 2669int 2670ubc_cs_getcdhash( 2671 vnode_t vp, 2672 off_t offset, 2673 unsigned char *cdhash) 2674{ 2675 struct cs_blob *blobs, *blob; 2676 off_t rel_offset; 2677 2678 blobs = ubc_get_cs_blobs(vp); 2679 for (blob = blobs; 2680 blob != NULL; 2681 blob = blob->csb_next) { 2682 /* compute offset relative to this blob */ 2683 rel_offset = offset - blob->csb_base_offset; 2684 if (rel_offset >= blob->csb_start_offset && 2685 rel_offset < blob->csb_end_offset) { 2686 /* this blob does cover our "offset" ! 
*/ 2687 break; 2688 } 2689 } 2690 2691 if (blob == NULL) { 2692 /* we didn't find a blob covering "offset" */ 2693 return EBADEXEC; /* XXX any better error ? */ 2694 } 2695 2696 /* get the SHA1 hash of that blob */ 2697 bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1)); 2698 2699 return 0; 2700} 2701