/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/buf_internal.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/uio.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/disk.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <sys/mount_internal.h>
#include <sys/file_internal.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF	/* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int  hfs_minorupdate(struct vnode *vp);
static int  do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);

/* from bsd/hfs/hfs_vnops.c */
extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);


int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
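/*
 * The sysctl above surfaces as kern.flush_cache_on_write.  When enabled,
 * writes to files opened uncached (IO_NOCACHE or vnode_isnocache()) are
 * followed by a DKIOCSYNCHRONIZECACHE ioctl on the underlying device; see
 * the flush_cache_on_write check near the end of hfs_vnop_write().
 * E.g. (illustrative, from user space):
 *
 *	sysctl kern.flush_cache_on_write=1
 */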
files"); 100 101/* 102 * Read data from a file. 103 */ 104int 105hfs_vnop_read(struct vnop_read_args *ap) 106{ 107 /* 108 struct vnop_read_args { 109 struct vnodeop_desc *a_desc; 110 vnode_t a_vp; 111 struct uio *a_uio; 112 int a_ioflag; 113 vfs_context_t a_context; 114 }; 115 */ 116 117 uio_t uio = ap->a_uio; 118 struct vnode *vp = ap->a_vp; 119 struct cnode *cp; 120 struct filefork *fp; 121 struct hfsmount *hfsmp; 122 off_t filesize; 123 off_t filebytes; 124 off_t start_resid = uio_resid(uio); 125 off_t offset = uio_offset(uio); 126 int retval = 0; 127 int took_truncate_lock = 0; 128 int io_throttle = 0; 129 int throttled_count = 0; 130 131 /* Preflight checks */ 132 if (!vnode_isreg(vp)) { 133 /* can only read regular files */ 134 if (vnode_isdir(vp)) 135 return (EISDIR); 136 else 137 return (EPERM); 138 } 139 if (start_resid == 0) 140 return (0); /* Nothing left to do */ 141 if (offset < 0) 142 return (EINVAL); /* cant read from a negative offset */ 143 144 if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == 145 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { 146 /* Don't allow unencrypted io request from user space */ 147 return EPERM; 148 } 149 150 151 152#if HFS_COMPRESSION 153 if (VNODE_IS_RSRC(vp)) { 154 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */ 155 return 0; 156 } 157 /* otherwise read the resource fork normally */ 158 } else { 159 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ 160 if (compressed) { 161 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp)); 162 if (compressed) { 163 if (retval == 0) { 164 /* successful read, update the access time */ 165 VTOC(vp)->c_touch_acctime = TRUE; 166 167 /* compressed files are not hot file candidates */ 168 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { 169 VTOF(vp)->ff_bytesread = 0; 170 } 171 } 172 return retval; 173 } 174 /* otherwise the file was converted back to a regular file while we were reading it */ 175 retval = 0; 176 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { 177 int error; 178 179 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); 180 if (error) { 181 return error; 182 } 183 184 } 185 } 186#endif /* HFS_COMPRESSION */ 187 188 cp = VTOC(vp); 189 fp = VTOF(vp); 190 hfsmp = VTOHFS(vp); 191 192#if CONFIG_PROTECT 193 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) { 194 goto exit; 195 } 196#endif 197 198 /* 199 * If this read request originated from a syscall (as opposed to 200 * an in-kernel page fault or something), then set it up for 201 * throttle checks 202 */ 203 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) { 204 io_throttle = IO_RETURN_ON_THROTTLE; 205 } 206 207read_again: 208 209 /* Protect against a size change. */ 210 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); 211 took_truncate_lock = 1; 212 213 filesize = fp->ff_size; 214 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; 215 216 /* 217 * Check the file size. Note that per POSIX spec, we return 0 at 218 * file EOF, so attempting a read at an offset that is too big 219 * should just return 0 on HFS+. Since the return value was initialized 220 * to 0 above, we just jump to exit. HFS Standard has its own behavior. 

/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t origFileSize;
	off_t writelimit;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	off_t filebytes;
	off_t offset;
	ssize_t resid;
	int eflags;
	int ioflag = ap->a_ioflag;
	int retval = 0;
	int lockflags;
	int cnode_locked = 0;
	int partialwrite = 0;
	int do_snapshot = 1;
	time_t orig_ctime = VTOC(vp)->c_ctime;
	int took_truncate_lock = 0;
	int io_return_on_throttle = 0;
	int throttled_count = 0;
	struct rl_entry *invalid_range;

#if HFS_COMPRESSION
	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
		switch(state) {
			case FILE_IS_COMPRESSED:
				return EACCES;
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow writes but do not
				   bother with snapshots or else we will deadlock.
				 */
				do_snapshot = 0;
				break;
			default:
				printf("invalid state %d for compressed file\n", state);
				/* fall through */
		}
	} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		int error;

		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
		if (error != 0) {
			return error;
		}
	}

	if (do_snapshot) {
		check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
	}

#endif

	if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
	    (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
		/* Don't allow unencrypted io request from user space */
		return EPERM;
	}


	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (offset < 0)
		return (EINVAL);
	if (resid == 0)
		return (E_NONE);
	if (!vnode_isreg(vp))
		return (EPERM);		/* Can only write regular files */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
	if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
		goto exit;
	}
#endif

	eflags = kEFDeferMask;	/* defer file block allocations */
#if HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
		ioflag |= IO_SYNC;
	}
#endif /* HFS_SPARSE_DEV */

	if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
	    (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
		io_return_on_throttle = IO_RETURN_ON_THROTTLE;
	}

again:
	/*
	 * Protect against a size change.
	 *
	 * Note: If took_truncate_lock is true, then we previously got the lock shared
	 * but needed to upgrade to exclusive.  So try getting it exclusive from the
	 * start.
	 */
	if (ioflag & IO_APPEND || took_truncate_lock) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	}
	else {
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
	}
	took_truncate_lock = 1;

	/* Update UIO */
	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	}
	if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
		retval = EPERM;
		goto exit;
	}

	origFileSize = fp->ff_size;
	writelimit = offset + resid;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/*
	 * We may need an exclusive truncate lock for several reasons, all
	 * of which are because we may be writing to a (portion of a) block
	 * for the first time, and we need to make sure no readers see the
	 * prior, uninitialized contents of the block.  The cases are:
	 *
	 * 1. We have unallocated (delayed allocation) blocks.  We may be
	 *    allocating new blocks to the file and writing to them.
	 *    (A more precise check would be whether the range we're writing
	 *    to contains delayed allocation blocks.)
	 * 2. We need to extend the file.  The bytes between the old EOF
	 *    and the new EOF are not yet initialized.  This is important
	 *    even if we're not allocating new blocks to the file.  If the
	 *    old EOF and new EOF are in the same block, we still need to
	 *    protect that range of bytes until they are written for the
	 *    first time.
	 * 3. The write overlaps some invalid ranges (delayed zero fill; that
	 *    part of the file has been allocated, but not yet written).
	 *
	 * If we had a shared lock with the above cases, we need to try to upgrade
	 * to an exclusive lock.  If the upgrade fails, we will lose the shared
	 * lock, and will need to take the truncate lock again; the took_truncate_lock
	 * flag will still be set, causing us to try for an exclusive lock next time.
	 *
	 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
	 * lock is held, since it protects the range lists.
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    ((fp->ff_unallocblocks != 0) ||
	     (writelimit > origFileSize))) {
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			/*
			 * Lock upgrade failed and we lost our shared lock, try again.
			 * Note: we do not set took_truncate_lock=0 here.  Leaving it
			 * set to 1 will cause us to try to get the lock exclusive.
			 */
			goto again;
		}
		else {
			/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
			cp->c_truncatelockowner = current_thread();
		}
	}

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
		goto exit;
	}
	cnode_locked = 1;

	/*
	 * Now that we have the cnode lock, see if there are delayed zero fill ranges
	 * overlapping our write.  If so, we need the truncate lock exclusive (see above).
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
		/*
		 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
		 * a deadlock, rather than simply returning failure.  (That is, it apparently does
		 * not behave like a "try_lock").  Since this condition is rare, just drop the
		 * cnode lock and try again.  Since took_truncate_lock is set, we will
		 * automatically take the truncate lock exclusive.
		 */
		hfs_unlock(cp);
		cnode_locked = 0;
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		goto again;
	}

	KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
		(int)offset, uio_resid(uio), (int)fp->ff_size,
		(int)filebytes, 0);

	/* Check if we do not need to extend the file */
	if (writelimit <= filebytes) {
		goto sizeok;
	}

	cred = vfs_context_ucred(ap->a_context);
	bytesToAdd = writelimit - filebytes;

#if QUOTA
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
			   cred, 0);
	if (retval)
		goto exit;
#endif /* QUOTA */

	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto exit;
	}

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
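		/*
		 * ExtendFileC below may grow the fork by fewer blocks than
		 * requested (actualBytesAdded reports the real growth); the
		 * enclosing while loop keeps asking until writelimit is
		 * covered or the allocation fails.
		 */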
		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
	}
	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	/*
	 * If we didn't grow the file enough try a partial write.
	 * POSIX expects this behavior.
	 */
	if ((retval == ENOSPC) && (filebytes > offset)) {
		retval = 0;
		partialwrite = 1;
		uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
		resid -= bytesToAdd;
		writelimit = filebytes;
	}
sizeok:
	if (retval == E_NONE) {
		off_t filesize;
		off_t zero_off;
		off_t tail_off;
		off_t inval_start;
		off_t inval_end;
		off_t io_start;
		int lflag;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check whether the area between zero_off and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			}
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
					eof_page_base,
					fp->ff_size - 1,
					&invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				};
			};

			if (inval_start < inval_end) {
				struct timeval tv;
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					hfs_unlock(cp);
					cnode_locked = 0;
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							zero_off, (off_t)0,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
					cnode_locked = 1;
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);
				};

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				microuptime(&tv);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			};

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;
		};

		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			};
		};

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 *   before the current EOF it might be marked as invalid now and must be
		 *   made readable (removed from the invalid ranges) before cluster_write
		 *   tries to write it:
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			off_t io_end;

			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		};

		hfs_unlock(cp);
		cnode_locked = 0;

		/*
		 * We need to tell UBC the fork's new size BEFORE calling
		 * cluster_write, in case any of the new pages need to be
		 * paged out before cluster_write completes (which does happen
		 * in embedded systems due to extreme memory pressure).
		 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
		 * will be, so that it can pass that on to cluster_pageout, and
		 * allow those pageouts.
		 *
		 * We don't update ff_size yet since we don't want pageins to
		 * be able to see uninitialized data between the old and new
		 * EOF, until cluster_write has completed and initialized that
		 * part of the file.
		 *
		 * The vnode pager relies on the file size last given to UBC via
		 * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
		 * ff_size (whichever is larger).  NOTE: ff_new_size is always
		 * zero, unless we are extending the file via write.
		 */
		if (filesize > fp->ff_size) {
			fp->ff_new_size = filesize;
			ubc_setsize(vp, filesize);
		}
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
		if (retval) {
			fp->ff_new_size = 0;	/* no longer extending; use ff_size */

			if (retval == EAGAIN) {
				/*
				 * EAGAIN indicates that we still have I/O to do, but
				 * that we now need to be throttled
				 */
				if (resid != uio_resid(uio)) {
					/*
					 * did manage to do some I/O before returning EAGAIN
					 */
					resid = uio_resid(uio);
					offset = uio_offset(uio);

					cp->c_touch_chgtime = TRUE;
					cp->c_touch_modtime = TRUE;
					hfs_incr_gencount(cp);
				}
				if (filesize > fp->ff_size) {
					/*
					 * we called ubc_setsize before the call to
					 * cluster_write... since we only partially
					 * completed the I/O, we need to
					 * re-adjust our idea of the filesize based
					 * on our interim EOF
					 */
					ubc_setsize(vp, offset);

					fp->ff_size = offset;
				}
				goto exit;
			}
			if (filesize > origFileSize) {
				ubc_setsize(vp, origFileSize);
			}
			goto ioerr_exit;
		}

		if (filesize > origFileSize) {
			fp->ff_size = filesize;

			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING) {
				fp->ff_bytesread = 0;
			}
		}
		fp->ff_new_size = 0;	/* ff_size now has the correct size */
	}
	if (partialwrite) {
		uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
		resid += bytesToAdd;
	}

	// XXXdbg - see radar 4871353 for more info
	{
		if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
			VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
		}
	}

ioerr_exit:
	if (resid > uio_resid(uio)) {
		if (!cnode_locked) {
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			cnode_locked = 1;
		}

		cp->c_touch_chgtime = TRUE;
		cp->c_touch_modtime = TRUE;
		hfs_incr_gencount(cp);

		/*
		 * If we successfully wrote any data, and we are not the superuser
		 * we clear the setuid and setgid bits as a precaution against
		 * tampering.
		 */
		if (cp->c_mode & (S_ISUID | S_ISGID)) {
			cred = vfs_context_ucred(ap->a_context);
			if (cred && suser(cred, NULL)) {
				cp->c_mode &= ~(S_ISUID | S_ISGID);
			}
		}
	}
	if (retval) {
		if (ioflag & IO_UNIT) {
			(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
					   0, ap->a_context);
			uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
			uio_setresid(uio, resid);
			filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		}
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
		retval = hfs_update(vp, TRUE);
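	/*
	 * Note on the IO_UNIT case above: it gives the write all-or-nothing
	 * semantics.  On error the file is truncated back to its original
	 * size and the uio is rewound to look as if nothing was written.
	 */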
	/* Updating vcbWrCnt doesn't need to be atomic. */
	hfsmp->vcbWrCnt++;

	KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
	if (cnode_locked)
		hfs_unlock(cp);

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}
	if (retval == EAGAIN) {
		throttle_lowpri_io(1);
		throttled_count++;

		retval = 0;
		goto again;
	}
	if (throttled_count) {
		throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
	}
	return (retval);
}

/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
	int numcached;
	int cachehits;	/* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	unsigned char *haveaccess;
};

struct access_t {
	uid_t uid;		/* IN: effective user id */
	short flags;		/* IN: access requested (i.e. R_OK) */
	short num_groups;	/* IN: number of groups user belongs to */
	int num_files;		/* IN: number of files to process */
	int *file_ids;		/* IN: array of file ids */
	gid_t *groups;		/* IN: array of groups */
	short *access;		/* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
	uid_t uid;		/* IN: effective user id */
	short flags;		/* IN: access requested (i.e. R_OK) */
	short num_groups;	/* IN: number of groups user belongs to */
	int num_files;		/* IN: number of files to process */
	user32_addr_t file_ids;	/* IN: array of file ids */
	user32_addr_t groups;	/* IN: array of groups */
	user32_addr_t access;	/* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
	uid_t uid;		/* IN: effective user id */
	short flags;		/* IN: access requested (i.e. R_OK) */
	short num_groups;	/* IN: number of groups user belongs to */
	int num_files;		/* IN: number of files to process */
	user64_addr_t file_ids;	/* IN: array of file ids */
	user64_addr_t groups;	/* IN: array of groups */
	user64_addr_t access;	/* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be a different size than
// the regular version
struct ext_access_t {
	uint32_t flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t num_files;	/* IN: number of files to process */
	uint32_t map_size;	/* IN: size of the bit map */
	uint32_t *file_ids;	/* IN: array of file ids */
	char *bitmap;		/* OUT: hash-bitmap of interesting directory ids */
	short *access;		/* OUT: access info for each file (0 for 'has access') */
	uint32_t num_parents;	/* future use */
	cnid_t *parents;	/* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
	uint32_t flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t num_files;	/* IN: number of files to process */
	uint32_t map_size;	/* IN: size of the bit map */
	user32_addr_t file_ids;	/* IN: array of file ids */
	user32_addr_t bitmap;	/* OUT: hash-bitmap of interesting directory ids */
	user32_addr_t access;	/* OUT: access info for each file (0 for 'has access') */
	uint32_t num_parents;	/* future use */
	user32_addr_t parents;	/* future use */
};

struct user64_ext_access_t {
	uint32_t flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t num_files;	/* IN: number of files to process */
	uint32_t map_size;	/* IN: size of the bit map */
	user64_addr_t file_ids;	/* IN: array of file ids */
	user64_addr_t bitmap;	/* OUT: hash-bitmap of interesting directory ids */
	user64_addr_t access;	/* OUT: access info for each file (0 for 'has access') */
	uint32_t num_parents;	/* future use */
	user64_addr_t parents;	/* future use */
};


/*
 * Perform a binary search for the given parent_id.  Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
	int index=-1;
	unsigned int lo=0;

	do {
		unsigned int mid = ((hi - lo)/2) + lo;
		unsigned int this_id = array[mid];

		if (parent_id == this_id) {
			hi = mid;
			break;
		}

		if (parent_id < this_id) {
			hi = mid;
			continue;
		}

		if (parent_id > this_id) {
			lo = mid + 1;
			continue;
		}
	} while(lo < hi);

	/* check if lo and hi converged on the match */
	if (parent_id == array[hi]) {
		index = hi;
	}

	if (no_match_indexp) {
		*no_match_indexp = hi;
	}

	return index;
}


static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
	unsigned int hi;
	int matches = 0;
	int index, no_match_index;

	if (cache->numcached == 0) {
		*indexp = 0;
		return 0;	// table is empty, so insert at index=0 and report no match
	}

	if (cache->numcached > NUM_CACHE_ENTRIES) {
		cache->numcached = NUM_CACHE_ENTRIES;
	}

	hi = cache->numcached - 1;

	index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

	/* if no existing entry found, find index for new one */
	if (index == -1) {
		index = no_match_index;
		matches = 0;
	} else {
		matches = 1;
	}

	*indexp = index;
	return matches;
}

/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (index == -1) {
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
				// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
				cache->haveaccess[lookup_index] = access;
			}

			/* mission accomplished */
			return;
		} else {
			index = lookup_index;
		}

	}

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= NUM_CACHE_ENTRIES) {
		cache->numcached = NUM_CACHE_ENTRIES-1;

		if (index > cache->numcached) {
			index = cache->numcached;
		}
	}

	if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
		index++;
	}

	if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
	}

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
	cache->numcached++;
}


struct cinfo {
	uid_t uid;
	gid_t gid;
	mode_t mode;
	cnid_t parentcnid;
	u_int16_t recflags;
};

static int
snoop_callback(const cnode_t *cp, void *arg)
{
	struct cinfo *cip = arg;

	cip->uid = cp->c_uid;
	cip->gid = cp->c_gid;
	cip->mode = cp->c_mode;
	cip->parentcnid = cp->c_parentcnid;
	cip->recflags = cp->c_attr.ca_recflags;

	return (0);
}

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
	struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
	int error = 0;

	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
	} else {
		struct cinfo c_info;

		/* otherwise, check the cnode hash in case the file/dir is incore */
		error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);

		if (error == EACCES) {
			// File is deleted
			return ENOENT;
		} else if (!error) {
			cnattrp->ca_uid = c_info.uid;
			cnattrp->ca_gid = c_info.gid;
			cnattrp->ca_mode = c_info.mode;
			cnattrp->ca_recflags = c_info.recflags;
			keyp->hfsPlus.parentID = c_info.parentcnid;
		} else {
			int lockflags;

			if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
				throttle_lowpri_io(1);

			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

			/* lookup this cnid in the catalog */
			error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

			hfs_systemfile_unlock(hfsmp, lockflags);

			cache->lookups++;
		}
	}

	return (error);
}
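/*
 * do_access_check() below walks from a directory up through its parent
 * chain to the volume root (kRootDirID), resolving each ancestor with
 * do_attr_lookup() and recording up to CACHE_LEVELS results in the
 * access_cache, so later files that share ancestors are answered cheaply.
 */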

/*
 * Compute whether we have access to the given directory (nodeID) and all its parents.  Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
	struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
	struct vfs_context *my_context,
	char *bitmap,
	uint32_t map_size,
	cnid_t* parents,
	uint32_t num_parents)
{
	int myErr = 0;
	int myResult;
	HFSCatalogNodeID thisNodeID;
	unsigned int myPerms;
	struct cat_attr cnattr;
	int cache_index = -1, scope_index = -1, scope_idx_start = -1;
	CatalogKey catkey;

	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];

	thisNodeID = nodeID;
	while (thisNodeID >= kRootDirID) {
		myResult = 0;	/* default to "no access" */

		/* check the cache before resorting to hitting the catalog */

		/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
		 * to look any further after hitting cached dir */

		if (lookup_bucket(cache, &cache_index, thisNodeID)) {
			cache->cachehits++;
			myErr = cache->haveaccess[cache_index];
			if (scope_index != -1) {
				if (myErr == ESRCH) {
					myErr = 0;
				}
			} else {
				scope_index = 0;	// so we'll just use the cache result
				scope_idx_start = ids_to_cache;
			}
			myResult = (myErr == 0) ? 1 : 0;
			goto ExitThisRoutine;
		}


		if (parents) {
			int tmp;
			tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
			if (scope_index == -1)
				scope_index = tmp;
			if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
				scope_idx_start = ids_to_cache;
			}
		}

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;
			ids_to_cache++;
		}
		// Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
		if (bitmap && map_size) {
			bitmap[(thisNodeID/8)%(map_size)] |= (1<<(thisNodeID&7));
		}


		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
		if (myErr) {
			goto ExitThisRoutine;	/* no access */
		}

		/* Root always gets access. */
		if (suser(myp_ucred, NULL) == 0) {
			thisNodeID = catkey.hfsPlus.parentID;
			myResult = 1;
			continue;
		}

		// if the thing has acl's, do the full permission check
		if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
			struct vnode *vp;

			/* get the vnode for this cnid */
			myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
			if ( myErr ) {
				myResult = 0;
				goto ExitThisRoutine;
			}

			thisNodeID = VTOC(vp)->c_parentcnid;

			hfs_unlock(VTOC(vp));

			if (vnode_vtype(vp) == VDIR) {
				myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
			} else {
				myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
			}

			vnode_put(vp);
			if (myErr) {
				myResult = 0;
				goto ExitThisRoutine;
			}
		} else {
			unsigned int flags;
			int mode = cnattr.ca_mode & S_IFMT;
			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);

			if (mode == S_IFDIR) {
				flags = R_OK | X_OK;
			} else {
				flags = R_OK;
			}
			if ( (myPerms & flags) != flags) {
				myResult = 0;
				myErr = EACCES;
				goto ExitThisRoutine;	/* no access */
			}

			/* up the hierarchy we go */
			thisNodeID = catkey.hfsPlus.parentID;
		}
	}

	/* if here, we have access to this node */
	myResult = 1;

 ExitThisRoutine:
	if (parents && myErr == 0 && scope_index == -1) {
		myErr = ESRCH;
	}

	if (myErr) {
		myResult = 0;
	}
	*err = myErr;

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
			add_node(cache, -1, parent_ids[i], ESRCH);
		} else {
			add_node(cache, -1, parent_ids[i], myErr);
		}
	}

	return (myResult);
}

static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
	struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
	boolean_t is64bit;

	/*
	 * NOTE: on entry, the vnode has an io_ref. In case this vnode
	 * happens to be in our list of file_ids, we'll note it and
	 * avoid calling hfs_chashget_nowait() on that id as that
	 * will cause a "locking against myself" panic.
	 */
	Boolean check_leaf = true;

	struct user64_ext_access_t *user_access_structp;
	struct user64_ext_access_t tmp_user_access;
	struct access_cache cache;

	int error = 0, prev_parent_check_ok = 1;
	unsigned int i;

	short flags;
	unsigned int num_files = 0;
	int map_size = 0;
	int num_parents = 0;
	int *file_ids = NULL;
	short *access = NULL;
	char *bitmap = NULL;
	cnid_t *parents = NULL;
	int leaf_index;

	cnid_t cnid;
	cnid_t prevParent_cnid = 0;
	unsigned int myPerms;
	short myaccess = 0;
	struct cat_attr cnattr;
	CatalogKey catkey;
	struct cnode *skip_cp = VTOC(vp);
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);

	is64bit = proc_is64bit(p);

	/* initialize the local cache and buffers */
	cache.numcached = 0;
	cache.cachehits = 0;
	cache.lookups = 0;
	cache.acache = NULL;
	cache.haveaccess = NULL;
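	/*
	 * For orientation, a sketch of how user space reaches this code
	 * (illustrative; the exact fsctl selector names live in hfs_fsctl.h
	 * and are an assumption here, not taken from this file):
	 *
	 *	struct ext_access_t args = {
	 *		.flags     = R_OK,
	 *		.num_files = n,
	 *		.file_ids  = ids,	// n file/dir ids to test
	 *		.access    = results,	// n shorts; 0 == "has access"
	 *	};
	 *	fsctl(volpath, HFS_EXT_BULKACCESS_FSCTL, &args, 0);
	 */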
	/* struct copyin done during dispatch... need to copy file_id array separately */
	if (ap->a_data == NULL) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (is64bit) {
		if (arg_size != sizeof(struct user64_ext_access_t)) {
			error = EINVAL;
			goto err_exit_bulk_access;
		}

		user_access_structp = (struct user64_ext_access_t *)ap->a_data;

	} else if (arg_size == sizeof(struct user32_access_t)) {
		struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

		// convert an old style bulk-access struct to the new style
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = 0;
		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = USER_ADDR_NULL;
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.num_parents = 0;
		user_access_structp = &tmp_user_access;

	} else if (arg_size == sizeof(struct user32_ext_access_t)) {
		struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

		// up-cast from a 32-bit version of the struct
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = accessp->map_size;
		tmp_user_access.num_parents = accessp->num_parents;

		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

		user_access_structp = &tmp_user_access;
	} else {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	map_size = user_access_structp->map_size;

	num_files = user_access_structp->num_files;

	num_parents = user_access_structp->num_parents;

	if (num_files < 1) {
		goto err_exit_bulk_access;
	}
	if (num_files > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (num_parents > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	file_ids = (int *) kalloc(sizeof(int) * num_files);
	access = (short *) kalloc(sizeof(short) * num_files);
	if (map_size) {
		bitmap = (char *) kalloc(sizeof(char) * map_size);
	}

	if (num_parents) {
		parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
	}

	cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
	cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
		if (file_ids) {
			kfree(file_ids, sizeof(int) * num_files);
		}
		if (bitmap) {
			kfree(bitmap, sizeof(char) * map_size);
		}
		if (access) {
			kfree(access, sizeof(short) * num_files);
		}
		if (cache.acache) {
			kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
		}
		if (cache.haveaccess) {
			kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
		}
		if (parents) {
			kfree(parents, sizeof(cnid_t) * num_parents);
		}
		return ENOMEM;
	}
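	/*
	 * From here on, any failure can simply set error and jump to
	 * err_exit_bulk_access, which frees whichever of the buffers
	 * above were actually allocated.
	 */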
	// make sure the bitmap is zero'ed out...
	if (bitmap) {
		bzero(bitmap, (sizeof(char) * map_size));
	}

	if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
				num_files * sizeof(int)))) {
		goto err_exit_bulk_access;
	}

	if (num_parents) {
		if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
					num_parents * sizeof(cnid_t)))) {
			goto err_exit_bulk_access;
		}
	}

	flags = user_access_structp->flags;
	if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
		flags = R_OK;
	}

	/* check if we've been passed leaf node ids or parent ids */
	if (flags & PARENT_IDS_FLAG) {
		check_leaf = false;
	}

	/* Check access to each file_id passed in */
	for (i = 0; i < num_files; i++) {
		leaf_index = -1;
		cnid = (cnid_t) file_ids[i];

		/* root always has access */
		if ((!parents) && (!suser(cred, NULL))) {
			access[i] = 0;
			continue;
		}

		if (check_leaf) {
			/* do the lookup (checks the cnode hash, then the catalog) */
			error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
			if (error) {
				access[i] = (short) error;
				continue;
			}

			if (parents) {
				// Check if the leaf matches one of the parent scopes
				leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
				if (leaf_index >= 0 && parents[leaf_index] == cnid)
					prev_parent_check_ok = 0;
				else if (leaf_index >= 0)
					prev_parent_check_ok = 1;
			}

			// if the thing has acl's, do the full permission check
			if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
				struct vnode *cvp;
				int myErr = 0;
				/* get the vnode for this cnid */
				myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
				if ( myErr ) {
					access[i] = myErr;
					continue;
				}

				hfs_unlock(VTOC(cvp));

				if (vnode_vtype(cvp) == VDIR) {
					myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
				} else {
					myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
				}

				vnode_put(cvp);
				if (myErr) {
					access[i] = myErr;
					continue;
				}
			} else {
				/* before calling CheckAccess(), check the target file for read access */
				myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
					cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

				/* fail fast if no access */
				if ((myPerms & flags) == 0) {
					access[i] = EACCES;
					continue;
				}
			}
		} else {
			/* we were passed an array of parent ids */
			catkey.hfsPlus.parentID = cnid;
		}

		/* if the last guy had the same parent and had access, we're done */
		if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
			cache.cachehits++;
			access[i] = 0;
			continue;
		}

		myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
			skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

		if (myaccess || (error == ESRCH && leaf_index != -1)) {
			access[i] = 0;	// have access.. no errors to report
		} else {
			access[i] = (error != 0 ? (short) error : EACCES);
		}

		prevParent_cnid = catkey.hfsPlus.parentID;
	}
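	/*
	 * At this point access[i] holds 0 for each file the caller may
	 * access, or an errno explaining the denial; the results and the
	 * optional parent bitmap are returned to user space below.
	 */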
	/* copyout the access array */
	if ((error = copyout((caddr_t)access, user_access_structp->access,
				num_files * sizeof (short)))) {
		goto err_exit_bulk_access;
	}
	if (map_size && bitmap) {
		if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
					map_size * sizeof (char)))) {
			goto err_exit_bulk_access;
		}
	}


 err_exit_bulk_access:

	if (file_ids)
		kfree(file_ids, sizeof(int) * num_files);
	if (parents)
		kfree(parents, sizeof(cnid_t) * num_parents);
	if (bitmap)
		kfree(bitmap, sizeof(char) * map_size);
	if (access)
		kfree(access, sizeof(short) * num_files);
	if (cache.acache)
		kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
	if (cache.haveaccess)
		kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	return (error);
}


/* end "bulk-access" support */


/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vnode_t a_vp;
		int a_command;
		caddr_t a_data;
		int a_fflag;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode * vp = ap->a_vp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t context = ap->a_context;
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	struct vfsstatfs *vfsp;
	boolean_t is64bit;
	off_t jnl_start, jnl_size;
	struct hfs_journal_info *jip;
#if HFS_COMPRESSION
	int compressed = 0;
	off_t uncompressed_size = -1;
	int decmpfs_error = 0;

	if (ap->a_command == F_RDADVISE) {
		/* we need to inspect the decmpfs state of the file as early as possible */
		compressed = hfs_file_is_compressed(VTOC(vp), 0);
		if (compressed) {
			if (VNODE_IS_RSRC(vp)) {
				/* if this is the resource fork, treat it as if it were empty */
				uncompressed_size = 0;
			} else {
				decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
				if (decmpfs_error != 0) {
					/* failed to get the uncompressed size, we'll check for this later */
					uncompressed_size = -1;
				}
			}
		}
	}
#endif /* HFS_COMPRESSION */

	is64bit = proc_is64bit(p);

#if CONFIG_PROTECT
	{
		int error = 0;
		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
			return error;
		}
	}
#endif /* CONFIG_PROTECT */

	switch (ap->a_command) {

	case HFS_GETPATH:
	{
		struct vnode *file_vp;
		cnid_t  cnid;
		int  outlen;
		char *bufptr;
		int error;
		int flags = 0;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		bufptr = (char *)ap->a_data;
		cnid = strtoul(bufptr, NULL, 10);
		if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
			flags |= BUILDPATH_VOLUME_RELATIVE;
		}
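		/*
		 * HFS_GETPATH operates in place on the shared buffer: the
		 * caller passes the target cnid as a decimal string and gets
		 * the corresponding path back in the same buffer.
		 * Illustrative user-space call (names assumed, not from this
		 * file):
		 *
		 *	char buf[MAXPATHLEN];
		 *	snprintf(buf, sizeof(buf), "%u", file_id);
		 *	fsctl(volume_mount_point, HFS_GETPATH, buf, 0);
		 */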
		/* We need to call hfs_vfs_vget to leverage the code that will
		 * fix the origin list for us if needed, as opposed to calling
		 * hfs_vget, since we will need the parent for the build_path call.
		 */
		if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
			return (error);
		}
		error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
		vnode_put(file_vp);

		return (error);
	}

	case HFS_TRANSFER_DOCUMENT_ID:
	{
		struct cnode *cp = NULL;
		int error;
		u_int32_t to_fd = *(u_int32_t *)ap->a_data;
		struct fileproc *to_fp;
		struct vnode *to_vp;
		struct cnode *to_cp;

		cp = VTOC(vp);

		if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
			//printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
			return error;
		}
		if ( (error = vnode_getwithref(to_vp)) ) {
			file_drop(to_fd);
			return error;
		}

		if (VTOHFS(to_vp) != hfsmp) {
			error = EXDEV;
			goto transfer_cleanup;
		}

		int need_unlock = 1;
		to_cp = VTOC(to_vp);
		error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
		if (error != 0) {
			//printf("could not lock the pair of cnodes (error %d)\n", error);
			goto transfer_cleanup;
		}

		if (!(cp->c_bsdflags & UF_TRACKED)) {
			error = EINVAL;
		} else if (to_cp->c_bsdflags & UF_TRACKED) {
			//
			// if the destination is already tracked, return an error
			// as otherwise it's a silent deletion of the target's
			// document-id
			//
			error = EEXIST;
		} else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
			//
			// we can use the FndrExtendedFileInfo because the doc-id is the first
			// thing in both it and the ExtendedDirInfo struct, which is fixed in
			// format and can not change layout
			//
			struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
			struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);

			if (f_extinfo->document_id == 0) {
				uint32_t new_id;

				hfs_unlockpair(cp, to_cp);	// have to unlock to be able to get a new-id

				if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
					//
					// re-lock the pair now that we have the document-id
					//
					hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
					f_extinfo->document_id = new_id;
				} else {
					goto transfer_cleanup;
				}
			}

			to_extinfo->document_id = f_extinfo->document_id;
			f_extinfo->document_id = 0;
			//printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);

			// make sure the destination is also UF_TRACKED
			to_cp->c_bsdflags |= UF_TRACKED;
			cp->c_bsdflags &= ~UF_TRACKED;

			// mark the cnodes dirty
			cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
			to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;

			int lockflags;
			if ((error = hfs_start_transaction(hfsmp)) == 0) {

				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				(void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
				(void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);

				hfs_systemfile_unlock (hfsmp, lockflags);
				(void) hfs_end_transaction(hfsmp);
			}

#if CONFIG_FSE
			add_fsevent(FSE_DOCID_CHANGED, context,
				    FSE_ARG_DEV,   hfsmp->hfs_raw_dev,
				    FSE_ARG_INO,   (ino64_t)cp->c_fileid,	// src inode #
				    FSE_ARG_INO,   (ino64_t)to_cp->c_fileid,	// dst inode #
				    FSE_ARG_INT32, to_extinfo->document_id,
				    FSE_ARG_DONE);

			hfs_unlockpair(cp, to_cp);	// unlock this so we can send the fsevents
			need_unlock = 0;

			if (need_fsevent(FSE_STAT_CHANGED, vp)) {
				add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
			}
			if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
				add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
			}
#else
			hfs_unlockpair(cp, to_cp);	// unlock this so we can send the fsevents
			need_unlock = 0;
#endif
		}

		if (need_unlock) {
			hfs_unlockpair(cp, to_cp);
		}

	transfer_cleanup:
		vnode_put(to_vp);
		file_drop(to_fd);

		return error;
	}



	case HFS_PREV_LINK:
	case HFS_NEXT_LINK:
	{
		cnid_t linkfileid;
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		int error;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		linkfileid = *(cnid_t *)ap->a_data;
		if (linkfileid < kHFSFirstUserCatalogNodeID) {
			return (EINVAL);
		}
		if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
			return (error);
		}
		if (ap->a_command == HFS_NEXT_LINK) {
			*(cnid_t *)ap->a_data = nextlinkid;
		} else {
			*(cnid_t *)ap->a_data = prevlinkid;
		}
		return (0);
	}

	case HFS_RESIZE_PROGRESS: {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		/* file system must not be mounted read-only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
	}

	case HFS_RESIZE_VOLUME: {
		u_int64_t newsize;
		u_int64_t cursize;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}

		/* file system must not be mounted read only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		newsize = *(u_int64_t *)ap->a_data;
		cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

		if (newsize > cursize) {
			return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else if (newsize < cursize) {
			return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else {
			return (0);
		}
	}
	case HFS_CHANGE_NEXT_ALLOCATION: {
		int error = 0;		/* Assume success */
		u_int32_t location;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		hfs_lock_mount(hfsmp);
		location = *(u_int32_t *)ap->a_data;
		if ((location >= hfsmp->allocLimit) &&
			(location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
			error = EINVAL;
			goto fail_change_next_allocation;
		}
fail_change_next_allocation; 1898 } 1899 /* Return previous value. */ 1900 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation; 1901 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) { 1902 /* On magic value for location, set nextAllocation to next block 1903 * after metadata zone and set flag in mount structure to indicate 1904 * that nextAllocation should not be updated again. 1905 */ 1906 if (hfsmp->hfs_metazone_end != 0) { 1907 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); 1908 } 1909 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION; 1910 } else { 1911 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION; 1912 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location); 1913 } 1914 MarkVCBDirty(hfsmp); 1915fail_change_next_allocation: 1916 hfs_unlock_mount(hfsmp); 1917 return (error); 1918 } 1919 1920#if HFS_SPARSE_DEV 1921 case HFS_SETBACKINGSTOREINFO: { 1922 struct vnode * bsfs_rootvp; 1923 struct vnode * di_vp; 1924 struct hfs_backingstoreinfo *bsdata; 1925 int error = 0; 1926 1927 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 1928 return (EROFS); 1929 } 1930 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { 1931 return (EALREADY); 1932 } 1933 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1934 if (suser(cred, NULL) && 1935 kauth_cred_getuid(cred) != vfsp->f_owner) { 1936 return (EACCES); /* must be owner of file system */ 1937 } 1938 bsdata = (struct hfs_backingstoreinfo *)ap->a_data; 1939 if (bsdata == NULL) { 1940 return (EINVAL); 1941 } 1942 if ((error = file_vnode(bsdata->backingfd, &di_vp))) { 1943 return (error); 1944 } 1945 if ((error = vnode_getwithref(di_vp))) { 1946 file_drop(bsdata->backingfd); 1947 return(error); 1948 } 1949 1950 if (vnode_mount(vp) == vnode_mount(di_vp)) { 1951 (void)vnode_put(di_vp); 1952 file_drop(bsdata->backingfd); 1953 return (EINVAL); 1954 } 1955 1956 /* 1957 * Obtain the backing fs root vnode and keep a reference 1958 * on it. This reference will be dropped in hfs_unmount. 1959 */ 1960 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */ 1961 if (error) { 1962 (void)vnode_put(di_vp); 1963 file_drop(bsdata->backingfd); 1964 return (error); 1965 } 1966 vnode_ref(bsfs_rootvp); 1967 vnode_put(bsfs_rootvp); 1968 1969 hfs_lock_mount(hfsmp); 1970 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp; 1971 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; 1972 hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4; 1973 hfs_unlock_mount(hfsmp); 1974 1975 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */ 1976 1977 /* 1978 * If the sparse image is on a sparse image file (as opposed to a sparse 1979 * bundle), then we may need to limit the free space to the maximum size 1980 * of a file on that volume. So we query (using pathconf), and if we get 1981 * a meaningful result, we cache the number of blocks for later use in 1982 * hfs_freeblks(). 1983 */ 1984 hfsmp->hfs_backingfs_maxblocks = 0; 1985 if (vnode_vtype(di_vp) == VREG) { 1986 int terr; 1987 int hostbits; 1988 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context); 1989 if (terr == 0 && hostbits != 0 && hostbits < 64) { 1990 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits; 1991 1992 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize; 1993 } 1994 } 1995 1996 /* The free extent cache is managed differently for sparse devices. 
		 * There is a window between when the volume is mounted and when
		 * the device is marked as sparse, so the free extent cache for
		 * this volume is currently initialized as for a normal volume
		 * (sorted by block count).  Reset the cache so that it will be
		 * rebuilt for the sparse device (sorted by start block).
		 */
		ResetVCBFreeExtCache(hfsmp);

		(void)vnode_put(di_vp);
		file_drop(bsdata->backingfd);
		return (0);
	}
	case HFS_CLRBACKINGSTOREINFO: {
		struct vnode * tmpvp;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
		    hfsmp->hfs_backingfs_rootvp) {

			hfs_lock_mount(hfsmp);
			hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
			tmpvp = hfsmp->hfs_backingfs_rootvp;
			hfsmp->hfs_backingfs_rootvp = NULLVP;
			hfsmp->hfs_sparsebandblks = 0;
			hfs_unlock_mount(hfsmp);

			vnode_rele(tmpvp);
		}
		return (0);
	}
#endif /* HFS_SPARSE_DEV */

	/* Change the next CNID stored in the VH */
	case HFS_CHANGE_NEXTCNID: {
		int error = 0;		/* Assume success */
		u_int32_t fileid;
		int wraparound = 0;
		int lockflags = 0;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}

		fileid = *(u_int32_t *)ap->a_data;

		/* Must hold the catalog lock exclusive to advance the CNID pointer */
		lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

		hfs_lock_mount(hfsmp);

		/* If it is less than the current next CNID, force the wraparound bit to be set */
		if (fileid < hfsmp->vcbNxtCNID) {
			wraparound = 1;
		}

		/* Return previous value.
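		 * The caller's buffer thus acts as an exchange: it supplies the
		 * new next-CNID and receives the previous one back.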
*/ 2066 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID; 2067 2068 hfsmp->vcbNxtCNID = fileid; 2069 2070 if (wraparound) { 2071 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; 2072 } 2073 2074 MarkVCBDirty(hfsmp); 2075 hfs_unlock_mount(hfsmp); 2076 hfs_systemfile_unlock (hfsmp, lockflags); 2077 2078 return (error); 2079 } 2080 2081 case F_FREEZE_FS: { 2082 struct mount *mp; 2083 2084 mp = vnode_mount(vp); 2085 hfsmp = VFSTOHFS(mp); 2086 2087 if (!(hfsmp->jnl)) 2088 return (ENOTSUP); 2089 2090 vfsp = vfs_statfs(mp); 2091 2092 if (kauth_cred_getuid(cred) != vfsp->f_owner && 2093 !kauth_cred_issuser(cred)) 2094 return (EACCES); 2095 2096 return hfs_freeze(hfsmp); 2097 } 2098 2099 case F_THAW_FS: { 2100 vfsp = vfs_statfs(vnode_mount(vp)); 2101 if (kauth_cred_getuid(cred) != vfsp->f_owner && 2102 !kauth_cred_issuser(cred)) 2103 return (EACCES); 2104 2105 return hfs_thaw(hfsmp, current_proc()); 2106 } 2107 2108 case HFS_BULKACCESS_FSCTL: { 2109 int size; 2110 2111 if (hfsmp->hfs_flags & HFS_STANDARD) { 2112 return EINVAL; 2113 } 2114 2115 if (is64bit) { 2116 size = sizeof(struct user64_access_t); 2117 } else { 2118 size = sizeof(struct user32_access_t); 2119 } 2120 2121 return do_bulk_access_check(hfsmp, vp, ap, size, context); 2122 } 2123 2124 case HFS_EXT_BULKACCESS_FSCTL: { 2125 int size; 2126 2127 if (hfsmp->hfs_flags & HFS_STANDARD) { 2128 return EINVAL; 2129 } 2130 2131 if (is64bit) { 2132 size = sizeof(struct user64_ext_access_t); 2133 } else { 2134 size = sizeof(struct user32_ext_access_t); 2135 } 2136 2137 return do_bulk_access_check(hfsmp, vp, ap, size, context); 2138 } 2139 2140 case HFS_SET_XATTREXTENTS_STATE: { 2141 int state; 2142 2143 if (ap->a_data == NULL) { 2144 return (EINVAL); 2145 } 2146 2147 state = *(int *)ap->a_data; 2148 2149 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2150 return (EROFS); 2151 } 2152 2153 /* Super-user can enable or disable extent-based extended 2154 * attribute support on a volume 2155 * Note: Starting Mac OS X 10.7, extent-based extended attributes 2156 * are enabled by default, so any change will be transient only 2157 * till the volume is remounted. 2158 */ 2159 if (!kauth_cred_issuser(kauth_cred_get())) { 2160 return (EPERM); 2161 } 2162 if (state == 0 || state == 1) 2163 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state); 2164 else 2165 return (EINVAL); 2166 } 2167 2168 case F_SETSTATICCONTENT: { 2169 int error; 2170 int enable_static = 0; 2171 struct cnode *cp = NULL; 2172 /* 2173 * lock the cnode, decorate the cnode flag, and bail out. 2174 * VFS should have already authenticated the caller for us. 2175 */ 2176 2177 if (ap->a_data) { 2178 /* 2179 * Note that even though ap->a_data is of type caddr_t, 2180 * the fcntl layer at the syscall handler will pass in NULL 2181 * or 1 depending on what the argument supplied to the fcntl 2182 * was. So it is in fact correct to check the ap->a_data 2183 * argument for zero or non-zero value when deciding whether or not 2184 * to enable the static bit in the cnode. 
2185 */ 2186 enable_static = 1; 2187 } 2188 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2189 return EROFS; 2190 } 2191 cp = VTOC(vp); 2192 2193 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2194 if (error == 0) { 2195 if (enable_static) { 2196 cp->c_flag |= C_SSD_STATIC; 2197 } 2198 else { 2199 cp->c_flag &= ~C_SSD_STATIC; 2200 } 2201 hfs_unlock (cp); 2202 } 2203 return error; 2204 } 2205 2206 case F_SET_GREEDY_MODE: { 2207 int error; 2208 int enable_greedy_mode = 0; 2209 struct cnode *cp = NULL; 2210 /* 2211 * lock the cnode, decorate the cnode flag, and bail out. 2212 * VFS should have already authenticated the caller for us. 2213 */ 2214 2215 if (ap->a_data) { 2216 /* 2217 * Note that even though ap->a_data is of type caddr_t, 2218 * the fcntl layer at the syscall handler will pass in NULL 2219 * or 1 depending on what the argument supplied to the fcntl 2220 * was. So it is in fact correct to check the ap->a_data 2221 * argument for zero or non-zero value when deciding whether or not 2222 * to enable the greedy mode bit in the cnode. 2223 */ 2224 enable_greedy_mode = 1; 2225 } 2226 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2227 return EROFS; 2228 } 2229 cp = VTOC(vp); 2230 2231 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2232 if (error == 0) { 2233 if (enable_greedy_mode) { 2234 cp->c_flag |= C_SSD_GREEDY_MODE; 2235 } 2236 else { 2237 cp->c_flag &= ~C_SSD_GREEDY_MODE; 2238 } 2239 hfs_unlock (cp); 2240 } 2241 return error; 2242 } 2243 2244 case F_SETIOTYPE: { 2245 int error; 2246 uint32_t iotypeflag = 0; 2247 2248 struct cnode *cp = NULL; 2249 /* 2250 * lock the cnode, decorate the cnode flag, and bail out. 2251 * VFS should have already authenticated the caller for us. 2252 */ 2253 2254 if (ap->a_data == NULL) { 2255 return EINVAL; 2256 } 2257 2258 /* 2259 * Note that even though ap->a_data is of type caddr_t, we 2260 * can only use 32 bits of flag values. 2261 */ 2262 iotypeflag = (uint32_t) ap->a_data; 2263 switch (iotypeflag) { 2264 case F_IOTYPE_ISOCHRONOUS: 2265 break; 2266 default: 2267 return EINVAL; 2268 } 2269 2270 2271 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2272 return EROFS; 2273 } 2274 cp = VTOC(vp); 2275 2276 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2277 if (error == 0) { 2278 switch (iotypeflag) { 2279 case F_IOTYPE_ISOCHRONOUS: 2280 cp->c_flag |= C_IO_ISOCHRONOUS; 2281 break; 2282 default: 2283 break; 2284 } 2285 hfs_unlock (cp); 2286 } 2287 return error; 2288 } 2289 2290 case F_MAKECOMPRESSED: { 2291 int error = 0; 2292 uint32_t gen_counter; 2293 struct cnode *cp = NULL; 2294 int reset_decmp = 0; 2295 2296 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2297 return EROFS; 2298 } 2299 2300 /* 2301 * acquire & lock the cnode. 2302 * VFS should have already authenticated the caller for us. 2303 */ 2304 2305 if (ap->a_data) { 2306 /* 2307 * Cast the pointer into a uint32_t so we can extract the 2308 * supplied generation counter. 2309 */ 2310 gen_counter = *((uint32_t*)ap->a_data); 2311 } 2312 else { 2313 return EINVAL; 2314 } 2315 2316#if HFS_COMPRESSION 2317 cp = VTOC(vp); 2318 /* Grab truncate lock first; we may truncate the file */ 2319 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2320 2321 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2322 if (error) { 2323 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); 2324 return error; 2325 } 2326 2327 /* Are there any other usecounts/FDs? 
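		 * (A note for readers: vnode_isinuse(vp, 1) reports whether the
		 * vnode is in use beyond our single reference, i.e. whether any
		 * other file descriptors or mappings still hold it; converting
		 * a busy file in place would not be safe.)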
		 */
		if (vnode_isinuse(vp, 1)) {
			hfs_unlock(cp);
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return EBUSY;
		}

		/* now we have the cnode locked down; validate the arguments */
		if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
			/* EINVAL if you are trying to manipulate an IMMUTABLE file */
			hfs_unlock(cp);
			hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
			return EINVAL;
		}

		if ((hfs_get_gencount (cp)) == gen_counter) {
			/*
			 * OK, the gen_counter matched.  Go for it:
			 * Toggle state bits, truncate file, and suppress mtime update.
			 */
			reset_decmp = 1;
			cp->c_bsdflags |= UF_COMPRESSED;

			error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
					     ap->a_context);
		}
		else {
			error = ESTALE;
		}

		/* Unlock the cnode before executing decmpfs; it may need to get an EA */
		hfs_unlock(cp);

		/*
		 * Reset the decmp state while still holding the truncate lock.  We need to
		 * serialize here against a listxattr on this node which may occur at any
		 * time.
		 *
		 * Even if '0/skiplock' is passed in the 2nd argument to hfs_file_is_compressed,
		 * that will still potentially require getting the com.apple.decmpfs EA.  If the
		 * EA is required, then we can't hold the cnode lock, because the getxattr call is
		 * generic (through VFS), and can't pass along any info telling it that we're already
		 * holding it (the lock).  If we don't serialize, then we risk listxattr stopping
		 * and trying to fill in the hfs_file_is_compressed info during the callback
		 * operation, which will result in deadlock against the b-tree node.
		 *
		 * So, to serialize against listxattr (which will grab buf_t meta references on
		 * the b-tree blocks), we hold the truncate lock as we're manipulating the
		 * decmpfs payload.
		 */
		if ((reset_decmp) && (error == 0)) {
			decmpfs_cnode *dp = VTOCMP (vp);
			if (dp != NULL) {
				decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
			}

			/* Initialize the decmpfs node as needed */
			(void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
		}

		hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);

#endif
		return error;
	}

	case F_SETBACKINGSTORE: {

		int error = 0;

		/*
		 * See the comment in F_SETSTATICCONTENT re: using
		 * a null check for a_data.
		 */
		if (ap->a_data) {
			error = hfs_set_backingstore (vp, 1);
		}
		else {
			error = hfs_set_backingstore (vp, 0);
		}

		return error;
	}

	case F_GETPATH_MTMINFO: {
		int error = 0;

		int *data = (int*) ap->a_data;

		/* Ask if this is a backingstore vnode */
		error = hfs_is_backingstore (vp, data);

		return error;
	}

	case F_FULLFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
			hfs_unlock(VTOC(vp));
		}

		return error;
	}

	case F_CHKCLEAN: {
		register struct cnode *cp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			cp = VTOC(vp);
			/*
			 * Used by a regression test to determine if
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsync'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);
			hfs_unlock(cp);
		}
		return (error);
	}

	case F_RDADVISE: {
		register struct radvisory *ra;
		struct filefork *fp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change.
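		 *
		 * A minimal userspace sketch of driving this advisory read
		 * through fcntl(2) (illustrative only; it assumes just the
		 * standard struct radvisory from <fcntl.h>):
		 *
		 *	struct radvisory ra = { .ra_offset = 0, .ra_count = 128 * 1024 };
		 *	if (fcntl(fd, F_RDADVISE, &ra) == -1)
		 *		perror("F_RDADVISE");
		 *
		 * Offsets at or beyond EOF are rejected below with EFBIG.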
*/ 2470 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2471 2472#if HFS_COMPRESSION 2473 if (compressed && (uncompressed_size == -1)) { 2474 /* fetching the uncompressed size failed above, so return the error */ 2475 error = decmpfs_error; 2476 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) || 2477 (!compressed && (ra->ra_offset >= fp->ff_size))) { 2478 error = EFBIG; 2479 } 2480#else /* HFS_COMPRESSION */ 2481 if (ra->ra_offset >= fp->ff_size) { 2482 error = EFBIG; 2483 } 2484#endif /* HFS_COMPRESSION */ 2485 else { 2486 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); 2487 } 2488 2489 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); 2490 return (error); 2491 } 2492 2493 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */ 2494 { 2495 if (is64bit) { 2496 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); 2497 } 2498 else { 2499 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); 2500 } 2501 return 0; 2502 } 2503 2504 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME: 2505 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time; 2506 break; 2507 2508 case SPOTLIGHT_FSCTL_GET_LAST_MTIME: 2509 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime; 2510 break; 2511 2512 case HFS_FSCTL_GET_VERY_LOW_DISK: 2513 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit; 2514 break; 2515 2516 case HFS_FSCTL_SET_VERY_LOW_DISK: 2517 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) { 2518 return EINVAL; 2519 } 2520 2521 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data; 2522 break; 2523 2524 case HFS_FSCTL_GET_LOW_DISK: 2525 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit; 2526 break; 2527 2528 case HFS_FSCTL_SET_LOW_DISK: 2529 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel 2530 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) { 2531 2532 return EINVAL; 2533 } 2534 2535 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data; 2536 break; 2537 2538 case HFS_FSCTL_GET_DESIRED_DISK: 2539 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel; 2540 break; 2541 2542 case HFS_FSCTL_SET_DESIRED_DISK: 2543 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) { 2544 return EINVAL; 2545 } 2546 2547 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data; 2548 break; 2549 2550 case HFS_VOLUME_STATUS: 2551 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions; 2552 break; 2553 2554 case HFS_SET_BOOT_INFO: 2555 if (!vnode_isvroot(vp)) 2556 return(EINVAL); 2557 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner)) 2558 return(EACCES); /* must be superuser or owner of filesystem */ 2559 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2560 return (EROFS); 2561 } 2562 hfs_lock_mount (hfsmp); 2563 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); 2564 hfs_unlock_mount (hfsmp); 2565 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); 2566 break; 2567 2568 case HFS_GET_BOOT_INFO: 2569 if (!vnode_isvroot(vp)) 2570 return(EINVAL); 2571 hfs_lock_mount (hfsmp); 2572 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo)); 2573 hfs_unlock_mount(hfsmp); 2574 break; 2575 2576 case HFS_MARK_BOOT_CORRUPT: 2577 /* Mark the boot volume corrupt by setting 2578 * kHFSVolumeInconsistentBit in the volume header. This will 2579 * force fsck_hfs on next mount. 
2580 */ 2581 if (!kauth_cred_issuser(kauth_cred_get())) { 2582 return EACCES; 2583 } 2584 2585 /* Allowed only on the root vnode of the boot volume */ 2586 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) || 2587 !vnode_isvroot(vp)) { 2588 return EINVAL; 2589 } 2590 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2591 return (EROFS); 2592 } 2593 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n"); 2594 hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED); 2595 break; 2596 2597 case HFS_FSCTL_GET_JOURNAL_INFO: 2598 jip = (struct hfs_journal_info*)ap->a_data; 2599 2600 if (vp == NULLVP) 2601 return EINVAL; 2602 2603 if (hfsmp->jnl == NULL) { 2604 jnl_start = 0; 2605 jnl_size = 0; 2606 } else { 2607 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; 2608 jnl_size = (off_t)hfsmp->jnl_size; 2609 } 2610 2611 jip->jstart = jnl_start; 2612 jip->jsize = jnl_size; 2613 break; 2614 2615 case HFS_SET_ALWAYS_ZEROFILL: { 2616 struct cnode *cp = VTOC(vp); 2617 2618 if (*(int *)ap->a_data) { 2619 cp->c_flag |= C_ALWAYS_ZEROFILL; 2620 } else { 2621 cp->c_flag &= ~C_ALWAYS_ZEROFILL; 2622 } 2623 break; 2624 } 2625 2626 case HFS_DISABLE_METAZONE: { 2627 /* Only root can disable metadata zone */ 2628 if (!kauth_cred_issuser(kauth_cred_get())) { 2629 return EACCES; 2630 } 2631 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2632 return (EROFS); 2633 } 2634 2635 /* Disable metadata zone now */ 2636 (void) hfs_metadatazone_init(hfsmp, true); 2637 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN); 2638 break; 2639 } 2640 2641 2642 case HFS_FSINFO_METADATA_BLOCKS: { 2643 int error; 2644 struct hfsinfo_metadata *hinfo; 2645 2646 hinfo = (struct hfsinfo_metadata *)ap->a_data; 2647 2648 /* Get information about number of metadata blocks */ 2649 error = hfs_getinfo_metadata_blocks(hfsmp, hinfo); 2650 if (error) { 2651 return error; 2652 } 2653 2654 break; 2655 } 2656 2657 case HFS_CS_FREESPACE_TRIM: { 2658 int error = 0; 2659 int lockflags = 0; 2660 2661 /* Only root allowed */ 2662 if (!kauth_cred_issuser(kauth_cred_get())) { 2663 return EACCES; 2664 } 2665 2666 /* 2667 * This core functionality is similar to hfs_scan_blocks(). 2668 * The main difference is that hfs_scan_blocks() is called 2669 * as part of mount where we are assured that the journal is 2670 * empty to start with. This fcntl() can be called on a 2671 * mounted volume, therefore it has to flush the content of 2672 * the journal as well as ensure the state of summary table. 2673 * 2674 * This fcntl scans over the entire allocation bitmap, 2675 * creates list of all the free blocks, and issues TRIM 2676 * down to the underlying device. This can take long time 2677 * as it can generate up to 512MB of read I/O. 2678 */ 2679 2680 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) { 2681 error = hfs_init_summary(hfsmp); 2682 if (error) { 2683 printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN); 2684 return error; 2685 } 2686 } 2687 2688 /* 2689 * The journal maintains list of recently deallocated blocks to 2690 * issue DKIOCUNMAPs when the corresponding journal transaction is 2691 * flushed to the disk. To avoid any race conditions, we only 2692 * want one active trim list and only one thread issuing DKIOCUNMAPs. 2693 * Therefore we make sure that the journal trim list is sync'ed, 2694 * empty, and not modifiable for the duration of our scan. 2695 * 2696 * Take the journal lock before flushing the journal to the disk. 
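		 * The resulting lock order is: take the journal lock, flush the
		 * journal, take the bitmap lock, drop the journal lock, scan
		 * and issue DKIOCUNMAPs, then drop the bitmap lock.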
		 * We keep holding the journal lock until we have acquired the
		 * bitmap lock, to make sure that no new journal transactions
		 * can start.  This ensures that the journal trim list is not
		 * modified after the journal flush and before we take the
		 * bitmap lock.  Once the bitmap lock is held, no further block
		 * deallocations can occur, so the journal lock can be released.
		 */
		hfs_journal_lock(hfsmp);

		/* Flush the journal and wait for all I/Os to finish up */
		error = hfs_journal_flush(hfsmp, TRUE);
		if (error) {
			hfs_journal_unlock(hfsmp);
			return error;
		}

		/* Take the bitmap lock to ensure it is not being modified */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

		/* Release the journal lock */
		hfs_journal_unlock(hfsmp);

		/*
		 * ScanUnmapBlocks reads the bitmap in a large block size
		 * (up to 1MB), unlike the runtime, which reads the bitmap
		 * in 4K blocks.  This can cause buf_t collisions
		 * and potential data corruption.  To avoid this, we
		 * invalidate all the existing buffers associated with
		 * the bitmap vnode before scanning it.
		 *
		 * Note: ScanUnmapBlocks() cleans up all the buffers
		 * after itself, so there won't be any large buffers left
		 * for us to clean up after it returns.
		 */
		error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
		if (error) {
			hfs_systemfile_unlock(hfsmp, lockflags);
			return error;
		}

		/* Traverse the bitmap and issue DKIOCUNMAPs */
		error = ScanUnmapBlocks(hfsmp);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			return error;
		}

		break;
	}

	default:
		return (ENOTTY);
	}

	return 0;
}

/*
 * select
 */
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int  a_which;
		int  a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the number of remaining blocks in a run.  The logical block is based on
 * hfsNode.logBlockSize.  The physical block number is based on the device
 * block size, which is currently 512 bytes.  The block run is returned in
 * logical blocks, and is the REMAINING number of blocks.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags = 0;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
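	 * (When bnp is NULL the caller wanted only the device vnode, so we
	 * can return as soon as *vpp has been filled in.)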
2796 */ 2797 if (vpp != NULL) 2798 *vpp = hfsmp->hfs_devvp; 2799 if (bnp == NULL) 2800 return (0); 2801 2802 logBlockSize = GetLogicalBlockSize(vp); 2803 blockposition = (off_t)bn * logBlockSize; 2804 2805 lockExtBtree = overflow_extents(fp); 2806 2807 if (lockExtBtree) 2808 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); 2809 2810 retval = MacToVFSError( 2811 MapFileBlockC (HFSTOVCB(hfsmp), 2812 (FCB*)fp, 2813 MAXPHYSIO, 2814 blockposition, 2815 bnp, 2816 &bytesContAvail)); 2817 2818 if (lockExtBtree) 2819 hfs_systemfile_unlock(hfsmp, lockflags); 2820 2821 if (retval == E_NONE) { 2822 /* Figure out how many read ahead blocks there are */ 2823 if (runp != NULL) { 2824 if (can_cluster(logBlockSize)) { 2825 /* Make sure this result never goes negative: */ 2826 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1; 2827 } else { 2828 *runp = 0; 2829 } 2830 } 2831 } 2832 return (retval); 2833} 2834 2835/* 2836 * Convert logical block number to file offset. 2837 */ 2838int 2839hfs_vnop_blktooff(struct vnop_blktooff_args *ap) 2840/* 2841 struct vnop_blktooff_args { 2842 vnode_t a_vp; 2843 daddr64_t a_lblkno; 2844 off_t *a_offset; 2845 }; 2846*/ 2847{ 2848 if (ap->a_vp == NULL) 2849 return (EINVAL); 2850 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp); 2851 2852 return(0); 2853} 2854 2855/* 2856 * Convert file offset to logical block number. 2857 */ 2858int 2859hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap) 2860/* 2861 struct vnop_offtoblk_args { 2862 vnode_t a_vp; 2863 off_t a_offset; 2864 daddr64_t *a_lblkno; 2865 }; 2866*/ 2867{ 2868 if (ap->a_vp == NULL) 2869 return (EINVAL); 2870 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp)); 2871 2872 return(0); 2873} 2874 2875/* 2876 * Map file offset to physical block number. 2877 * 2878 * If this function is called for write operation, and if the file 2879 * had virtual blocks allocated (delayed allocation), real blocks 2880 * are allocated by calling ExtendFileC(). 2881 * 2882 * If this function is called for read operation, and if the file 2883 * had virtual blocks allocated (delayed allocation), no change 2884 * to the size of file is done, and if required, rangelist is 2885 * searched for mapping. 2886 * 2887 * System file cnodes are expected to be locked (shared or exclusive). 
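 *
 * A sketch of the caller-side contract (illustrative, not from the
 * original source), using the VFS-level wrapper:
 *
 *	daddr64_t bpn;
 *	size_t run;
 *	if (VNOP_BLOCKMAP(vp, foffset, size, &bpn, &run, NULL,
 *	                  VNODE_READ, ctx) == 0 && bpn == (daddr64_t)-1) {
 *		// no valid on-disk blocks back this range yet;
 *		// the caller zero-fills up to 'run' bytes
 *	}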
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;
	int tooklock = 0;

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	} else {
		if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
			int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			switch(state) {
				case FILE_IS_COMPRESSED:
					return ENOTSUP;
				case FILE_IS_CONVERTING:
					/* if FILE_IS_CONVERTING, we allow blockmap */
					break;
				default:
					printf("invalid state %d for compressed file\n", state);
					/* fall through */
			}
		}
	}
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			tooklock = 1;
		}
	}
	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

retry:
	/* Check virtual blocks only when performing a write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto exit;
		} else {
			started_tr = 1;
		}
		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

	if (syslocks)
		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		int64_t actbytes;
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			if (syslocks) {
				hfs_systemfile_unlock(hfsmp, lockflags);
				syslocks = 0;
			}
			goto retry;
		}

		/*
		 * Note: ExtendFileC will release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes,
		 * since ExtendFileC will account for the virtual blocks.
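		 * For example, with 4 KB allocation blocks and ff_unallocblocks
		 * equal to 3, this converts the three loaned blocks into real
		 * allocations without changing the fork's logical size.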
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

		if (retval) {
			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			hfs_lock_mount (hfsmp);
			hfsmp->loanedBlocks += loanedBlocks;
			hfs_unlock_mount (hfsmp);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			if (started_tr) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
				started_tr = 0;
			}
			goto exit;
		}
	}

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
			       ap->a_bpn, &bytesContAvail);
	if (syslocks) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		syslocks = 0;
	}

	if (started_tr) {
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}
	if (retval) {
		/* On write, always return the error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return the error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * the rangelist for zero-fills.
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
			goto exit;
		}

		/* Validate that the start offset is within the logical file size */
		if (ap->a_foffset >= fp->ff_size) {
			goto exit;
		}

		/*
		 * At this point, we have encountered a failure during
		 * MapFileBlockC that resulted in ERANGE, and we are not servicing
		 * a write, and there are borrowed blocks.
		 *
		 * However, the cluster layer will not call blockmap for
		 * blocks that are borrowed and in-cache.  We have to assume that
		 * because we observed ERANGE being emitted from MapFileBlockC, this
		 * extent range is not valid on-disk.  So we treat this as a
		 * mapping that needs to be zero-filled prior to reading.
		 *
		 * Note that under certain circumstances (such as non-contiguous
		 * userland VM mappings in the calling process), cluster_io
		 * may be forced to split a large I/O driven by hfs_vnop_write
		 * into multiple sub-I/Os that necessitate a RMW cycle.  If this is
		 * the case here, then we have already removed the invalid range list
		 * mapping prior to getting to this blockmap call, so we should not
		 * search the invalid rangelist for this byte range.
		 */

		bytesContAvail = fp->ff_size - ap->a_foffset;
		/*
		 * Clip the contiguous available bytes to, at most, the allowable
		 * maximum or the amount requested.
		 */

		if (bytesContAvail > ap->a_size) {
			bytesContAvail = ap->a_size;
		}

		*ap->a_bpn = (daddr64_t) -1;
		retval = 0;

		goto exit;
	}
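	/*
	 * A worked illustration (not from the original source): suppose
	 * ff_size is 100 KB and bytes 40960-49151 form an invalid (not yet
	 * zeroed) range.  A read blockmap at a_foffset 40960 either takes
	 * the ERANGE path above, or, if MapFileBlockC succeeds, the rl_scan
	 * below reports the overlap, *a_bpn is set to -1, and bytesContAvail
	 * is clipped to 8192 so the caller zero-fills exactly that span.
	 */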
	/* MapFileBlockC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges.
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)bytesContAvail - 1,
			      &invalid_range);
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there are no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}
			break;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
			break;

		case RL_NOOVERLAP:
			break;
		} /* end switch */
		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;
	}

exit:
	if (retval == 0) {
		if (ap->a_run)
			*ap->a_run = bytesContAvail;

		if (ap->a_poff)
			*(int *)ap->a_poff = 0;
	}

	if (tooklock)
		hfs_unlock(cp);

	return (MacToVFSError(retval));
}

/*
 * Prepare and issue the I/O.
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os.
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);
	int error = 0;

	/* Mark buffer as containing static data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_STATIC) {
		buf_markstatic(bp);
	}

	/* Mark buffer as containing greedy-mode data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
		bufattr_markgreedymode(&bp->b_attr);
	}

	/* Mark buffer as containing isochronous (burst mode) data if cnode flag set */
	if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
		bufattr_markisochronous(&bp->b_attr);
	}

#if CONFIG_PROTECT
	cnode_t *cp = NULL;

	if ((!bufattr_rawencrypted(&bp->b_attr)) &&
	    ((cp = cp_get_protected_cnode(vp)) != NULL)) {
		/*
		 * We rely upon the truncate lock to protect the
		 * CP cache key from getting tossed prior to our IO finishing here.
		 * Nearly all cluster io calls to manipulate file payload from HFS
		 * take the truncate lock before calling into the cluster
		 * layer to ensure the file size does not change, or that they
		 * have exclusive right to change the EOF of the file.
		 * That same guarantee protects us here since the code that
		 * deals with CP lock events must now take the truncate lock
		 * before doing anything.
3202 * 3203 * There is 1 exception here: 3204 * 1) One exception should be the VM swapfile IO, because HFS will 3205 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the 3206 * swapfile code only without holding the truncate lock. This is because 3207 * individual swapfiles are maintained at fixed-length sizes by the VM code. 3208 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to 3209 * create our own UPL and thus take the truncate lock before calling 3210 * into the cluster layer. In that case, however, we are not concerned 3211 * with the CP blob being wiped out in the middle of the IO 3212 * because there isn't anything to toss; the VM swapfile key stays 3213 * in-core as long as the file is open. 3214 */ 3215 3216 3217 /* 3218 * Last chance: If this data protected I/O does not have unwrapped keys 3219 * present, then try to get them. We already know that it should, by this point. 3220 */ 3221 if (cp->c_cpentry->cp_flags & (CP_KEY_FLUSHED | CP_NEEDS_KEYS)) { 3222 int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS); 3223 if ((error = cp_handle_vnop(vp, io_op, 0)) != 0) { 3224 /* 3225 * We have to be careful here. By this point in the I/O path, VM or the cluster 3226 * engine has prepared a buf_t with the proper file offsets and all the rest, 3227 * so simply erroring out will result in us leaking this particular buf_t. 3228 * We need to properly decorate the buf_t just as buf_strategy would so as 3229 * to make it appear that the I/O errored out with the particular error code. 3230 */ 3231 buf_seterror (bp, error); 3232 buf_biodone(bp); 3233 return error; 3234 } 3235 } 3236 3237 /* 3238 *NB: 3239 * For filesystem resize, we may not have access to the underlying 3240 * file's cache key for whatever reason (device may be locked). However, 3241 * we do not need it since we are going to use the temporary HFS-wide resize key 3242 * which is generated once we start relocating file content. If this file's I/O 3243 * should be done using the resize key, it will have been supplied already, so 3244 * do not attach the file's cp blob to the buffer. 
3245 */ 3246 if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) { 3247 buf_setcpaddr(bp, cp->c_cpentry); 3248 } 3249 } 3250#endif /* CONFIG_PROTECT */ 3251 3252 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap); 3253 3254 return error; 3255} 3256 3257static int 3258hfs_minorupdate(struct vnode *vp) { 3259 struct cnode *cp = VTOC(vp); 3260 cp->c_flag &= ~C_MODIFIED; 3261 cp->c_touch_acctime = 0; 3262 cp->c_touch_chgtime = 0; 3263 cp->c_touch_modtime = 0; 3264 3265 return 0; 3266} 3267 3268int 3269do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context) 3270{ 3271 register struct cnode *cp = VTOC(vp); 3272 struct filefork *fp = VTOF(vp); 3273 kauth_cred_t cred = vfs_context_ucred(context); 3274 int retval; 3275 off_t bytesToAdd; 3276 off_t actualBytesAdded; 3277 off_t filebytes; 3278 u_int32_t fileblocks; 3279 int blksize; 3280 struct hfsmount *hfsmp; 3281 int lockflags; 3282 int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE); 3283 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES); 3284 3285 blksize = VTOVCB(vp)->blockSize; 3286 fileblocks = fp->ff_blocks; 3287 filebytes = (off_t)fileblocks * (off_t)blksize; 3288 3289 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START, 3290 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); 3291 3292 if (length < 0) 3293 return (EINVAL); 3294 3295 /* This should only happen with a corrupt filesystem */ 3296 if ((off_t)fp->ff_size < 0) 3297 return (EINVAL); 3298 3299 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE)) 3300 return (EFBIG); 3301 3302 hfsmp = VTOHFS(vp); 3303 3304 retval = E_NONE; 3305 3306 /* Files that are changing size are not hot file candidates. */ 3307 if (hfsmp->hfc_stage == HFC_RECORDING) { 3308 fp->ff_bytesread = 0; 3309 } 3310 3311 /* 3312 * We cannot just check if fp->ff_size == length (as an optimization) 3313 * since there may be extra physical blocks that also need truncation. 3314 */ 3315#if QUOTA 3316 if ((retval = hfs_getinoquota(cp))) 3317 return(retval); 3318#endif /* QUOTA */ 3319 3320 /* 3321 * Lengthen the size of the file. We must ensure that the 3322 * last byte of the file is allocated. Since the smallest 3323 * value of ff_size is 0, length will be at least 1. 3324 */ 3325 if (length > (off_t)fp->ff_size) { 3326#if QUOTA 3327 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)), 3328 cred, 0); 3329 if (retval) 3330 goto Err_Exit; 3331#endif /* QUOTA */ 3332 /* 3333 * If we don't have enough physical space then 3334 * we need to extend the physical size. 3335 */ 3336 if (length > filebytes) { 3337 int eflags; 3338 u_int32_t blockHint = 0; 3339 3340 /* All or nothing and don't round up to clumpsize. */ 3341 eflags = kEFAllMask | kEFNoClumpMask; 3342 3343 if (cred && (suser(cred, NULL) != 0)) { 3344 eflags |= kEFReserveMask; /* keep a reserve */ 3345 } 3346 3347 /* 3348 * Allocate Journal and Quota files in metadata zone. 
3349 */ 3350 if (filebytes == 0 && 3351 hfsmp->hfs_flags & HFS_METADATA_ZONE && 3352 hfs_virtualmetafile(cp)) { 3353 eflags |= kEFMetadataMask; 3354 blockHint = hfsmp->hfs_metazone_start; 3355 } 3356 if (hfs_start_transaction(hfsmp) != 0) { 3357 retval = EINVAL; 3358 goto Err_Exit; 3359 } 3360 3361 /* Protect extents b-tree and allocation bitmap */ 3362 lockflags = SFL_BITMAP; 3363 if (overflow_extents(fp)) 3364 lockflags |= SFL_EXTENTS; 3365 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3366 3367 /* 3368 * Keep growing the file as long as the current EOF is 3369 * less than the desired value. 3370 */ 3371 while ((length > filebytes) && (retval == E_NONE)) { 3372 bytesToAdd = length - filebytes; 3373 retval = MacToVFSError(ExtendFileC(VTOVCB(vp), 3374 (FCB*)fp, 3375 bytesToAdd, 3376 blockHint, 3377 eflags, 3378 &actualBytesAdded)); 3379 3380 filebytes = (off_t)fp->ff_blocks * (off_t)blksize; 3381 if (actualBytesAdded == 0 && retval == E_NONE) { 3382 if (length > filebytes) 3383 length = filebytes; 3384 break; 3385 } 3386 } /* endwhile */ 3387 3388 hfs_systemfile_unlock(hfsmp, lockflags); 3389 3390 if (hfsmp->jnl) { 3391 if (skipupdate) { 3392 (void) hfs_minorupdate(vp); 3393 } 3394 else { 3395 (void) hfs_update(vp, TRUE); 3396 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3397 } 3398 } 3399 3400 hfs_end_transaction(hfsmp); 3401 3402 if (retval) 3403 goto Err_Exit; 3404 3405 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, 3406 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); 3407 } 3408 3409 if (ISSET(flags, IO_NOZEROFILL)) { 3410 // An optimisation for the hibernation file 3411 if (vnode_isswap(vp)) 3412 rl_remove_all(&fp->ff_invalidranges); 3413 } else { 3414 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) { 3415 struct rl_entry *invalid_range; 3416 off_t zero_limit; 3417 3418 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; 3419 if (length < zero_limit) zero_limit = length; 3420 3421 if (length > (off_t)fp->ff_size) { 3422 struct timeval tv; 3423 3424 /* Extending the file: time to fill out the current last page w. zeroes? */ 3425 if ((fp->ff_size & PAGE_MASK_64) && 3426 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64, 3427 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) { 3428 3429 /* There's some valid data at the start of the (current) last page 3430 of the file, so zero out the remainder of that page to ensure the 3431 entire page contains valid data. 
Since there is no invalid range 3432 possible past the (current) eof, there's no need to remove anything 3433 from the invalid range list before calling cluster_write(): */ 3434 hfs_unlock(cp); 3435 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit, 3436 fp->ff_size, (off_t)0, 3437 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY); 3438 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); 3439 if (retval) goto Err_Exit; 3440 3441 /* Merely invalidate the remaining area, if necessary: */ 3442 if (length > zero_limit) { 3443 microuptime(&tv); 3444 rl_add(zero_limit, length - 1, &fp->ff_invalidranges); 3445 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; 3446 } 3447 } else { 3448 /* The page containing the (current) eof is invalid: just add the 3449 remainder of the page to the invalid list, along with the area 3450 being newly allocated: 3451 */ 3452 microuptime(&tv); 3453 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges); 3454 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; 3455 }; 3456 } 3457 } else { 3458 panic("hfs_truncate: invoked on non-UBC object?!"); 3459 }; 3460 } 3461 if (suppress_times == 0) { 3462 cp->c_touch_modtime = TRUE; 3463 } 3464 fp->ff_size = length; 3465 3466 } else { /* Shorten the size of the file */ 3467 3468 // An optimisation for the hibernation file 3469 if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) { 3470 rl_remove_all(&fp->ff_invalidranges); 3471 } else if ((off_t)fp->ff_size > length) { 3472 /* Any space previously marked as invalid is now irrelevant: */ 3473 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges); 3474 } 3475 3476 /* 3477 * Account for any unmapped blocks. Note that the new 3478 * file length can still end up with unmapped blocks. 3479 */ 3480 if (fp->ff_unallocblocks > 0) { 3481 u_int32_t finalblks; 3482 u_int32_t loanedBlocks; 3483 3484 hfs_lock_mount(hfsmp); 3485 loanedBlocks = fp->ff_unallocblocks; 3486 cp->c_blocks -= loanedBlocks; 3487 fp->ff_blocks -= loanedBlocks; 3488 fp->ff_unallocblocks = 0; 3489 3490 hfsmp->loanedBlocks -= loanedBlocks; 3491 3492 finalblks = (length + blksize - 1) / blksize; 3493 if (finalblks > fp->ff_blocks) { 3494 /* calculate required unmapped blocks */ 3495 loanedBlocks = finalblks - fp->ff_blocks; 3496 hfsmp->loanedBlocks += loanedBlocks; 3497 3498 fp->ff_unallocblocks = loanedBlocks; 3499 cp->c_blocks += loanedBlocks; 3500 fp->ff_blocks += loanedBlocks; 3501 } 3502 hfs_unlock_mount (hfsmp); 3503 } 3504 3505#if QUOTA 3506 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); 3507#endif /* QUOTA */ 3508 if (hfs_start_transaction(hfsmp) != 0) { 3509 retval = EINVAL; 3510 goto Err_Exit; 3511 } 3512 3513 if (fp->ff_unallocblocks == 0) { 3514 /* Protect extents b-tree and allocation bitmap */ 3515 lockflags = SFL_BITMAP; 3516 if (overflow_extents(fp)) 3517 lockflags |= SFL_EXTENTS; 3518 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3519 3520 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, 3521 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false)); 3522 3523 hfs_systemfile_unlock(hfsmp, lockflags); 3524 } 3525 if (hfsmp->jnl) { 3526 if (retval == 0) { 3527 fp->ff_size = length; 3528 } 3529 if (skipupdate) { 3530 (void) hfs_minorupdate(vp); 3531 } 3532 else { 3533 (void) hfs_update(vp, TRUE); 3534 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3535 } 3536 } 3537 hfs_end_transaction(hfsmp); 3538 3539 filebytes = (off_t)fp->ff_blocks * (off_t)blksize; 3540 if (retval) 3541 goto Err_Exit; 3542#if QUOTA 3543 /* These are bytesreleased */ 3544 
(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); 3545#endif /* QUOTA */ 3546 3547 /* 3548 * Only set update flag if the logical length changes & we aren't 3549 * suppressing modtime updates. 3550 */ 3551 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) { 3552 cp->c_touch_modtime = TRUE; 3553 } 3554 fp->ff_size = length; 3555 } 3556 if (cp->c_mode & (S_ISUID | S_ISGID)) { 3557 if (!vfs_context_issuser(context)) { 3558 cp->c_mode &= ~(S_ISUID | S_ISGID); 3559 skipupdate = 0; 3560 } 3561 } 3562 if (skipupdate) { 3563 retval = hfs_minorupdate(vp); 3564 } 3565 else { 3566 cp->c_touch_chgtime = TRUE; /* status changed */ 3567 if (suppress_times == 0) { 3568 cp->c_touch_modtime = TRUE; /* file data was modified */ 3569 3570 /* 3571 * If we are not suppressing the modtime update, then 3572 * update the gen count as well. 3573 */ 3574 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) { 3575 hfs_incr_gencount(cp); 3576 } 3577 } 3578 3579 retval = hfs_update(vp, MNT_WAIT); 3580 } 3581 if (retval) { 3582 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, 3583 -1, -1, -1, retval, 0); 3584 } 3585 3586Err_Exit: 3587 3588 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END, 3589 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0); 3590 3591 return (retval); 3592} 3593 3594/* 3595 * Preparation which must be done prior to deleting the catalog record 3596 * of a file or directory. In order to make the on-disk as safe as possible, 3597 * we remove the catalog entry before releasing the bitmap blocks and the 3598 * overflow extent records. However, some work must be done prior to deleting 3599 * the catalog record. 3600 * 3601 * When calling this function, the cnode must exist both in memory and on-disk. 3602 * If there are both resource fork and data fork vnodes, this function should 3603 * be called on both. 3604 */ 3605 3606int 3607hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) { 3608 3609 struct filefork *fp = VTOF(vp); 3610 struct cnode *cp = VTOC(vp); 3611#if QUOTA 3612 int retval = 0; 3613#endif /* QUOTA */ 3614 3615 /* Cannot truncate an HFS directory! */ 3616 if (vnode_isdir(vp)) { 3617 return (EISDIR); 3618 } 3619 3620 /* 3621 * See the comment below in hfs_truncate for why we need to call 3622 * setsize here. Essentially we want to avoid pending IO if we 3623 * already know that the blocks are going to be released here. 3624 * This function is only called when totally removing all storage for a file, so 3625 * we can take a shortcut and immediately setsize (0); 3626 */ 3627 ubc_setsize(vp, 0); 3628 3629 /* This should only happen with a corrupt filesystem */ 3630 if ((off_t)fp->ff_size < 0) 3631 return (EINVAL); 3632 3633 /* 3634 * We cannot just check if fp->ff_size == length (as an optimization) 3635 * since there may be extra physical blocks that also need truncation. 3636 */ 3637#if QUOTA 3638 if ((retval = hfs_getinoquota(cp))) { 3639 return(retval); 3640 } 3641#endif /* QUOTA */ 3642 3643 /* Wipe out any invalid ranges which have yet to be backed by disk */ 3644 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges); 3645 3646 /* 3647 * Account for any unmapped blocks. Since we're deleting the 3648 * entire file, we don't have to worry about just shrinking 3649 * to a smaller number of borrowed blocks. 
3650 */ 3651 if (fp->ff_unallocblocks > 0) { 3652 u_int32_t loanedBlocks; 3653 3654 hfs_lock_mount (hfsmp); 3655 loanedBlocks = fp->ff_unallocblocks; 3656 cp->c_blocks -= loanedBlocks; 3657 fp->ff_blocks -= loanedBlocks; 3658 fp->ff_unallocblocks = 0; 3659 3660 hfsmp->loanedBlocks -= loanedBlocks; 3661 3662 hfs_unlock_mount (hfsmp); 3663 } 3664 3665 return 0; 3666} 3667 3668 3669/* 3670 * Special wrapper around calling TruncateFileC. This function is useable 3671 * even when the catalog record does not exist any longer, making it ideal 3672 * for use when deleting a file. The simplification here is that we know 3673 * that we are releasing all blocks. 3674 * 3675 * Note that this function may be called when there is no vnode backing 3676 * the file fork in question. We may call this from hfs_vnop_inactive 3677 * to clear out resource fork data (and may not want to clear out the data 3678 * fork yet). As a result, we pointer-check both sets of inputs before 3679 * doing anything with them. 3680 * 3681 * The caller is responsible for saving off a copy of the filefork(s) 3682 * embedded within the cnode prior to calling this function. The pointers 3683 * supplied as arguments must be valid even if the cnode is no longer valid. 3684 */ 3685 3686int 3687hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, 3688 struct filefork *rsrcfork, u_int32_t fileid) { 3689 3690 off_t filebytes; 3691 u_int32_t fileblocks; 3692 int blksize = 0; 3693 int error = 0; 3694 int lockflags; 3695 3696 blksize = hfsmp->blockSize; 3697 3698 /* Data Fork */ 3699 if (datafork) { 3700 datafork->ff_size = 0; 3701 3702 fileblocks = datafork->ff_blocks; 3703 filebytes = (off_t)fileblocks * (off_t)blksize; 3704 3705 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ 3706 3707 while (filebytes > 0) { 3708 if (filebytes > HFS_BIGFILE_SIZE) { 3709 filebytes -= HFS_BIGFILE_SIZE; 3710 } else { 3711 filebytes = 0; 3712 } 3713 3714 /* Start a transaction, and wipe out as many blocks as we can in this iteration */ 3715 if (hfs_start_transaction(hfsmp) != 0) { 3716 error = EINVAL; 3717 break; 3718 } 3719 3720 if (datafork->ff_unallocblocks == 0) { 3721 /* Protect extents b-tree and allocation bitmap */ 3722 lockflags = SFL_BITMAP; 3723 if (overflow_extents(datafork)) 3724 lockflags |= SFL_EXTENTS; 3725 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3726 3727 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false)); 3728 3729 hfs_systemfile_unlock(hfsmp, lockflags); 3730 } 3731 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3732 3733 /* Finish the transaction and start over if necessary */ 3734 hfs_end_transaction(hfsmp); 3735 3736 if (error) { 3737 break; 3738 } 3739 } 3740 } 3741 3742 /* Resource fork */ 3743 if (error == 0 && rsrcfork) { 3744 rsrcfork->ff_size = 0; 3745 3746 fileblocks = rsrcfork->ff_blocks; 3747 filebytes = (off_t)fileblocks * (off_t)blksize; 3748 3749 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ 3750 3751 while (filebytes > 0) { 3752 if (filebytes > HFS_BIGFILE_SIZE) { 3753 filebytes -= HFS_BIGFILE_SIZE; 3754 } else { 3755 filebytes = 0; 3756 } 3757 3758 /* Start a transaction, and wipe out as many blocks as we can in this iteration */ 3759 if (hfs_start_transaction(hfsmp) != 0) { 3760 error = EINVAL; 3761 break; 3762 } 3763 3764 if (rsrcfork->ff_unallocblocks == 0) { 3765 /* Protect extents b-tree and allocation bitmap */ 3766 lockflags = SFL_BITMAP; 3767 if 
(overflow_extents(rsrcfork)) 3768 lockflags |= SFL_EXTENTS; 3769 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3770 3771 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false)); 3772 3773 hfs_systemfile_unlock(hfsmp, lockflags); 3774 } 3775 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3776 3777 /* Finish the transaction and start over if necessary */ 3778 hfs_end_transaction(hfsmp); 3779 3780 if (error) { 3781 break; 3782 } 3783 } 3784 } 3785 3786 return error; 3787} 3788 3789errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock) 3790{ 3791 errno_t error; 3792 3793 /* 3794 * Call ubc_setsize to give the VM subsystem a chance to do 3795 * whatever it needs to with existing pages before we delete 3796 * blocks. Note that symlinks don't use the UBC so we'll 3797 * get back ENOENT in that case. 3798 */ 3799 if (have_cnode_lock) { 3800 error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY); 3801 if (error == EAGAIN) { 3802 cnode_t *cp = VTOC(vp); 3803 3804 if (cp->c_truncatelockowner != current_thread()) { 3805#if DEVELOPMENT || DEBUG 3806 panic("hfs: hfs_ubc_setsize called without exclusive truncate lock!"); 3807#else 3808 printf("hfs: hfs_ubc_setsize called without exclusive truncate lock!\n"); 3809#endif 3810 } 3811 3812 hfs_unlock(cp); 3813 error = ubc_setsize_ex(vp, len, 0); 3814 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); 3815 } 3816 } else 3817 error = ubc_setsize_ex(vp, len, 0); 3818 3819 return error == ENOENT ? 0 : error; 3820} 3821 3822/* 3823 * Truncate a cnode to at most length size, freeing (or adding) the 3824 * disk blocks. 3825 */ 3826int 3827hfs_truncate(struct vnode *vp, off_t length, int flags, 3828 int truncateflags, vfs_context_t context) 3829{ 3830 struct filefork *fp = VTOF(vp); 3831 off_t filebytes; 3832 u_int32_t fileblocks; 3833 int blksize; 3834 errno_t error = 0; 3835 struct cnode *cp = VTOC(vp); 3836 3837 /* Cannot truncate an HFS directory! */ 3838 if (vnode_isdir(vp)) { 3839 return (EISDIR); 3840 } 3841 /* A swap file cannot change size. */ 3842 if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) { 3843 return (EPERM); 3844 } 3845 3846 blksize = VTOVCB(vp)->blockSize; 3847 fileblocks = fp->ff_blocks; 3848 filebytes = (off_t)fileblocks * (off_t)blksize; 3849 3850 bool caller_has_cnode_lock = (cp->c_lockowner == current_thread()); 3851 3852 error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock); 3853 if (error) 3854 return error; 3855 3856 if (!caller_has_cnode_lock) { 3857 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 3858 if (error) 3859 return error; 3860 } 3861 3862 // have to loop truncating or growing files that are 3863 // really big because otherwise transactions can get 3864 // enormous and consume too many kernel resources. 
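	// (each pass moves filebytes by at most HFS_BIGFILE_SIZE toward the
	// target, so one huge resize becomes a series of smaller,
	// individually-journaled do_hfs_truncate calls)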
    if (length < filebytes) {
        while (filebytes > length) {
            if ((filebytes - length) > HFS_BIGFILE_SIZE) {
                filebytes -= HFS_BIGFILE_SIZE;
            } else {
                filebytes = length;
            }
            cp->c_flag |= C_FORCEUPDATE;
            error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
            if (error)
                break;
        }
    } else if (length > filebytes) {
        while (filebytes < length) {
            if ((length - filebytes) > HFS_BIGFILE_SIZE) {
                filebytes += HFS_BIGFILE_SIZE;
            } else {
                filebytes = length;
            }
            cp->c_flag |= C_FORCEUPDATE;
            error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
            if (error)
                break;
        }
    } else /* Same logical size */ {

        error = do_hfs_truncate(vp, length, flags, truncateflags, context);
    }
    /* Files that are changing size are not hot file candidates. */
    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
        fp->ff_bytesread = 0;
    }

    if (!caller_has_cnode_lock)
        hfs_unlock(cp);

    // Make sure UBC's size matches up (in case we didn't completely succeed)
    errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
    if (!error)
        error = err2;

    return error;
}


/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
        vnode_t a_vp;
        off_t a_length;
        u_int32_t a_flags;
        off_t *a_bytesallocated;
        off_t a_offset;
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    ExtendedVCB *vcb;
    off_t length = ap->a_length;
    off_t startingPEOF;
    off_t moreBytesRequested;
    off_t actualBytesAdded;
    off_t filebytes;
    u_int32_t fileblocks;
    int retval, retval2;
    u_int32_t blockHint;
    u_int32_t extendFlags;   /* For call to ExtendFileC */
    struct hfsmount *hfsmp;
    kauth_cred_t cred = vfs_context_ucred(ap->a_context);
    int lockflags;
    time_t orig_ctime;

    *(ap->a_bytesallocated) = 0;

    if (!vnode_isreg(vp))
        return (EISDIR);
    if (length < (off_t)0)
        return (EINVAL);

    cp = VTOC(vp);

    orig_ctime = VTOC(vp)->c_ctime;

    check_for_tracked_file(vp, orig_ctime,
        ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP | NAMESPACE_HANDLER_DELETE_OP
                          : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);

    hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

    if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
        goto Err_Exit;
    }

    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);
    vcb = VTOVCB(vp);

    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

    if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
        retval = EINVAL;
        goto Err_Exit;
    }

    /* Fill in the flags word for the call to Extend the file */

    extendFlags = kEFNoClumpMask;
    if (ap->a_flags & ALLOCATECONTIG)
        extendFlags |= kEFContigMask;
    if (ap->a_flags & ALLOCATEALL)
        extendFlags |= kEFAllMask;
    if (cred && suser(cred, NULL) != 0)
        extendFlags |= kEFReserveMask;
    if (hfs_virtualmetafile(cp))
        extendFlags |= kEFMetadataMask;

    retval = E_NONE;
    blockHint = 0;
    startingPEOF = filebytes;

    if (ap->a_flags & ALLOCATEFROMPEOF)
        length += filebytes;
    else if (ap->a_flags & ALLOCATEFROMVOL)
        blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

    /* If no changes are necessary, then we're done */
    if (filebytes == length)
        goto Std_Exit;

    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of filebytes is 0, length will be at least 1.
     */
    if (length > filebytes) {
        off_t total_bytes_added = 0, orig_request_size;

        orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
        retval = hfs_chkdq(cp,
                (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
                cred, 0);
        if (retval)
            goto Err_Exit;

#endif /* QUOTA */
        /*
         * Metadata zone checks.
         */
        if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (hfs_virtualmetafile(cp)) {
                blockHint = hfsmp->hfs_metazone_start;
            } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
                       (blockHint <= hfsmp->hfs_metazone_end)) {
                /*
                 * Move blockHint outside metadata zone.
                 */
                blockHint = hfsmp->hfs_metazone_end + 1;
            }
        }

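        /*
         * As with hfs_truncate above, the extension below proceeds in
         * chunks of at most HFS_BIGFILE_SIZE bytes per transaction so
         * that one huge preallocation cannot monopolize the journal;
         * e.g., assuming a 2 GB chunk size, a 5 GB request would take
         * three trips through the loop.
         */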
        while ((length > filebytes) && (retval == E_NONE)) {
            off_t bytesRequested;

            if (hfs_start_transaction(hfsmp) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }

            /* Protect extents b-tree and allocation bitmap */
            lockflags = SFL_BITMAP;
            if (overflow_extents(fp))
                lockflags |= SFL_EXTENTS;
            lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

            if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
                bytesRequested = HFS_BIGFILE_SIZE;
            } else {
                bytesRequested = moreBytesRequested;
            }

            if (extendFlags & kEFContigMask) {
                // if we're on a sparse device, this will force it to do a
                // full scan to find the space needed.
                hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
            }

            retval = MacToVFSError(ExtendFileC(vcb,
                        (FCB*)fp,
                        bytesRequested,
                        blockHint,
                        extendFlags,
                        &actualBytesAdded));

            if (retval == E_NONE) {
                *(ap->a_bytesallocated) += actualBytesAdded;
                total_bytes_added += actualBytesAdded;
                moreBytesRequested -= actualBytesAdded;
                if (blockHint != 0) {
                    blockHint += actualBytesAdded / vcb->blockSize;
                }
            }
            filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

            hfs_systemfile_unlock(hfsmp, lockflags);

            if (hfsmp->jnl) {
                (void) hfs_update(vp, TRUE);
                (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
            }

            hfs_end_transaction(hfsmp);
        }

        /*
         * If we got an error and no changes were made, then exit;
         * otherwise we must do the hfs_update to reflect the changes.
         */
        if (retval && (startingPEOF == filebytes))
            goto Err_Exit;

        /*
         * Adjust actualBytesAdded to be allocation block aligned, not
         * clump size aligned.
         * NOTE: What we are reporting does not affect reality
         * until the file is closed, when we truncate the file to
         * allocation block size.
         */
        if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
            *(ap->a_bytesallocated) =
                roundup(orig_request_size, (off_t)vcb->blockSize);

    } else { /* Shorten the size of the file */

        /*
         * N.B. At present, this code is never called. If and when we
         * do start using it, it looks like there might be slightly
         * strange semantics with the file size: it's possible for the
         * file size to *increase*, e.g. if the current file size is 5,
         * length is 1024 and filebytes is 4096, the file size will
         * end up being 1024 bytes. This isn't necessarily a problem
         * but it's not consistent with the code above, which doesn't
         * change the file size.
         */
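        /*
         * (In the example above, it is the hfs_truncate() call below
         * that produces the 1024-byte size: the ff_size fixup following
         * it only ever shrinks ff_size down to filebytes, never grows it.)
         */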

        retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        /*
         * If we got an error and no changes were made, then exit;
         * otherwise we must do the hfs_update to reflect the changes.
         */
        if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
        /* These are bytesreleased */
        (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

        if (fp->ff_size > filebytes) {
            fp->ff_size = filebytes;

            hfs_ubc_setsize(vp, fp->ff_size, true);
        }
    }

Std_Exit:
    cp->c_touch_chgtime = TRUE;
    cp->c_touch_modtime = TRUE;
    retval2 = hfs_update(vp, MNT_WAIT);

    if (retval == 0)
        retval = retval2;
Err_Exit:
    hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    hfs_unlock(cp);
    return (retval);
}


/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
    struct vnop_pagein_args {
        vnode_t a_vp,
        upl_t a_pl,
        vm_offset_t a_pl_offset,
        off_t a_f_offset,
        size_t a_size,
        int a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp;
    struct cnode *cp;
    struct filefork *fp;
    int error = 0;
    upl_t upl;
    upl_page_info_t *pl;
    off_t f_offset;
    off_t page_needed_f_offset;
    int offset;
    int isize;
    int upl_size;
    int pg_index;
    boolean_t truncate_lock_held = FALSE;
    boolean_t file_converted = FALSE;
    kern_return_t kret;

    vp = ap->a_vp;
    cp = VTOC(vp);
    fp = VTOF(vp);

#if CONFIG_PROTECT
    if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
        /*
         * If we errored here, then this means that one of two things occurred:
         * 1. there was a problem with the decryption of the key.
         * 2. the device is locked and we are not allowed to access this particular file.
         *
         * Either way, this means that we need to shut down this upl now. As long as
         * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
         * then we create a upl and immediately abort it.
         */
        if (ap->a_pl == NULL) {
            /* create the upl */
            ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl,
                    UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
            /* mark the range as needed so it doesn't immediately get discarded upon abort */
            ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

            /* Abort the range */
            ubc_upl_abort_range(upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
        }

        return error;
    }
#endif /* CONFIG_PROTECT */

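    /*
     * Dispatch on who owns the UPL: a non-NULL a_pl means the VM system
     * handed us one (old, pre-V2 behavior, now only used for swap files),
     * while a NULL a_pl means V2 paging, where we create and manage the
     * UPL ourselves below.
     */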
    if (ap->a_pl != NULL) {
        /*
         * this can only happen for swap files now that
         * we're asking for V2 paging behavior...
         * so we don't need to worry about decompression, or
         * keeping track of blocks read, or taking the truncate lock
         */
        error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                               ap->a_size, (off_t)fp->ff_size, ap->a_flags);
        goto pagein_done;
    }

    page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;

retry_pagein:
    /*
     * take truncate lock (shared/recursive) to guard against
     * zero-fill thru fsync interfering, but only for v2
     *
     * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
     * lock shared and we are allowed to recurse 1 level if this thread already
     * owns the lock exclusively... this can legally occur
     * if we are doing a shrinking ftruncate against a file
     * that is mapped private, and the pages being truncated
     * do not currently exist in the cache... in that case
     * we will have to page-in the missing pages in order
     * to provide them to the private mapping... we must
     * also call hfs_unlock_truncate with a positive been_recursed
     * arg to indicate that if we have recursed, there is no need to drop
     * the lock. Allowing this simple recursion is necessary
     * in order to avoid a certain deadlock... since the ftruncate
     * already holds the truncate lock exclusively, if we try
     * to acquire it shared to protect the pagein path, we will
     * hang this thread
     *
     * NOTE: The if () block below is a workaround in order to prevent a
     * VM deadlock. See rdar://7853471.
     *
     * If we are in a forced unmount, then launchd will still have the
     * dyld_shared_cache file mapped as it is trying to reboot. If we
     * take the truncate lock here to service a page fault, then our
     * thread could deadlock with the forced-unmount. The forced unmount
     * thread will try to reclaim the dyld_shared_cache vnode, but since it's
     * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
     * thread will think it needs to copy all of the data out of the file
     * and into a VM copy object. If we hold the cnode lock here, then that
     * VM operation will not be able to proceed, because we'll set a busy page
     * before attempting to grab the lock. Note that this isn't as simple as "don't
     * call ubc_setsize" because doing that would just shift the problem to the
     * ubc_msync done before the vnode is reclaimed.
     *
     * So, if a forced unmount on this volume is in flight AND the cnode is
     * marked C_DELETED, then just go ahead and do the page in without taking
     * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
     * that is not going to be available on the next mount, this seems like an
     * OK solution from a correctness point of view, even though it is hacky.
     */
    if (vfs_isforce(vp->v_mount)) {
        if (cp->c_flag & C_DELETED) {
            /* If we don't get it, then just go ahead and operate without the lock */
            truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
        }
    }
    else {
        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
        truncate_lock_held = TRUE;
    }

    kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

    if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
        error = EINVAL;
        goto pagein_done;
    }
    ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

    upl_size = isize = ap->a_size;

    /*
     * Scan from the back to find the last page in the UPL, so that we
     * aren't looking at a UPL that may have already been freed by the
     * preceding aborts/completions.
     */
    for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
        if (upl_page_present(pl, --pg_index))
            break;
        if (pg_index == 0) {
            /*
             * no absent pages were found in the range specified
             * just abort the UPL to get rid of it and then we're done
             */
            ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
            goto pagein_done;
        }
    }
    /*
     * initialize the offset variables before we touch the UPL.
     * f_offset is the position into the file, in bytes
     * offset is the position into the UPL, in bytes
     * pg_index is the pg# of the UPL we're operating on
     * isize is the offset into the UPL of the last page that is present.
     */
    isize = ((pg_index + 1) * PAGE_SIZE);
    pg_index = 0;
    offset = 0;
    f_offset = ap->a_f_offset;

    while (isize) {
        int xsize;
        int num_of_pages;

        if ( !upl_page_present(pl, pg_index)) {
            /*
             * we asked for RET_ONLY_ABSENT, so it's possible
             * to get back empty slots in the UPL.
             * just skip over them
             */
            f_offset += PAGE_SIZE;
            offset += PAGE_SIZE;
            isize -= PAGE_SIZE;
            pg_index++;

            continue;
        }
        /*
         * We know that we have at least one absent page.
         * Now checking to see how many in a row we have
         */
        num_of_pages = 1;
        xsize = isize - PAGE_SIZE;

        while (xsize) {
            if ( !upl_page_present(pl, pg_index + num_of_pages))
                break;
            num_of_pages++;
            xsize -= PAGE_SIZE;
        }
        xsize = num_of_pages * PAGE_SIZE;

#if HFS_COMPRESSION
        if (VNODE_IS_RSRC(vp)) {
            /* allow pageins of the resource fork */
        } else {
            int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

            if (compressed) {

                if (truncate_lock_held) {
                    /*
                     * can't hold the truncate lock when calling into the decmpfs layer
                     * since it calls back into this layer... even though we're only
                     * holding the lock in shared mode, and the re-entrant path only
                     * takes the lock shared, we can deadlock if some other thread
                     * tries to grab the lock exclusively in between.
                     */
                    hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
                    truncate_lock_held = FALSE;
                }
                ap->a_pl = upl;
                ap->a_pl_offset = offset;
                ap->a_f_offset = f_offset;
                ap->a_size = xsize;

                error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
                /*
                 * note that decmpfs_pagein_compressed can change the state of
                 * 'compressed'... it will set it to 0 if the file is no longer
                 * compressed once the compression lock is successfully taken,
                 * i.e. we would block on that lock while the file is being inflated
                 */
                if (compressed) {
                    if (error == 0) {
                        /* successful page-in, update the access time */
                        VTOC(vp)->c_touch_acctime = TRUE;

                        /* compressed files are not hot file candidates */
                        if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                            fp->ff_bytesread = 0;
                        }
                    } else if (error == EAGAIN) {
                        /*
                         * EAGAIN indicates someone else already holds the compression lock...
                         * to avoid deadlocking, we'll abort this range of pages with an
                         * indication that the pagein needs to be redriven
                         */
                        ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
                    } else if (error == ENOSPC) {

                        if (upl_size == PAGE_SIZE)
                            panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");

                        ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

                        ap->a_size = PAGE_SIZE;
                        ap->a_pl = NULL;
                        ap->a_pl_offset = 0;
                        ap->a_f_offset = page_needed_f_offset;

                        goto retry_pagein;
                    }
                    goto pagein_next_range;
                }
                else {
                    /*
                     * Set file_converted only if the file became decompressed while we were
                     * paging in. If it were still compressed, we would re-start the loop using the goto
                     * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
                     * condition below, since we could have avoided taking the truncate lock to prevent
                     * a deadlock in the force unmount case.
                     */
                    file_converted = TRUE;
                }
            }
            if (file_converted == TRUE) {
                /*
                 * the file was converted back to a regular file after we first saw it as compressed,
                 * so we need to abort the upl, retake the truncate lock, recreate the UPL and start over;
                 * reset a_size so that we consider what remains of the original request
                 * and null out a_pl and a_pl_offset.
                 *
                 * We should only be able to get into this block if decmpfs_pagein_compressed
                 * successfully decompressed the range in question for this file.
                 */
                ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

                ap->a_size = isize;
                ap->a_pl = NULL;
                ap->a_pl_offset = 0;

                /* Reset file_converted back to false so that we don't infinite-loop. */
                file_converted = FALSE;
                goto retry_pagein;
            }
        }
#endif
        error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

        /*
         * Keep track of blocks read.
         */
        if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
            int bytesread;
            int took_cnode_lock = 0;

            if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
                bytesread = fp->ff_size;
            else
                bytesread = xsize;

            /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
            if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
                hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                took_cnode_lock = 1;
            }
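            /*
             * E.g., once a file has seen more than 4 GB of pagein
             * traffic in one sampling period, the ff_bytesread sum no
             * longer fits in 32 bits, so the update is done behind the
             * exclusive cnode lock taken above.
             */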
            /*
             * If this file hasn't been seen since the start of
             * the current sampling period then start over.
             */
            if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
                struct timeval tv;

                fp->ff_bytesread = bytesread;
                microtime(&tv);
                cp->c_atime = tv.tv_sec;
            } else {
                fp->ff_bytesread += bytesread;
            }
            cp->c_touch_acctime = TRUE;
            if (took_cnode_lock)
                hfs_unlock(cp);
        }
pagein_next_range:
        f_offset += xsize;
        offset += xsize;
        isize -= xsize;
        pg_index += num_of_pages;

        error = 0;
    }

pagein_done:
    if (truncate_lock_held == TRUE) {
        /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
        hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
    }

    return (error);
}

/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
    struct vnop_pageout_args {
        vnode_t a_vp,
        upl_t a_pl,
        vm_offset_t a_pl_offset,
        off_t a_f_offset,
        size_t a_size,
        int a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int retval = 0;
    off_t filesize;
    upl_t upl;
    upl_page_info_t *pl;
    vm_offset_t a_pl_offset;
    int a_flags;
    int is_pageoutv2 = 0;
    kern_return_t kret;

    cp = VTOC(vp);
    fp = VTOF(vp);

    /*
     * Figure out where the file ends, for pageout purposes. If
     * ff_new_size > ff_size, then we're in the middle of extending the
     * file via a write, so it is safe (and necessary) that we be able
     * to pageout up to that point.
     */
    filesize = fp->ff_size;
    if (fp->ff_new_size > filesize)
        filesize = fp->ff_new_size;

    a_flags = ap->a_flags;
    a_pl_offset = ap->a_pl_offset;

    /*
     * we can tell if we're getting the new or old behavior from the UPL
     */
    if ((upl = ap->a_pl) == NULL) {
        int request_flags;

        is_pageoutv2 = 1;
        /*
         * we're in control of any UPL we commit
         * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
         */
        a_flags &= ~UPL_NOCOMMIT;
        a_pl_offset = 0;

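        /*
         * Two ways to get here with a NULL UPL: an msync-style flush
         * arrives via ubc_msync with UPL_MSYNC set, while ordinary
         * pager-driven cleaning arrives without it; the request_flags
         * chosen below differ accordingly.
         */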
        /*
         * For V2 semantics, we want to take the cnode truncate lock
         * shared to guard against the file size changing via zero-filling.
         *
         * However, we have to be careful because we may be invoked
         * via the ubc_msync path to write out dirty mmap'd pages
         * in response to a lock event on a content-protected
         * filesystem (e.g. to write out class A files).
         * As a result, we want to take the truncate lock 'SHARED' with
         * the mini-recursion locktype so that we don't deadlock/panic
         * because we may already be holding the truncate lock exclusive to force any other
         * IOs to have blocked behind us.
         */
        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);

        if (a_flags & UPL_MSYNC) {
            request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
        }
        else {
            request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
        }

        kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

        if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
            retval = EINVAL;
            goto pageout_done;
        }
    }
    /*
     * from this point forward upl points at the UPL we're working with
     * it was either passed in or we successfully created it
     */

    /*
     * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
     * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
     * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
     * N dirty ranges in the UPL. Note that this is almost a direct copy of the
     * logic in vnode_pageout except that we need to do it after grabbing the truncate
     * lock in HFS so that we don't lock invert ourselves.
     *
     * Note that we can still get into this function on behalf of the default pager with
     * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
     * since fsync and other writing threads will grab the locks, then mark the
     * relevant pages as busy. But the pageout codepath marks the pages as busy,
     * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
     * we do not try to grab anything for the pre-V2 case, which should only be accessed
     * by the paging/VM system.
     */

    if (is_pageoutv2) {
        off_t f_offset;
        int offset;
        int isize;
        int pg_index;
        int error;
        int error_ret = 0;

        isize = ap->a_size;
        f_offset = ap->a_f_offset;

        /*
         * Scan from the back to find the last page in the UPL, so that we
         * aren't looking at a UPL that may have already been freed by the
         * preceding aborts/completions.
         */
        for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
            if (upl_page_present(pl, --pg_index))
                break;
            if (pg_index == 0) {
                ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
                goto pageout_done;
            }
        }

        /*
         * initialize the offset variables before we touch the UPL.
         * a_f_offset is the position into the file, in bytes
         * offset is the position into the UPL, in bytes
         * pg_index is the pg# of the UPL we're operating on.
         * isize is the offset into the UPL of the last non-clean page.
         */
        isize = ((pg_index + 1) * PAGE_SIZE);

        offset = 0;
        pg_index = 0;

        while (isize) {
            int xsize;
            int num_of_pages;

            if ( !upl_page_present(pl, pg_index)) {
                /*
                 * we asked for RET_ONLY_DIRTY, so it's possible
                 * to get back empty slots in the UPL.
                 * just skip over them
                 */
                f_offset += PAGE_SIZE;
                offset += PAGE_SIZE;
                isize -= PAGE_SIZE;
                pg_index++;

                continue;
            }
            if ( !upl_dirty_page(pl, pg_index)) {
                panic("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
            }

            /*
             * We know that we have at least one dirty page.
             * Now checking to see how many in a row we have
             */
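            /*
             * E.g., if the UPL holds dirty pages at indexes 0, 1 and 3
             * (index 2 came back absent because it was clean), we make
             * two passes through this loop: one 2-page cluster_pageout,
             * then one 1-page cluster_pageout.
             */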
            num_of_pages = 1;
            xsize = isize - PAGE_SIZE;

            while (xsize) {
                if ( !upl_dirty_page(pl, pg_index + num_of_pages))
                    break;
                num_of_pages++;
                xsize -= PAGE_SIZE;
            }
            xsize = num_of_pages * PAGE_SIZE;

            if (!vnode_isswap(vp)) {
                off_t end_of_range;
                int tooklock;

                tooklock = 0;

                if (cp->c_lockowner != current_thread()) {
                    if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
                        /*
                         * we're in the v2 path, so we are the
                         * owner of the UPL... we may have already
                         * processed some of the UPL, so abort it
                         * from the current working offset to the
                         * end of the UPL
                         */
                        ubc_upl_abort_range(upl,
                                            offset,
                                            ap->a_size - offset,
                                            UPL_ABORT_FREE_ON_EMPTY);
                        goto pageout_done;
                    }
                    tooklock = 1;
                }
                end_of_range = f_offset + xsize - 1;

                if (end_of_range >= filesize) {
                    end_of_range = (off_t)(filesize - 1);
                }
                if (f_offset < filesize) {
                    rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
                    cp->c_flag |= C_MODIFIED; /* leof is dirty */
                }
                if (tooklock) {
                    hfs_unlock(cp);
                }
            }
            if ((error = cluster_pageout(vp, upl, offset, f_offset,
                                         xsize, filesize, a_flags))) {
                if (error_ret == 0)
                    error_ret = error;
            }
            f_offset += xsize;
            offset += xsize;
            isize -= xsize;
            pg_index += num_of_pages;
        }
        /* capture errnos bubbled out of cluster_pageout if they occurred */
        if (error_ret != 0) {
            retval = error_ret;
        }
    } /* end block for v2 pageout behavior */
    else {
        if (!vnode_isswap(vp)) {
            off_t end_of_range;
            int tooklock = 0;

            if (cp->c_lockowner != current_thread()) {
                if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
                    if (!(a_flags & UPL_NOCOMMIT)) {
                        ubc_upl_abort_range(upl,
                                            a_pl_offset,
                                            ap->a_size,
                                            UPL_ABORT_FREE_ON_EMPTY);
                    }
                    goto pageout_done;
                }
                tooklock = 1;
            }
            end_of_range = ap->a_f_offset + ap->a_size - 1;

            if (end_of_range >= filesize) {
                end_of_range = (off_t)(filesize - 1);
            }
            if (ap->a_f_offset < filesize) {
                rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
                cp->c_flag |= C_MODIFIED; /* leof is dirty */
            }

            if (tooklock) {
                hfs_unlock(cp);
            }
        }
        /*
         * just call cluster_pageout for old pre-v2 behavior
         */
        retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
                                 ap->a_size, filesize, a_flags);
    }

    /*
     * If data was written, update the modification time of the file,
     * but only if it's mapped writable; we will have touched the
     * modification time for direct writes.
     */
    if (retval == 0 && (ubc_is_mapped_writable(vp)
                        || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

        // Check again with lock
        bool mapped_writable = ubc_is_mapped_writable(vp);
        if (mapped_writable
            || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
            cp->c_touch_modtime = TRUE;
            cp->c_touch_chgtime = TRUE;

            /*
             * We only need to increment the generation counter if
             * it's currently mapped writable; otherwise we already
             * incremented the counter in hfs_vnop_mnomap.
             */
            if (mapped_writable)
                hfs_incr_gencount(VTOC(vp));

            /*
             * If setuid or setgid bits are set and this process is
             * not the superuser then clear the setuid and setgid bits
             * as a precaution against tampering.
             */
            if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
                (vfs_context_suser(ap->a_context) != 0)) {
                cp->c_mode &= ~(S_ISUID | S_ISGID);
            }
        }

        hfs_unlock(cp);
    }

pageout_done:
    if (is_pageoutv2) {
        /*
         * Release the truncate lock. Note that because
         * we may have taken the lock recursively by
         * being invoked via ubc_msync due to lockdown,
         * we should release it recursively, too.
         */
        hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
    }
    return (retval);
}

/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
    int retval = 0;
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = buf_vnode(bp);
    BlockDescriptor block;

    /* Trap B-Tree writes */
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
        (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
        (vp == VTOHFS(vp)->hfc_filevp)) {

        /*
         * Swap and validate the node if it is in native byte order.
         * This is always true on big endian, so we always validate
         * before writing here. On little endian, the node typically has
         * been swapped and validated when it was written to the journal,
         * so we won't do anything here.
         */
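        /*
         * (A B-tree node starts with a 14-byte node descriptor, and the
         * offset of record 0 -- always 14, i.e. 0x000e -- is stored in
         * the last two bytes of the node; so reading those bytes as a
         * host-order u_int16 yields 0x000e only for a native-order node.)
         */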
        if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
            /* Prepare the block pointer */
            block.blockHeader = bp;
            block.buffer = (char *)buf_dataptr(bp);
            block.blockNum = buf_lblkno(bp);
            /* not found in cache ==> came from disk */
            block.blockReadFromDisk = (buf_fromcache(bp) == 0);
            block.blockSize = buf_count(bp);

            /* Endian un-swap B-Tree node */
            retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig, false);
            if (retval)
                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
        }
    }

    /* This buffer shouldn't be locked anymore, but if it is, clear it */
    if ((buf_flags(bp) & B_LOCKED)) {
        // XXXdbg
        if (VTOHFS(vp)->jnl) {
            panic("hfs: CLEARING the lock bit on bp %p\n", bp);
        }
        buf_clearflags(bp, B_LOCKED);
    }
    retval = vn_bwrite(ap);

    return (retval);
}

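/*
 * A sketch of a typical hfs_relocate call (illustrative only; assumes a
 * caller, such as the hot file clustering code, that has a vfs_context_t
 * 'ctx' in hand and wants the data moved toward 'blockHint'):
 *
 *	if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) {
 *		error = hfs_relocate(vp, blockHint,
 *		                     vfs_context_ucred(ctx),
 *		                     vfs_context_proc(ctx));
 *		hfs_unlock(cp);
 *	}
 */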
/*
 * Relocate a file to a new location on disk
 * cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    u_int32_t headblks;
    u_int32_t datablks;
    u_int32_t blksize;
    u_int32_t growsize;
    u_int32_t nextallocsave;
    daddr64_t sector_a, sector_b;
    int eflags;
    off_t newbytes;
    int retval;
    int lockflags = 0;
    int took_trunc_lock = 0;
    int started_tr = 0;
    enum vtype vnodetype;

    vnodetype = vnode_vtype(vp);
    if (vnodetype != VREG) {
        /* Not allowed to move symlinks. */
        return (EPERM);
    }

    hfsmp = VTOHFS(vp);
    if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
        return (ENOSPC);
    }

    cp = VTOC(vp);
    fp = VTOF(vp);
    if (fp->ff_unallocblocks)
        return (EINVAL);

#if CONFIG_PROTECT
    /*
     * <rdar://problem/9118426>
     * Disable HFS file relocation on content-protected filesystems
     */
    if (cp_fs_protected(hfsmp->hfs_mp)) {
        return EINVAL;
    }
#endif
    /* If it's an SSD, also disable HFS relocation */
    if (hfsmp->hfs_flags & HFS_SSD) {
        return EINVAL;
    }


    blksize = hfsmp->blockSize;
    if (blockHint == 0)
        blockHint = hfsmp->nextAllocation;

    if (fp->ff_size > 0x7fffffff) {
        return (EFBIG);
    }

    //
    // We do not believe that this call to hfs_fsync() is
    // necessary and it causes a journal transaction
    // deadlock so we are removing it.
    //
    //if (vnodetype == VREG && !vnode_issystem(vp)) {
    //    retval = hfs_fsync(vp, MNT_WAIT, 0, p);
    //    if (retval)
    //        return (retval);
    //}

    if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
        hfs_unlock(cp);
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        /* Force lock since caller expects lock to be held. */
        if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return (retval);
        }
        /* No need to continue if file was removed. */
        if (cp->c_flag & C_NOEXISTS) {
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return (ENOENT);
        }
        took_trunc_lock = 1;
    }
    headblks = fp->ff_blocks;
    datablks = howmany(fp->ff_size, blksize);
    growsize = datablks * blksize;
    eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
    if (blockHint >= hfsmp->hfs_metazone_start &&
        blockHint <= hfsmp->hfs_metazone_end)
        eflags |= kEFMetadataMask;

    if (hfs_start_transaction(hfsmp) != 0) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        return (EINVAL);
    }
    started_tr = 1;
    /*
     * Protect the extents b-tree and the allocation bitmap
     * during MapFileBlockC and ExtendFileC operations.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
    if (retval) {
        retval = MacToVFSError(retval);
        goto out;
    }

    /*
     * STEP 1 - acquire new allocation blocks.
     */
    nextallocsave = hfsmp->nextAllocation;
    retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
    if (eflags & kEFMetadataMask) {
        hfs_lock_mount(hfsmp);
        HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
        MarkVCBDirty(hfsmp);
        hfs_unlock_mount(hfsmp);
    }

    retval = MacToVFSError(retval);
    if (retval == 0) {
        cp->c_flag |= C_MODIFIED;
        if (newbytes < growsize) {
            retval = ENOSPC;
            goto restore;
        } else if (fp->ff_blocks < (headblks + datablks)) {
            printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
            retval = ENOSPC;
            goto restore;
        }

        retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
        if (retval) {
            retval = MacToVFSError(retval);
        } else if ((sector_a + 1) == sector_b) {
            retval = ENOSPC;
            goto restore;
        } else if ((eflags & kEFMetadataMask) &&
                   ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
                    hfsmp->hfs_metazone_end)) {
#if 0
            const char * filestr;
            char emptystr = '\0';

            if (cp->c_desc.cd_nameptr != NULL) {
                filestr = (const char *)&cp->c_desc.cd_nameptr[0];
            } else if (vnode_name(vp) != NULL) {
                filestr = vnode_name(vp);
            } else {
                filestr = &emptystr;
            }
#endif
            retval = ENOSPC;
            goto restore;
        }
    }
    /* Done with system locks and journal for now. */
    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    hfs_end_transaction(hfsmp);
    started_tr = 0;

    if (retval) {
        /*
         * Check to see if failure is due to excessive fragmentation.
         */
        if ((retval == ENOSPC) &&
            (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
            hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
        }
        goto out;
    }
    /*
     * STEP 2 - clone file data into the new allocation blocks.
     */

    if (vnodetype == VLNK)
        retval = EPERM;
    else if (vnode_issystem(vp))
        retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
    else
        retval = hfs_clonefile(vp, headblks, datablks, blksize);

    /* Start transaction for step 3 or for a restore. */
    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto out;
    }
    started_tr = 1;
    if (retval)
        goto restore;

    /*
     * STEP 3 - switch to cloned data and remove old blocks.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    if (retval)
        goto restore;
out:
    if (took_trunc_lock)
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

    if (lockflags) {
        hfs_systemfile_unlock(hfsmp, lockflags);
        lockflags = 0;
    }

    /* Push cnode's new extent data to disk. */
    if (retval == 0) {
        (void) hfs_update(vp, MNT_WAIT);
    }
    if (hfsmp->jnl) {
        if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
            (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
        else
            (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
    }
exit:
    if (started_tr)
        hfs_end_transaction(hfsmp);

    return (retval);

restore:
    if (fp->ff_blocks == headblks) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        goto exit;
    }
    /*
     * Give back any newly allocated space.
     */
    if (lockflags == 0) {
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
    }

    (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
                         FTOC(fp)->c_fileid, false);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;

    if (took_trunc_lock)
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    goto exit;
}

/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
    caddr_t bufp;
    size_t bufsize;
    size_t copysize;
    size_t iosize;
    size_t offset;
    off_t writebase;
    uio_t auio;
    int error = 0;

    writebase = blkstart * blksize;
    copysize = blkcnt * blksize;
    iosize = bufsize = MIN(copysize, 128 * 1024);
    offset = 0;

    hfs_unlock(VTOC(vp));

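    /*
     * E.g., with blkstart = 256, blkcnt = 256 and 4 KB blocks, we copy
     * copysize = 1 MB of file data from offset 0 to writebase = 1 MB,
     * i.e. into the newly allocated second half of the fork, moving at
     * most 128 KB per read/write iteration below.
     */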
#if CONFIG_PROTECT
    if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
        hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        return (error);
    }
#endif /* CONFIG_PROTECT */

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        return (ENOMEM);
    }

    auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

    while (offset < copysize) {
        iosize = MIN(copysize - offset, iosize);

        uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_read(vp, auio, copysize, IO_NOCACHE);
        if (error) {
            printf("hfs_clonefile: cluster_read failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
            error = EIO;
            break;
        }

        uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_write(vp, auio, writebase + offset,
                              writebase + offset + iosize,
                              uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
        if (error) {
            printf("hfs_clonefile: cluster_write failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
            error = EIO;
            break;
        }
        offset += iosize;
    }
    uio_free(auio);

    if ((blksize & PAGE_MASK)) {
        /*
         * since the copy may not have started on a PAGE
         * boundary (or may not have ended on one), we
         * may have pages left in the cache since NOCACHE
         * will let partially written pages linger...
         * let's just flush the entire range to make sure
         * we don't have any pages left that are beyond
         * (or intersect) the real LEOF of this file
         */
        ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
    } else {
        /*
         * No need to call ubc_msync or hfs_invalbuf
         * since the file was copied using IO_NOCACHE and
         * the copy was done starting and ending on a page
         * boundary in the file.
         */
    }
    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
    return (error);
}

/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
    caddr_t bufp;
    char * offset;
    size_t bufsize;
    size_t iosize;
    struct buf *bp = NULL;
    daddr64_t blkno;
    daddr64_t blk;
    daddr64_t start_blk;
    daddr64_t last_blk;
    int breadcnt;
    int i;
    int error = 0;


    iosize = GetLogicalBlockSize(vp);
    bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
    breadcnt = bufsize / iosize;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    start_blk = ((daddr64_t)blkstart * blksize) / iosize;
    last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
    blkno = 0;

    while (blkno < last_blk) {
        /*
         * Read up to a megabyte
         */
        offset = bufp;
        for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
            error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
            if (error) {
                printf("hfs_clonesysfile: meta_bread error %d\n", error);
                goto out;
            }
            if (buf_count(bp) != iosize) {
                printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
                goto out;
            }
            bcopy((char *)buf_dataptr(bp), offset, iosize);

            buf_markinvalid(bp);
            buf_brelse(bp);
            bp = NULL;

            offset += iosize;
        }

        /*
         * Write up to a megabyte
         */
        offset = bufp;
        for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
            bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
            if (bp == NULL) {
                printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
                error = EIO;
                goto out;
            }
            bcopy(offset, (char *)buf_dataptr(bp), iosize);
            error = (int)buf_bwrite(bp);
            bp = NULL;
            if (error)
                goto out;
            offset += iosize;
        }
    }
out:
    if (bp) {
        buf_brelse(bp);
    }

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    error = hfs_fsync(vp, MNT_WAIT, 0, p);

    return (error);
}