1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* @(#)hfs_readwrite.c 1.0 29 * 30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved 31 * 32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files. 
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/buf_internal.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/uio.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/disk.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <sys/mount_internal.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

/*
 * True when size is 4K-aligned and no larger than half of MAXPHYSIO,
 * i.e. when the transfer is suitable for the cluster I/O layer.
 */
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF	/* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* Forward declarations for helpers defined later in this file. */
static int  hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int  hfs_minorupdate(struct vnode *vp);
static int  do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);


/* Tunable (kern.flush_cache_on_write): force a drive-cache flush after uncached writes. */
int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");

/*
 * Read data from a file.
99 */ 100int 101hfs_vnop_read(struct vnop_read_args *ap) 102{ 103 /* 104 struct vnop_read_args { 105 struct vnodeop_desc *a_desc; 106 vnode_t a_vp; 107 struct uio *a_uio; 108 int a_ioflag; 109 vfs_context_t a_context; 110 }; 111 */ 112 113 uio_t uio = ap->a_uio; 114 struct vnode *vp = ap->a_vp; 115 struct cnode *cp; 116 struct filefork *fp; 117 struct hfsmount *hfsmp; 118 off_t filesize; 119 off_t filebytes; 120 off_t start_resid = uio_resid(uio); 121 off_t offset = uio_offset(uio); 122 int retval = 0; 123 int took_truncate_lock = 0; 124 int io_throttle = 0; 125 126 /* Preflight checks */ 127 if (!vnode_isreg(vp)) { 128 /* can only read regular files */ 129 if (vnode_isdir(vp)) 130 return (EISDIR); 131 else 132 return (EPERM); 133 } 134 if (start_resid == 0) 135 return (0); /* Nothing left to do */ 136 if (offset < 0) 137 return (EINVAL); /* cant read from a negative offset */ 138 139#if HFS_COMPRESSION 140 if (VNODE_IS_RSRC(vp)) { 141 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */ 142 return 0; 143 } 144 /* otherwise read the resource fork normally */ 145 } else { 146 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ 147 if (compressed) { 148 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp)); 149 if (compressed) { 150 if (retval == 0) { 151 /* successful read, update the access time */ 152 VTOC(vp)->c_touch_acctime = TRUE; 153 154 /* compressed files are not hot file candidates */ 155 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { 156 VTOF(vp)->ff_bytesread = 0; 157 } 158 } 159 return retval; 160 } 161 /* otherwise the file was converted back to a regular file while we were reading it */ 162 retval = 0; 163 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { 164 int error; 165 166 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); 167 if (error) { 168 return error; 169 } 170 171 } 172 } 173#endif /* HFS_COMPRESSION */ 174 175 cp = VTOC(vp); 176 fp = VTOF(vp); 
177 hfsmp = VTOHFS(vp); 178 179#if CONFIG_PROTECT 180 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) { 181 goto exit; 182 } 183#endif 184 185 /* 186 * If this read request originated from a syscall (as opposed to 187 * an in-kernel page fault or something), then set it up for 188 * throttle checks. For example, large EAs may cause a VNOP_READ 189 * to occur, and we wouldn't want to throttle I/O while holding the 190 * EA B-Tree lock. 191 */ 192 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) { 193 io_throttle = IO_RETURN_ON_THROTTLE; 194 } 195 196read_again: 197 198 /* Protect against a size change. */ 199 hfs_lock_truncate(cp, HFS_SHARED_LOCK); 200 took_truncate_lock = 1; 201 202 filesize = fp->ff_size; 203 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; 204 if (offset > filesize) { 205 if ((hfsmp->hfs_flags & HFS_STANDARD) && 206 (offset > (off_t)MAXHFSFILESIZE)) { 207 retval = EFBIG; 208 } 209 goto exit; 210 } 211 212 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, 213 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); 214 215 retval = cluster_read(vp, uio, filesize, ap->a_ioflag | (io_throttle)); 216 217 cp->c_touch_acctime = TRUE; 218 219 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, 220 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); 221 222 /* 223 * Keep track blocks read 224 */ 225 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) { 226 int took_cnode_lock = 0; 227 off_t bytesread; 228 229 bytesread = start_resid - uio_resid(uio); 230 231 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ 232 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) { 233 hfs_lock(cp, HFS_FORCE_LOCK); 234 took_cnode_lock = 1; 235 } 236 /* 237 * If this file hasn't been seen since the start of 238 * the current sampling period then start over. 
239 */ 240 if (cp->c_atime < hfsmp->hfc_timebase) { 241 struct timeval tv; 242 243 fp->ff_bytesread = bytesread; 244 microtime(&tv); 245 cp->c_atime = tv.tv_sec; 246 } else { 247 fp->ff_bytesread += bytesread; 248 } 249 if (took_cnode_lock) 250 hfs_unlock(cp); 251 } 252exit: 253 if (took_truncate_lock) { 254 hfs_unlock_truncate(cp, 0); 255 } 256 if (retval == EAGAIN) { 257 throttle_lowpri_io(1); 258 259 retval = 0; 260 goto read_again; 261 } 262 return (retval); 263} 264 265/* 266 * Write data to a file. 267 */ 268int 269hfs_vnop_write(struct vnop_write_args *ap) 270{ 271 uio_t uio = ap->a_uio; 272 struct vnode *vp = ap->a_vp; 273 struct cnode *cp; 274 struct filefork *fp; 275 struct hfsmount *hfsmp; 276 kauth_cred_t cred = NULL; 277 off_t origFileSize; 278 off_t writelimit; 279 off_t bytesToAdd = 0; 280 off_t actualBytesAdded; 281 off_t filebytes; 282 off_t offset; 283 ssize_t resid; 284 int eflags; 285 int ioflag = ap->a_ioflag; 286 int retval = 0; 287 int lockflags; 288 int cnode_locked = 0; 289 int partialwrite = 0; 290 int do_snapshot = 1; 291 time_t orig_ctime=VTOC(vp)->c_ctime; 292 int took_truncate_lock = 0; 293 int io_return_on_throttle = 0; 294 struct rl_entry *invalid_range; 295 296#if HFS_COMPRESSION 297 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ 298 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp)); 299 switch(state) { 300 case FILE_IS_COMPRESSED: 301 return EACCES; 302 case FILE_IS_CONVERTING: 303 /* if FILE_IS_CONVERTING, we allow writes but do not 304 bother with snapshots or else we will deadlock. 
305 */ 306 do_snapshot = 0; 307 break; 308 default: 309 printf("invalid state %d for compressed file\n", state); 310 /* fall through */ 311 } 312 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { 313 int error; 314 315 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP); 316 if (error != 0) { 317 return error; 318 } 319 } 320 321 if (do_snapshot) { 322 check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio); 323 } 324 325#endif 326 327 // LP64todo - fix this! uio_resid may be 64-bit value 328 resid = uio_resid(uio); 329 offset = uio_offset(uio); 330 331 if (offset < 0) 332 return (EINVAL); 333 if (resid == 0) 334 return (E_NONE); 335 if (!vnode_isreg(vp)) 336 return (EPERM); /* Can only write regular files */ 337 338 cp = VTOC(vp); 339 fp = VTOF(vp); 340 hfsmp = VTOHFS(vp); 341 342#if CONFIG_PROTECT 343 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) { 344 goto exit; 345 } 346#endif 347 348 eflags = kEFDeferMask; /* defer file block allocations */ 349#if HFS_SPARSE_DEV 350 /* 351 * When the underlying device is sparse and space 352 * is low (< 8MB), stop doing delayed allocations 353 * and begin doing synchronous I/O. 354 */ 355 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && 356 (hfs_freeblks(hfsmp, 0) < 2048)) { 357 eflags &= ~kEFDeferMask; 358 ioflag |= IO_SYNC; 359 } 360#endif /* HFS_SPARSE_DEV */ 361 362 if ((ioflag & (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) == 363 (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) { 364 io_return_on_throttle = IO_RETURN_ON_THROTTLE; 365 } 366again: 367 /* Protect against a size change. */ 368 /* 369 * Protect against a size change. 370 * 371 * Note: If took_truncate_lock is true, then we previously got the lock shared 372 * but needed to upgrade to exclusive. So try getting it exclusive from the 373 * start. 
374 */ 375 if (ioflag & IO_APPEND || took_truncate_lock) { 376 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); 377 } 378 else { 379 hfs_lock_truncate(cp, HFS_SHARED_LOCK); 380 } 381 took_truncate_lock = 1; 382 383 /* Update UIO */ 384 if (ioflag & IO_APPEND) { 385 uio_setoffset(uio, fp->ff_size); 386 offset = fp->ff_size; 387 } 388 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) { 389 retval = EPERM; 390 goto exit; 391 } 392 393 origFileSize = fp->ff_size; 394 writelimit = offset + resid; 395 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; 396 397 /* 398 * We may need an exclusive truncate lock for several reasons, all 399 * of which are because we may be writing to a (portion of a) block 400 * for the first time, and we need to make sure no readers see the 401 * prior, uninitialized contents of the block. The cases are: 402 * 403 * 1. We have unallocated (delayed allocation) blocks. We may be 404 * allocating new blocks to the file and writing to them. 405 * (A more precise check would be whether the range we're writing 406 * to contains delayed allocation blocks.) 407 * 2. We need to extend the file. The bytes between the old EOF 408 * and the new EOF are not yet initialized. This is important 409 * even if we're not allocating new blocks to the file. If the 410 * old EOF and new EOF are in the same block, we still need to 411 * protect that range of bytes until they are written for the 412 * first time. 413 * 3. The write overlaps some invalid ranges (delayed zero fill; that 414 * part of the file has been allocated, but not yet written). 415 * 416 * If we had a shared lock with the above cases, we need to try to upgrade 417 * to an exclusive lock. If the upgrade fails, we will lose the shared 418 * lock, and will need to take the truncate lock again; the took_truncate_lock 419 * flag will still be set, causing us to try for an exclusive lock next time. 
420 * 421 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode 422 * lock is held, since it protects the range lists. 423 */ 424 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) && 425 ((fp->ff_unallocblocks != 0) || 426 (writelimit > origFileSize))) { 427 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) { 428 /* 429 * Lock upgrade failed and we lost our shared lock, try again. 430 * Note: we do not set took_truncate_lock=0 here. Leaving it 431 * set to 1 will cause us to try to get the lock exclusive. 432 */ 433 goto again; 434 } 435 else { 436 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */ 437 cp->c_truncatelockowner = current_thread(); 438 } 439 } 440 441 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { 442 goto exit; 443 } 444 cnode_locked = 1; 445 446 /* 447 * Now that we have the cnode lock, see if there are delayed zero fill ranges 448 * overlapping our write. If so, we need the truncate lock exclusive (see above). 449 */ 450 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) && 451 (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) { 452 /* 453 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes 454 * a deadlock, rather than simply returning failure. (That is, it apparently does 455 * not behave like a "try_lock"). Since this condition is rare, just drop the 456 * cnode lock and try again. Since took_truncate_lock is set, we will 457 * automatically take the truncate lock exclusive. 
458 */ 459 hfs_unlock(cp); 460 cnode_locked = 0; 461 hfs_unlock_truncate(cp, 0); 462 goto again; 463 } 464 465 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, 466 (int)offset, uio_resid(uio), (int)fp->ff_size, 467 (int)filebytes, 0); 468 469 /* Check if we do not need to extend the file */ 470 if (writelimit <= filebytes) { 471 goto sizeok; 472 } 473 474 cred = vfs_context_ucred(ap->a_context); 475 bytesToAdd = writelimit - filebytes; 476 477#if QUOTA 478 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)), 479 cred, 0); 480 if (retval) 481 goto exit; 482#endif /* QUOTA */ 483 484 if (hfs_start_transaction(hfsmp) != 0) { 485 retval = EINVAL; 486 goto exit; 487 } 488 489 while (writelimit > filebytes) { 490 bytesToAdd = writelimit - filebytes; 491 if (cred && suser(cred, NULL) != 0) 492 eflags |= kEFReserveMask; 493 494 /* Protect extents b-tree and allocation bitmap */ 495 lockflags = SFL_BITMAP; 496 if (overflow_extents(fp)) 497 lockflags |= SFL_EXTENTS; 498 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 499 500 /* Files that are changing size are not hot file candidates. */ 501 if (hfsmp->hfc_stage == HFC_RECORDING) { 502 fp->ff_bytesread = 0; 503 } 504 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd, 505 0, eflags, &actualBytesAdded)); 506 507 hfs_systemfile_unlock(hfsmp, lockflags); 508 509 if ((actualBytesAdded == 0) && (retval == E_NONE)) 510 retval = ENOSPC; 511 if (retval != E_NONE) 512 break; 513 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; 514 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE, 515 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); 516 } 517 (void) hfs_update(vp, TRUE); 518 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 519 (void) hfs_end_transaction(hfsmp); 520 521 /* 522 * If we didn't grow the file enough try a partial write. 523 * POSIX expects this behavior. 
524 */ 525 if ((retval == ENOSPC) && (filebytes > offset)) { 526 retval = 0; 527 partialwrite = 1; 528 uio_setresid(uio, (uio_resid(uio) - bytesToAdd)); 529 resid -= bytesToAdd; 530 writelimit = filebytes; 531 } 532sizeok: 533 if (retval == E_NONE) { 534 off_t filesize; 535 off_t zero_off; 536 off_t tail_off; 537 off_t inval_start; 538 off_t inval_end; 539 off_t io_start; 540 int lflag; 541 542 if (writelimit > fp->ff_size) 543 filesize = writelimit; 544 else 545 filesize = fp->ff_size; 546 547 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY); 548 549 if (offset <= fp->ff_size) { 550 zero_off = offset & ~PAGE_MASK_64; 551 552 /* Check to see whether the area between the zero_offset and the start 553 of the transfer to see whether is invalid and should be zero-filled 554 as part of the transfer: 555 */ 556 if (offset > zero_off) { 557 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP) 558 lflag |= IO_HEADZEROFILL; 559 } 560 } else { 561 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64; 562 563 /* The bytes between fp->ff_size and uio->uio_offset must never be 564 read without being zeroed. The current last block is filled with zeroes 565 if it holds valid data but in all cases merely do a little bookkeeping 566 to track the area from the end of the current last page to the start of 567 the area actually written. For the same reason only the bytes up to the 568 start of the page where this write will start is invalidated; any remainder 569 before uio->uio_offset is explicitly zeroed as part of the cluster_write. 570 571 Note that inval_start, the start of the page after the current EOF, 572 may be past the start of the write, in which case the zeroing 573 will be handled by the cluser_write of the actual data. 
574 */ 575 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; 576 inval_end = offset & ~PAGE_MASK_64; 577 zero_off = fp->ff_size; 578 579 if ((fp->ff_size & PAGE_MASK_64) && 580 (rl_scan(&fp->ff_invalidranges, 581 eof_page_base, 582 fp->ff_size - 1, 583 &invalid_range) != RL_NOOVERLAP)) { 584 /* The page containing the EOF is not valid, so the 585 entire page must be made inaccessible now. If the write 586 starts on a page beyond the page containing the eof 587 (inval_end > eof_page_base), add the 588 whole page to the range to be invalidated. Otherwise 589 (i.e. if the write starts on the same page), zero-fill 590 the entire page explicitly now: 591 */ 592 if (inval_end > eof_page_base) { 593 inval_start = eof_page_base; 594 } else { 595 zero_off = eof_page_base; 596 }; 597 }; 598 599 if (inval_start < inval_end) { 600 struct timeval tv; 601 /* There's some range of data that's going to be marked invalid */ 602 603 if (zero_off < inval_start) { 604 /* The pages between inval_start and inval_end are going to be invalidated, 605 and the actual write will start on a page past inval_end. 
Now's the last 606 chance to zero-fill the page containing the EOF: 607 */ 608 hfs_unlock(cp); 609 cnode_locked = 0; 610 retval = cluster_write(vp, (uio_t) 0, 611 fp->ff_size, inval_start, 612 zero_off, (off_t)0, 613 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY); 614 hfs_lock(cp, HFS_FORCE_LOCK); 615 cnode_locked = 1; 616 if (retval) goto ioerr_exit; 617 offset = uio_offset(uio); 618 }; 619 620 /* Mark the remaining area of the newly allocated space as invalid: */ 621 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges); 622 microuptime(&tv); 623 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; 624 zero_off = fp->ff_size = inval_end; 625 }; 626 627 if (offset > zero_off) lflag |= IO_HEADZEROFILL; 628 }; 629 630 /* Check to see whether the area between the end of the write and the end of 631 the page it falls in is invalid and should be zero-filled as part of the transfer: 632 */ 633 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; 634 if (tail_off > filesize) tail_off = filesize; 635 if (tail_off > writelimit) { 636 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) { 637 lflag |= IO_TAILZEROFILL; 638 }; 639 }; 640 641 /* 642 * if the write starts beyond the current EOF (possibly advanced in the 643 * zeroing of the last block, above), then we'll zero fill from the current EOF 644 * to where the write begins: 645 * 646 * NOTE: If (and ONLY if) the portion of the file about to be written is 647 * before the current EOF it might be marked as invalid now and must be 648 * made readable (removed from the invalid ranges) before cluster_write 649 * tries to write it: 650 */ 651 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset; 652 if (io_start < fp->ff_size) { 653 off_t io_end; 654 655 io_end = (lflag & IO_TAILZEROFILL) ? 
tail_off : writelimit; 656 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges); 657 }; 658 659 hfs_unlock(cp); 660 cnode_locked = 0; 661 662 /* 663 * We need to tell UBC the fork's new size BEFORE calling 664 * cluster_write, in case any of the new pages need to be 665 * paged out before cluster_write completes (which does happen 666 * in embedded systems due to extreme memory pressure). 667 * Similarly, we need to tell hfs_vnop_pageout what the new EOF 668 * will be, so that it can pass that on to cluster_pageout, and 669 * allow those pageouts. 670 * 671 * We don't update ff_size yet since we don't want pageins to 672 * be able to see uninitialized data between the old and new 673 * EOF, until cluster_write has completed and initialized that 674 * part of the file. 675 * 676 * The vnode pager relies on the file size last given to UBC via 677 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or 678 * ff_size (whichever is larger). NOTE: ff_new_size is always 679 * zero, unless we are extending the file via write. 680 */ 681 if (filesize > fp->ff_size) { 682 fp->ff_new_size = filesize; 683 ubc_setsize(vp, filesize); 684 } 685 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off, 686 tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle); 687 if (retval) { 688 fp->ff_new_size = 0; /* no longer extending; use ff_size */ 689 690 if (retval == EAGAIN) { 691 /* 692 * EAGAIN indicates that we still have I/O to do, but 693 * that we now need to be throttled 694 */ 695 if (resid != uio_resid(uio)) { 696 /* 697 * did manage to do some I/O before returning EAGAIN 698 */ 699 resid = uio_resid(uio); 700 offset = uio_offset(uio); 701 702 cp->c_touch_chgtime = TRUE; 703 cp->c_touch_modtime = TRUE; 704 } 705 if (filesize > fp->ff_size) { 706 /* 707 * we called ubc_setsize before the call to 708 * cluster_write... 
since we only partially 709 * completed the I/O, we need to 710 * re-adjust our idea of the filesize based 711 * on our interim EOF 712 */ 713 ubc_setsize(vp, offset); 714 715 fp->ff_size = offset; 716 } 717 goto exit; 718 } 719 if (filesize > origFileSize) { 720 ubc_setsize(vp, origFileSize); 721 } 722 goto ioerr_exit; 723 } 724 725 if (filesize > origFileSize) { 726 fp->ff_size = filesize; 727 728 /* Files that are changing size are not hot file candidates. */ 729 if (hfsmp->hfc_stage == HFC_RECORDING) { 730 fp->ff_bytesread = 0; 731 } 732 } 733 fp->ff_new_size = 0; /* ff_size now has the correct size */ 734 735 /* If we wrote some bytes, then touch the change and mod times */ 736 if (resid > uio_resid(uio)) { 737 cp->c_touch_chgtime = TRUE; 738 cp->c_touch_modtime = TRUE; 739 } 740 } 741 if (partialwrite) { 742 uio_setresid(uio, (uio_resid(uio) + bytesToAdd)); 743 resid += bytesToAdd; 744 } 745 746 // XXXdbg - see radar 4871353 for more info 747 { 748 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) { 749 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); 750 } 751 } 752 753ioerr_exit: 754 /* 755 * If we successfully wrote any data, and we are not the superuser 756 * we clear the setuid and setgid bits as a precaution against 757 * tampering. 758 */ 759 if (cp->c_mode & (S_ISUID | S_ISGID)) { 760 cred = vfs_context_ucred(ap->a_context); 761 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) { 762 if (!cnode_locked) { 763 hfs_lock(cp, HFS_FORCE_LOCK); 764 cnode_locked = 1; 765 } 766 cp->c_mode &= ~(S_ISUID | S_ISGID); 767 } 768 } 769 if (retval) { 770 if (ioflag & IO_UNIT) { 771 if (!cnode_locked) { 772 hfs_lock(cp, HFS_FORCE_LOCK); 773 cnode_locked = 1; 774 } 775 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, 776 0, 0, ap->a_context); 777 // LP64todo - fix this! 
resid needs to by user_ssize_t 778 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); 779 uio_setresid(uio, resid); 780 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; 781 } 782 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) { 783 if (!cnode_locked) { 784 hfs_lock(cp, HFS_FORCE_LOCK); 785 cnode_locked = 1; 786 } 787 retval = hfs_update(vp, TRUE); 788 } 789 /* Updating vcbWrCnt doesn't need to be atomic. */ 790 hfsmp->vcbWrCnt++; 791 792 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END, 793 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); 794exit: 795 if (cnode_locked) 796 hfs_unlock(cp); 797 798 if (took_truncate_lock) { 799 hfs_unlock_truncate(cp, 0); 800 } 801 if (retval == EAGAIN) { 802 throttle_lowpri_io(1); 803 804 retval = 0; 805 goto again; 806 } 807 return (retval); 808} 809 810/* support for the "bulk-access" fcntl */ 811 812#define CACHE_LEVELS 16 813#define NUM_CACHE_ENTRIES (64*16) 814#define PARENT_IDS_FLAG 0x100 815 816struct access_cache { 817 int numcached; 818 int cachehits; /* these two for statistics gathering */ 819 int lookups; 820 unsigned int *acache; 821 unsigned char *haveaccess; 822}; 823 824struct access_t { 825 uid_t uid; /* IN: effective user id */ 826 short flags; /* IN: access requested (i.e. R_OK) */ 827 short num_groups; /* IN: number of groups user belongs to */ 828 int num_files; /* IN: number of files to process */ 829 int *file_ids; /* IN: array of file ids */ 830 gid_t *groups; /* IN: array of groups */ 831 short *access; /* OUT: access info for each file (0 for 'has access') */ 832} __attribute__((unavailable)); // this structure is for reference purposes only 833 834struct user32_access_t { 835 uid_t uid; /* IN: effective user id */ 836 short flags; /* IN: access requested (i.e. 
R_OK) */ 837 short num_groups; /* IN: number of groups user belongs to */ 838 int num_files; /* IN: number of files to process */ 839 user32_addr_t file_ids; /* IN: array of file ids */ 840 user32_addr_t groups; /* IN: array of groups */ 841 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ 842}; 843 844struct user64_access_t { 845 uid_t uid; /* IN: effective user id */ 846 short flags; /* IN: access requested (i.e. R_OK) */ 847 short num_groups; /* IN: number of groups user belongs to */ 848 int num_files; /* IN: number of files to process */ 849 user64_addr_t file_ids; /* IN: array of file ids */ 850 user64_addr_t groups; /* IN: array of groups */ 851 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ 852}; 853 854 855// these are the "extended" versions of the above structures 856// note that it is crucial that they be different sized than 857// the regular version 858struct ext_access_t { 859 uint32_t flags; /* IN: access requested (i.e. R_OK) */ 860 uint32_t num_files; /* IN: number of files to process */ 861 uint32_t map_size; /* IN: size of the bit map */ 862 uint32_t *file_ids; /* IN: Array of file ids */ 863 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */ 864 short *access; /* OUT: access info for each file (0 for 'has access') */ 865 uint32_t num_parents; /* future use */ 866 cnid_t *parents; /* future use */ 867} __attribute__((unavailable)); // this structure is for reference purposes only 868 869struct user32_ext_access_t { 870 uint32_t flags; /* IN: access requested (i.e. 
R_OK) */ 871 uint32_t num_files; /* IN: number of files to process */ 872 uint32_t map_size; /* IN: size of the bit map */ 873 user32_addr_t file_ids; /* IN: Array of file ids */ 874 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */ 875 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ 876 uint32_t num_parents; /* future use */ 877 user32_addr_t parents; /* future use */ 878}; 879 880struct user64_ext_access_t { 881 uint32_t flags; /* IN: access requested (i.e. R_OK) */ 882 uint32_t num_files; /* IN: number of files to process */ 883 uint32_t map_size; /* IN: size of the bit map */ 884 user64_addr_t file_ids; /* IN: array of file ids */ 885 user64_addr_t bitmap; /* IN: array of groups */ 886 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ 887 uint32_t num_parents;/* future use */ 888 user64_addr_t parents;/* future use */ 889}; 890 891 892/* 893 * Perform a binary search for the given parent_id. Return value is 894 * the index if there is a match. If no_match_indexp is non-NULL it 895 * will be assigned with the index to insert the item (even if it was 896 * not found). 
897 */ 898static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp) 899{ 900 int index=-1; 901 unsigned int lo=0; 902 903 do { 904 unsigned int mid = ((hi - lo)/2) + lo; 905 unsigned int this_id = array[mid]; 906 907 if (parent_id == this_id) { 908 hi = mid; 909 break; 910 } 911 912 if (parent_id < this_id) { 913 hi = mid; 914 continue; 915 } 916 917 if (parent_id > this_id) { 918 lo = mid + 1; 919 continue; 920 } 921 } while(lo < hi); 922 923 /* check if lo and hi converged on the match */ 924 if (parent_id == array[hi]) { 925 index = hi; 926 } 927 928 if (no_match_indexp) { 929 *no_match_indexp = hi; 930 } 931 932 return index; 933} 934 935 936static int 937lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) 938{ 939 unsigned int hi; 940 int matches = 0; 941 int index, no_match_index; 942 943 if (cache->numcached == 0) { 944 *indexp = 0; 945 return 0; // table is empty, so insert at index=0 and report no match 946 } 947 948 if (cache->numcached > NUM_CACHE_ENTRIES) { 949 /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n", 950 cache->numcached, NUM_CACHE_ENTRIES);*/ 951 cache->numcached = NUM_CACHE_ENTRIES; 952 } 953 954 hi = cache->numcached - 1; 955 956 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index); 957 958 /* if no existing entry found, find index for new one */ 959 if (index == -1) { 960 index = no_match_index; 961 matches = 0; 962 } else { 963 matches = 1; 964 } 965 966 *indexp = index; 967 return matches; 968} 969 970/* 971 * Add a node to the access_cache at the given index (or do a lookup first 972 * to find the index if -1 is passed in). We currently do a replace rather 973 * than an insert if the cache is full. 
 */
/*
 * Insert (or replace) an entry in the sorted access_cache.
 *
 * index == -1 means "look the node up first": if an entry for nodeID
 * already exists we only overwrite its cached access result when the
 * previous result was ESRCH (a scope-checking miss); otherwise we fall
 * through and insert at the slot the lookup reported.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (index == -1) {
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
				// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
				cache->haveaccess[lookup_index] = access;
			}

			/* mission accomplished */
			return;
		} else {
			index = lookup_index;
		}

	}

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= NUM_CACHE_ENTRIES) {
		//printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
		cache->numcached = NUM_CACHE_ENTRIES-1;

		/* pin the insertion point so the write below stays in bounds */
		if (index > cache->numcached) {
			// printf("hfs: index %d pinned to %d\n", index, cache->numcached);
			index = cache->numcached;
		}
	}

	/* keep the array sorted: step past a smaller neighbor if needed */
	if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
		index++;
	}

	if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
	}

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
	cache->numcached++;
}


/* Attribute info snooped out of an in-core cnode; filled by snoop_callback. */
struct cinfo {
	uid_t uid;
	gid_t gid;
	mode_t mode;
	cnid_t parentcnid;
	u_int16_t recflags;
};

/*
 * hfs_chash_snoop() callback: copy the ownership/mode/parent/recflags info
 * we need out of an in-core cnode's catalog attributes into a struct cinfo.
 * Always returns 0.
 */
static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;
	cip->recflags = attrp->ca_recflags;

	return (0);
}

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
	int error = 0;

	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
	} else {
		struct cinfo c_info;

		/* otherwise, check the cnode hash incase the file/dir is incore */
		if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
			cnattrp->ca_uid = c_info.uid;
			cnattrp->ca_gid = c_info.gid;
			cnattrp->ca_mode = c_info.mode;
			cnattrp->ca_recflags = c_info.recflags;
			keyp->hfsPlus.parentID = c_info.parentcnid;
		} else {
			int lockflags;

			/* honor low-priority I/O throttling before hitting the disk */
			if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
				throttle_lowpri_io(1);

			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

			/* lookup this cnid in the catalog */
			error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

			hfs_systemfile_unlock(hfsmp, lockflags);

			cache->lookups++;
		}
	}

	return (error);
}


/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
/*
 * Walk from nodeID up toward the root, checking search/read access at each
 * level.  Returns 1 if every ancestor grants access, 0 otherwise; the error
 * code (0, EACCES, ESRCH for a scope miss, or a lookup error) is stored via
 * *err.  Ancestors visited on the way up are recorded into the access cache.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t* parents,
    uint32_t num_parents)
{
	int myErr = 0;
	int myResult;
	HFSCatalogNodeID thisNodeID;
	unsigned int myPerms;
	struct cat_attr cnattr;
	int cache_index = -1, scope_index = -1, scope_idx_start = -1;
	CatalogKey catkey;

	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];

	thisNodeID = nodeID;
	while (thisNodeID >= kRootDirID) {
		myResult = 0;   /* default to "no access" */

		/* check the cache before resorting to hitting the catalog */

		/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
		 * to look any further after hitting cached dir */

		if (lookup_bucket(cache, &cache_index, thisNodeID)) {
			cache->cachehits++;
			myErr = cache->haveaccess[cache_index];
			if (scope_index != -1) {
				/* already inside the requested scope: a cached ESRCH is not an error */
				if (myErr == ESRCH) {
					myErr = 0;
				}
			} else {
				scope_index = 0;   // so we'll just use the cache result
				scope_idx_start = ids_to_cache;
			}
			myResult = (myErr == 0) ? 1 : 0;
			goto ExitThisRoutine;
		}


		if (parents) {
			int tmp;
			/* is this ancestor one of the caller-supplied scope parents? */
			tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
			if (scope_index == -1)
				scope_index = tmp;
			if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
				scope_idx_start = ids_to_cache;
			}
		}

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;
			ids_to_cache++;
		}
		// Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
		if (bitmap && map_size) {
			bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
		}


		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
		if (myErr) {
			goto ExitThisRoutine; /* no access */
		}

		/* Root always gets access. */
		if (suser(myp_ucred, NULL) == 0) {
			thisNodeID = catkey.hfsPlus.parentID;
			myResult = 1;
			continue;
		}

		// if the thing has acl's, do the full permission check
		if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
			struct vnode *vp;

			/* get the vnode for this cnid */
			myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
			if ( myErr ) {
				myResult = 0;
				goto ExitThisRoutine;
			}

			thisNodeID = VTOC(vp)->c_parentcnid;

			/* drop the cnode lock before calling out to kauth */
			hfs_unlock(VTOC(vp));

			if (vnode_vtype(vp) == VDIR) {
				myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
			} else {
				myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
			}

			vnode_put(vp);
			if (myErr) {
				myResult = 0;
				goto ExitThisRoutine;
			}
		} else {
			unsigned int flags;
			int mode = cnattr.ca_mode & S_IFMT;
			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);

			/* directories need search permission; files need read */
			if (mode == S_IFDIR) {
				flags = R_OK | X_OK;
			} else {
				flags = R_OK;
			}
			if ( (myPerms & flags) != flags) {
				myResult = 0;
				myErr = EACCES;
				goto ExitThisRoutine;   /* no access */
			}

			/* up the hierarchy we go */
			thisNodeID = catkey.hfsPlus.parentID;
		}
	}

	/* if here, we have access to this node */
	myResult = 1;

  ExitThisRoutine:
	/* a scope list was given but we never crossed into it: report ESRCH */
	if (parents && myErr == 0 && scope_index == -1) {
		myErr = ESRCH;
	}

	if (myErr) {
		myResult = 0;
	}
	*err = myErr;

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		/* ancestors outside the scope are cached as ESRCH, not as real errors */
		if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
			add_node(cache, -1, parent_ids[i], ESRCH);
		} else {
			add_node(cache, -1, parent_ids[i], myErr);
		}
	}

	return (myResult);
}

/*
 * Kernel side of the HFS_BULKACCESS / HFS_EXT_BULKACCESS fsctls: check
 * access on up to 1024 file ids in one call.  Per-file results (0 or an
 * errno) are copied out to the caller's access array.  Returns 0 on
 * success or an errno for argument/copy failures; ENOMEM if any of the
 * kernel buffers cannot be allocated.
 */
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
	boolean_t is64bit;

	/*
	 * NOTE: on entry, the vnode has an io_ref. In case this vnode
	 * happens to be in our list of file_ids, we'll note it
	 * avoid calling hfs_chashget_nowait() on that id as that
	 * will cause a "locking against myself" panic.
	 */
	Boolean check_leaf = true;

	struct user64_ext_access_t *user_access_structp;
	struct user64_ext_access_t tmp_user_access;
	struct access_cache cache;

	int error = 0, prev_parent_check_ok=1;
	unsigned int i;

	short flags;
	unsigned int num_files = 0;
	int map_size = 0;
	int num_parents = 0;
	int *file_ids=NULL;
	short *access=NULL;
	char *bitmap=NULL;
	cnid_t *parents=NULL;
	int leaf_index;

	cnid_t cnid;
	cnid_t prevParent_cnid = 0;
	unsigned int myPerms;
	short myaccess = 0;
	struct cat_attr cnattr;
	CatalogKey catkey;
	struct cnode *skip_cp = VTOC(vp);
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);

	is64bit = proc_is64bit(p);

	/* initialize the local cache and buffers */
	cache.numcached = 0;
	cache.cachehits = 0;
	cache.lookups = 0;
	cache.acache = NULL;
	cache.haveaccess = NULL;

	/* struct copyin done during dispatch... need to copy file_id array separately */
	if (ap->a_data == NULL) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	/* normalize all three user struct layouts into a user64_ext_access_t */
	if (is64bit) {
		if (arg_size != sizeof(struct user64_ext_access_t)) {
			error = EINVAL;
			goto err_exit_bulk_access;
		}

		user_access_structp = (struct user64_ext_access_t *)ap->a_data;

	} else if (arg_size == sizeof(struct user32_access_t)) {
		struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

		// convert an old style bulk-access struct to the new style
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = 0;
		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = USER_ADDR_NULL;
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.num_parents = 0;
		user_access_structp = &tmp_user_access;

	} else if (arg_size == sizeof(struct user32_ext_access_t)) {
		struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

		// up-cast from a 32-bit version of the struct
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = accessp->map_size;
		tmp_user_access.num_parents = accessp->num_parents;

		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

		user_access_structp = &tmp_user_access;
	} else {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	map_size = user_access_structp->map_size;

	num_files = user_access_structp->num_files;

	num_parents= user_access_structp->num_parents;

	/* bound user-supplied counts before allocating (error stays 0 for empty input) */
	if (num_files < 1) {
		goto err_exit_bulk_access;
	}
	if (num_files > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (num_parents > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	file_ids = (int *) kalloc(sizeof(int) * num_files);
	access = (short *) kalloc(sizeof(short) * num_files);
	if (map_size) {
		bitmap = (char *) kalloc(sizeof(char) * map_size);
	}

	if (num_parents) {
		parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
	}

	cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
	cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
		if (file_ids) {
			kfree(file_ids, sizeof(int) * num_files);
		}
		if (bitmap) {
			kfree(bitmap, sizeof(char) * map_size);
		}
		if (access) {
			kfree(access, sizeof(short) * num_files);
		}
		if (cache.acache) {
			kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
		}
		if (cache.haveaccess) {
			kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
		}
		if (parents) {
			kfree(parents, sizeof(cnid_t) * num_parents);
		}
		return ENOMEM;
	}

	// make sure the bitmap is zero'ed out...
	if (bitmap) {
		bzero(bitmap, (sizeof(char) * map_size));
	}

	if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
	            num_files * sizeof(int)))) {
		goto err_exit_bulk_access;
	}

	if (num_parents) {
		if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
		            num_parents * sizeof(cnid_t)))) {
			goto err_exit_bulk_access;
		}
	}

	flags = user_access_structp->flags;
	if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
		flags = R_OK;
	}

	/* check if we've been passed leaf node ids or parent ids */
	if (flags & PARENT_IDS_FLAG) {
		check_leaf = false;
	}

	/* Check access to each file_id passed in */
	for (i = 0; i < num_files; i++) {
		leaf_index=-1;
		cnid = (cnid_t) file_ids[i];

		/* root always has access */
		if ((!parents) && (!suser(cred, NULL))) {
			access[i] = 0;
			continue;
		}

		if (check_leaf) {
			/* do the lookup (checks the cnode hash, then the catalog) */
			error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
			if (error) {
				access[i] = (short) error;
				continue;
			}

			if (parents) {
				// Check if the leaf matches one of the parent scopes
				leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
				if (leaf_index >= 0 && parents[leaf_index] == cnid)
					prev_parent_check_ok = 0;
				else if (leaf_index >= 0)
					prev_parent_check_ok = 1;
			}

			// if the thing has acl's, do the full permission check
			if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
				struct vnode *cvp;
				int myErr = 0;
				/* get the vnode for this cnid */
				myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
				if ( myErr ) {
					access[i] = myErr;
					continue;
				}

				hfs_unlock(VTOC(cvp));

				if (vnode_vtype(cvp) == VDIR) {
					myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
				} else {
					myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
				}

				vnode_put(cvp);
				if (myErr) {
					access[i] = myErr;
					continue;
				}
			} else {
				/* before calling CheckAccess(), check the target file for read access */
				myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
				    cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

				/* fail fast if no access */
				if ((myPerms & flags) == 0) {
					access[i] = EACCES;
					continue;
				}
			}
		} else {
			/* we were passed an array of parent ids */
			catkey.hfsPlus.parentID = cnid;
		}

		/* if the last guy had the same parent and had access, we're done */
		if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
			cache.cachehits++;
			access[i] = 0;
			continue;
		}

		myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
		    skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);

		/* ESRCH on a leaf that matched a scope parent still counts as access */
		if (myaccess || (error == ESRCH && leaf_index != -1)) {
			access[i] = 0; // have access.. no errors to report
		} else {
			access[i] = (error != 0 ? (short) error : EACCES);
		}

		prevParent_cnid = catkey.hfsPlus.parentID;
	}

	/* copyout the access array */
	if ((error = copyout((caddr_t)access, user_access_structp->access,
	            num_files * sizeof (short)))) {
		goto err_exit_bulk_access;
	}
	if (map_size && bitmap) {
		if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
		            map_size * sizeof (char)))) {
			goto err_exit_bulk_access;
		}
	}


  err_exit_bulk_access:

	//printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

	if (file_ids)
		kfree(file_ids, sizeof(int) * num_files);
	if (parents)
		kfree(parents, sizeof(cnid_t) * num_parents);
	if (bitmap)
		kfree(bitmap, sizeof(char) * map_size);
	if (access)
		kfree(access, sizeof(short) * num_files);
	if (cache.acache)
		kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
	if (cache.haveaccess)
		kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	return (error);
}


/* end "bulk-access" support */


/*
 * Callback for use with freeze ioctl: wait for all pending writes on vp.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

	return 0;
}

/*
 * Control filesystem operating characteristics.
1563 */ 1564int 1565hfs_vnop_ioctl( struct vnop_ioctl_args /* { 1566 vnode_t a_vp; 1567 int a_command; 1568 caddr_t a_data; 1569 int a_fflag; 1570 vfs_context_t a_context; 1571 } */ *ap) 1572{ 1573 struct vnode * vp = ap->a_vp; 1574 struct hfsmount *hfsmp = VTOHFS(vp); 1575 vfs_context_t context = ap->a_context; 1576 kauth_cred_t cred = vfs_context_ucred(context); 1577 proc_t p = vfs_context_proc(context); 1578 struct vfsstatfs *vfsp; 1579 boolean_t is64bit; 1580 off_t jnl_start, jnl_size; 1581 struct hfs_journal_info *jip; 1582#if HFS_COMPRESSION 1583 int compressed = 0; 1584 off_t uncompressed_size = -1; 1585 int decmpfs_error = 0; 1586 1587 if (ap->a_command == F_RDADVISE) { 1588 /* we need to inspect the decmpfs state of the file as early as possible */ 1589 compressed = hfs_file_is_compressed(VTOC(vp), 0); 1590 if (compressed) { 1591 if (VNODE_IS_RSRC(vp)) { 1592 /* if this is the resource fork, treat it as if it were empty */ 1593 uncompressed_size = 0; 1594 } else { 1595 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0); 1596 if (decmpfs_error != 0) { 1597 /* failed to get the uncompressed size, we'll check for this later */ 1598 uncompressed_size = -1; 1599 } 1600 } 1601 } 1602 } 1603#endif /* HFS_COMPRESSION */ 1604 1605 is64bit = proc_is64bit(p); 1606 1607#if CONFIG_PROTECT 1608 { 1609 int error = 0; 1610 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { 1611 return error; 1612 } 1613 } 1614#endif /* CONFIG_PROTECT */ 1615 1616 switch (ap->a_command) { 1617 1618 case HFS_GETPATH: 1619 { 1620 struct vnode *file_vp; 1621 cnid_t cnid; 1622 int outlen; 1623 char *bufptr; 1624 int error; 1625 1626 /* Caller must be owner of file system. */ 1627 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1628 if (suser(cred, NULL) && 1629 kauth_cred_getuid(cred) != vfsp->f_owner) { 1630 return (EACCES); 1631 } 1632 /* Target vnode must be file system's root. 
*/ 1633 if (!vnode_isvroot(vp)) { 1634 return (EINVAL); 1635 } 1636 bufptr = (char *)ap->a_data; 1637 cnid = strtoul(bufptr, NULL, 10); 1638 1639 /* We need to call hfs_vfs_vget to leverage the code that will 1640 * fix the origin list for us if needed, as opposed to calling 1641 * hfs_vget, since we will need the parent for build_path call. 1642 */ 1643 1644 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) { 1645 return (error); 1646 } 1647 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context); 1648 vnode_put(file_vp); 1649 1650 return (error); 1651 } 1652 1653 case HFS_PREV_LINK: 1654 case HFS_NEXT_LINK: 1655 { 1656 cnid_t linkfileid; 1657 cnid_t nextlinkid; 1658 cnid_t prevlinkid; 1659 int error; 1660 1661 /* Caller must be owner of file system. */ 1662 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1663 if (suser(cred, NULL) && 1664 kauth_cred_getuid(cred) != vfsp->f_owner) { 1665 return (EACCES); 1666 } 1667 /* Target vnode must be file system's root. 
*/ 1668 if (!vnode_isvroot(vp)) { 1669 return (EINVAL); 1670 } 1671 linkfileid = *(cnid_t *)ap->a_data; 1672 if (linkfileid < kHFSFirstUserCatalogNodeID) { 1673 return (EINVAL); 1674 } 1675 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) { 1676 return (error); 1677 } 1678 if (ap->a_command == HFS_NEXT_LINK) { 1679 *(cnid_t *)ap->a_data = nextlinkid; 1680 } else { 1681 *(cnid_t *)ap->a_data = prevlinkid; 1682 } 1683 return (0); 1684 } 1685 1686 case HFS_RESIZE_PROGRESS: { 1687 1688 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1689 if (suser(cred, NULL) && 1690 kauth_cred_getuid(cred) != vfsp->f_owner) { 1691 return (EACCES); /* must be owner of file system */ 1692 } 1693 if (!vnode_isvroot(vp)) { 1694 return (EINVAL); 1695 } 1696 /* file system must not be mounted read-only */ 1697 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 1698 return (EROFS); 1699 } 1700 1701 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data); 1702 } 1703 1704 case HFS_RESIZE_VOLUME: { 1705 u_int64_t newsize; 1706 u_int64_t cursize; 1707 1708 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1709 if (suser(cred, NULL) && 1710 kauth_cred_getuid(cred) != vfsp->f_owner) { 1711 return (EACCES); /* must be owner of file system */ 1712 } 1713 if (!vnode_isvroot(vp)) { 1714 return (EINVAL); 1715 } 1716 1717 /* filesystem must not be mounted read only */ 1718 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 1719 return (EROFS); 1720 } 1721 newsize = *(u_int64_t *)ap->a_data; 1722 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; 1723 1724 if (newsize > cursize) { 1725 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context); 1726 } else if (newsize < cursize) { 1727 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context); 1728 } else { 1729 return (0); 1730 } 1731 } 1732 case HFS_CHANGE_NEXT_ALLOCATION: { 1733 int error = 0; /* Assume success */ 1734 u_int32_t location; 1735 1736 if (vnode_vfsisrdonly(vp)) { 1737 return (EROFS); 1738 } 1739 vfsp = 
vfs_statfs(HFSTOVFS(hfsmp)); 1740 if (suser(cred, NULL) && 1741 kauth_cred_getuid(cred) != vfsp->f_owner) { 1742 return (EACCES); /* must be owner of file system */ 1743 } 1744 if (!vnode_isvroot(vp)) { 1745 return (EINVAL); 1746 } 1747 HFS_MOUNT_LOCK(hfsmp, TRUE); 1748 location = *(u_int32_t *)ap->a_data; 1749 if ((location >= hfsmp->allocLimit) && 1750 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) { 1751 error = EINVAL; 1752 goto fail_change_next_allocation; 1753 } 1754 /* Return previous value. */ 1755 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation; 1756 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) { 1757 /* On magic value for location, set nextAllocation to next block 1758 * after metadata zone and set flag in mount structure to indicate 1759 * that nextAllocation should not be updated again. 1760 */ 1761 if (hfsmp->hfs_metazone_end != 0) { 1762 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); 1763 } 1764 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION; 1765 } else { 1766 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION; 1767 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location); 1768 } 1769 MarkVCBDirty(hfsmp); 1770fail_change_next_allocation: 1771 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 1772 return (error); 1773 } 1774 1775#if HFS_SPARSE_DEV 1776 case HFS_SETBACKINGSTOREINFO: { 1777 struct vnode * bsfs_rootvp; 1778 struct vnode * di_vp; 1779 struct hfs_backingstoreinfo *bsdata; 1780 int error = 0; 1781 1782 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 1783 return (EROFS); 1784 } 1785 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { 1786 return (EALREADY); 1787 } 1788 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1789 if (suser(cred, NULL) && 1790 kauth_cred_getuid(cred) != vfsp->f_owner) { 1791 return (EACCES); /* must be owner of file system */ 1792 } 1793 bsdata = (struct hfs_backingstoreinfo *)ap->a_data; 1794 if (bsdata == NULL) { 1795 return (EINVAL); 1796 } 1797 if ((error = file_vnode(bsdata->backingfd, &di_vp))) { 1798 return (error); 1799 } 1800 if ((error = 
vnode_getwithref(di_vp))) { 1801 file_drop(bsdata->backingfd); 1802 return(error); 1803 } 1804 1805 if (vnode_mount(vp) == vnode_mount(di_vp)) { 1806 (void)vnode_put(di_vp); 1807 file_drop(bsdata->backingfd); 1808 return (EINVAL); 1809 } 1810 1811 /* 1812 * Obtain the backing fs root vnode and keep a reference 1813 * on it. This reference will be dropped in hfs_unmount. 1814 */ 1815 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */ 1816 if (error) { 1817 (void)vnode_put(di_vp); 1818 file_drop(bsdata->backingfd); 1819 return (error); 1820 } 1821 vnode_ref(bsfs_rootvp); 1822 vnode_put(bsfs_rootvp); 1823 1824 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp; 1825 1826 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; 1827 /* The free extent cache is managed differently for sparse devices. 1828 * There is a window between which the volume is mounted and the 1829 * device is marked as sparse, so the free extent cache for this 1830 * volume is currently initialized as normal volume (sorted by block 1831 * count). Reset the cache so that it will be rebuilt again 1832 * for sparse device (sorted by start block). 1833 */ 1834 ResetVCBFreeExtCache(hfsmp); 1835 1836 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; 1837 hfsmp->hfs_sparsebandblks *= 4; 1838 1839 vfs_markdependency(hfsmp->hfs_mp); 1840 1841 /* 1842 * If the sparse image is on a sparse image file (as opposed to a sparse 1843 * bundle), then we may need to limit the free space to the maximum size 1844 * of a file on that volume. So we query (using pathconf), and if we get 1845 * a meaningful result, we cache the number of blocks for later use in 1846 * hfs_freeblks(). 
1847 */ 1848 hfsmp->hfs_backingfs_maxblocks = 0; 1849 if (vnode_vtype(di_vp) == VREG) { 1850 int terr; 1851 int hostbits; 1852 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context); 1853 if (terr == 0 && hostbits != 0 && hostbits < 64) { 1854 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits; 1855 1856 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize; 1857 } 1858 } 1859 1860 (void)vnode_put(di_vp); 1861 file_drop(bsdata->backingfd); 1862 return (0); 1863 } 1864 case HFS_CLRBACKINGSTOREINFO: { 1865 struct vnode * tmpvp; 1866 1867 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1868 if (suser(cred, NULL) && 1869 kauth_cred_getuid(cred) != vfsp->f_owner) { 1870 return (EACCES); /* must be owner of file system */ 1871 } 1872 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 1873 return (EROFS); 1874 } 1875 1876 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && 1877 hfsmp->hfs_backingfs_rootvp) { 1878 1879 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; 1880 tmpvp = hfsmp->hfs_backingfs_rootvp; 1881 hfsmp->hfs_backingfs_rootvp = NULLVP; 1882 hfsmp->hfs_sparsebandblks = 0; 1883 vnode_rele(tmpvp); 1884 } 1885 return (0); 1886 } 1887#endif /* HFS_SPARSE_DEV */ 1888 1889 /* Change the next CNID stored in the VH */ 1890 case HFS_CHANGE_NEXTCNID: { 1891 int error = 0; /* Assume success */ 1892 u_int32_t fileid; 1893 int wraparound = 0; 1894 int lockflags = 0; 1895 1896 if (vnode_vfsisrdonly(vp)) { 1897 return (EROFS); 1898 } 1899 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1900 if (suser(cred, NULL) && 1901 kauth_cred_getuid(cred) != vfsp->f_owner) { 1902 return (EACCES); /* must be owner of file system */ 1903 } 1904 1905 fileid = *(u_int32_t *)ap->a_data; 1906 1907 /* Must have catalog lock excl. 
to advance the CNID pointer */ 1908 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK); 1909 1910 HFS_MOUNT_LOCK(hfsmp, TRUE); 1911 1912 /* If it is less than the current next CNID, force the wraparound bit to be set */ 1913 if (fileid < hfsmp->vcbNxtCNID) { 1914 wraparound=1; 1915 } 1916 1917 /* Return previous value. */ 1918 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID; 1919 1920 hfsmp->vcbNxtCNID = fileid; 1921 1922 if (wraparound) { 1923 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; 1924 } 1925 1926 MarkVCBDirty(hfsmp); 1927 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 1928 hfs_systemfile_unlock (hfsmp, lockflags); 1929 1930 return (error); 1931 } 1932 1933 case F_FREEZE_FS: { 1934 struct mount *mp; 1935 1936 mp = vnode_mount(vp); 1937 hfsmp = VFSTOHFS(mp); 1938 1939 if (!(hfsmp->jnl)) 1940 return (ENOTSUP); 1941 1942 vfsp = vfs_statfs(mp); 1943 1944 if (kauth_cred_getuid(cred) != vfsp->f_owner && 1945 !kauth_cred_issuser(cred)) 1946 return (EACCES); 1947 1948 lck_rw_lock_exclusive(&hfsmp->hfs_insync); 1949 1950 // flush things before we get started to try and prevent 1951 // dirty data from being paged out while we're frozen. 1952 // note: can't do this after taking the lock as it will 1953 // deadlock against ourselves. 1954 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); 1955 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); 1956 1957 // DO NOT call hfs_journal_flush() because that takes a 1958 // shared lock on the global exclusive lock! 1959 journal_flush(hfsmp->jnl, TRUE); 1960 1961 // don't need to iterate on all vnodes, we just need to 1962 // wait for writes to the system files and the device vnode 1963 // 1964 // Now that journal flush waits for all metadata blocks to 1965 // be written out, waiting for btree writes is probably no 1966 // longer required. 
1967 if (HFSTOVCB(hfsmp)->extentsRefNum) 1968 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze"); 1969 if (HFSTOVCB(hfsmp)->catalogRefNum) 1970 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze"); 1971 if (HFSTOVCB(hfsmp)->allocationsRefNum) 1972 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze"); 1973 if (hfsmp->hfs_attribute_vp) 1974 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze"); 1975 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze"); 1976 1977 hfsmp->hfs_freezing_proc = current_proc(); 1978 1979 return (0); 1980 } 1981 1982 case F_THAW_FS: { 1983 vfsp = vfs_statfs(vnode_mount(vp)); 1984 if (kauth_cred_getuid(cred) != vfsp->f_owner && 1985 !kauth_cred_issuser(cred)) 1986 return (EACCES); 1987 1988 // if we're not the one who froze the fs then we 1989 // can't thaw it. 1990 if (hfsmp->hfs_freezing_proc != current_proc()) { 1991 return EPERM; 1992 } 1993 1994 // NOTE: if you add code here, also go check the 1995 // code that "thaws" the fs in hfs_vnop_close() 1996 // 1997 hfsmp->hfs_freezing_proc = NULL; 1998 hfs_unlock_global (hfsmp); 1999 lck_rw_unlock_exclusive(&hfsmp->hfs_insync); 2000 2001 return (0); 2002 } 2003 2004 case HFS_BULKACCESS_FSCTL: { 2005 int size; 2006 2007 if (hfsmp->hfs_flags & HFS_STANDARD) { 2008 return EINVAL; 2009 } 2010 2011 if (is64bit) { 2012 size = sizeof(struct user64_access_t); 2013 } else { 2014 size = sizeof(struct user32_access_t); 2015 } 2016 2017 return do_bulk_access_check(hfsmp, vp, ap, size, context); 2018 } 2019 2020 case HFS_EXT_BULKACCESS_FSCTL: { 2021 int size; 2022 2023 if (hfsmp->hfs_flags & HFS_STANDARD) { 2024 return EINVAL; 2025 } 2026 2027 if (is64bit) { 2028 size = sizeof(struct user64_ext_access_t); 2029 } else { 2030 size = sizeof(struct user32_ext_access_t); 2031 } 2032 2033 return do_bulk_access_check(hfsmp, vp, ap, size, context); 2034 } 2035 2036 case HFS_SET_XATTREXTENTS_STATE: { 2037 int state; 2038 
2039 if (ap->a_data == NULL) { 2040 return (EINVAL); 2041 } 2042 2043 state = *(int *)ap->a_data; 2044 2045 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2046 return (EROFS); 2047 } 2048 2049 /* Super-user can enable or disable extent-based extended 2050 * attribute support on a volume 2051 * Note: Starting Mac OS X 10.7, extent-based extended attributes 2052 * are enabled by default, so any change will be transient only 2053 * till the volume is remounted. 2054 */ 2055 if (!is_suser()) { 2056 return (EPERM); 2057 } 2058 if (state == 0 || state == 1) 2059 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state); 2060 else 2061 return (EINVAL); 2062 } 2063 2064 case F_SETSTATICCONTENT: { 2065 int error; 2066 int enable_static = 0; 2067 struct cnode *cp = NULL; 2068 /* 2069 * lock the cnode, decorate the cnode flag, and bail out. 2070 * VFS should have already authenticated the caller for us. 2071 */ 2072 2073 if (ap->a_data) { 2074 /* 2075 * Note that even though ap->a_data is of type caddr_t, 2076 * the fcntl layer at the syscall handler will pass in NULL 2077 * or 1 depending on what the argument supplied to the fcntl 2078 * was. So it is in fact correct to check the ap->a_data 2079 * argument for zero or non-zero value when deciding whether or not 2080 * to enable the static bit in the cnode. 
2081 */ 2082 enable_static = 1; 2083 } 2084 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2085 return EROFS; 2086 } 2087 cp = VTOC(vp); 2088 2089 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK); 2090 if (error == 0) { 2091 if (enable_static) { 2092 cp->c_flag |= C_SSD_STATIC; 2093 } 2094 else { 2095 cp->c_flag &= ~C_SSD_STATIC; 2096 } 2097 hfs_unlock (cp); 2098 } 2099 return error; 2100 } 2101 2102 case F_SETBACKINGSTORE: { 2103 2104 int error = 0; 2105 2106 /* 2107 * See comment in F_SETSTATICCONTENT re: using 2108 * a null check for a_data 2109 */ 2110 if (ap->a_data) { 2111 error = hfs_set_backingstore (vp, 1); 2112 } 2113 else { 2114 error = hfs_set_backingstore (vp, 0); 2115 } 2116 2117 return error; 2118 } 2119 2120 case F_GETPATH_MTMINFO: { 2121 int error = 0; 2122 2123 int *data = (int*) ap->a_data; 2124 2125 /* Ask if this is a backingstore vnode */ 2126 error = hfs_is_backingstore (vp, data); 2127 2128 return error; 2129 } 2130 2131 case F_FULLFSYNC: { 2132 int error; 2133 2134 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2135 return (EROFS); 2136 } 2137 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); 2138 if (error == 0) { 2139 error = hfs_fsync(vp, MNT_WAIT, TRUE, p); 2140 hfs_unlock(VTOC(vp)); 2141 } 2142 2143 return error; 2144 } 2145 2146 case F_CHKCLEAN: { 2147 register struct cnode *cp; 2148 int error; 2149 2150 if (!vnode_isreg(vp)) 2151 return EINVAL; 2152 2153 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); 2154 if (error == 0) { 2155 cp = VTOC(vp); 2156 /* 2157 * used by regression test to determine if 2158 * all the dirty pages (via write) have been cleaned 2159 * after a call to 'fsysnc'. 
2160 */ 2161 error = is_file_clean(vp, VTOF(vp)->ff_size); 2162 hfs_unlock(cp); 2163 } 2164 return (error); 2165 } 2166 2167 case F_RDADVISE: { 2168 register struct radvisory *ra; 2169 struct filefork *fp; 2170 int error; 2171 2172 if (!vnode_isreg(vp)) 2173 return EINVAL; 2174 2175 ra = (struct radvisory *)(ap->a_data); 2176 fp = VTOF(vp); 2177 2178 /* Protect against a size change. */ 2179 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK); 2180 2181#if HFS_COMPRESSION 2182 if (compressed && (uncompressed_size == -1)) { 2183 /* fetching the uncompressed size failed above, so return the error */ 2184 error = decmpfs_error; 2185 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) || 2186 (!compressed && (ra->ra_offset >= fp->ff_size))) { 2187 error = EFBIG; 2188 } 2189#else /* HFS_COMPRESSION */ 2190 if (ra->ra_offset >= fp->ff_size) { 2191 error = EFBIG; 2192 } 2193#endif /* HFS_COMPRESSION */ 2194 else { 2195 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); 2196 } 2197 2198 hfs_unlock_truncate(VTOC(vp), 0); 2199 return (error); 2200 } 2201 2202 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */ 2203 { 2204 if (is64bit) { 2205 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); 2206 } 2207 else { 2208 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); 2209 } 2210 return 0; 2211 } 2212 2213 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME: 2214 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time; 2215 break; 2216 2217 case SPOTLIGHT_FSCTL_GET_LAST_MTIME: 2218 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime; 2219 break; 2220 2221 case HFS_FSCTL_GET_VERY_LOW_DISK: 2222 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit; 2223 break; 2224 2225 case HFS_FSCTL_SET_VERY_LOW_DISK: 2226 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) { 2227 return EINVAL; 2228 } 2229 2230 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t 
*)ap->a_data; 2231 break; 2232 2233 case HFS_FSCTL_GET_LOW_DISK: 2234 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit; 2235 break; 2236 2237 case HFS_FSCTL_SET_LOW_DISK: 2238 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel 2239 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) { 2240 2241 return EINVAL; 2242 } 2243 2244 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data; 2245 break; 2246 2247 case HFS_FSCTL_GET_DESIRED_DISK: 2248 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel; 2249 break; 2250 2251 case HFS_FSCTL_SET_DESIRED_DISK: 2252 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) { 2253 return EINVAL; 2254 } 2255 2256 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data; 2257 break; 2258 2259 case HFS_VOLUME_STATUS: 2260 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions; 2261 break; 2262 2263 case HFS_SET_BOOT_INFO: 2264 if (!vnode_isvroot(vp)) 2265 return(EINVAL); 2266 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner)) 2267 return(EACCES); /* must be superuser or owner of filesystem */ 2268 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2269 return (EROFS); 2270 } 2271 HFS_MOUNT_LOCK(hfsmp, TRUE); 2272 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); 2273 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 2274 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); 2275 break; 2276 2277 case HFS_GET_BOOT_INFO: 2278 if (!vnode_isvroot(vp)) 2279 return(EINVAL); 2280 HFS_MOUNT_LOCK(hfsmp, TRUE); 2281 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo)); 2282 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 2283 break; 2284 2285 case HFS_MARK_BOOT_CORRUPT: 2286 /* Mark the boot volume corrupt by setting 2287 * kHFSVolumeInconsistentBit in the volume header. This will 2288 * force fsck_hfs on next mount. 
2289 */ 2290 if (!is_suser()) { 2291 return EACCES; 2292 } 2293 2294 /* Allowed only on the root vnode of the boot volume */ 2295 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) || 2296 !vnode_isvroot(vp)) { 2297 return EINVAL; 2298 } 2299 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2300 return (EROFS); 2301 } 2302 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n"); 2303 hfs_mark_volume_inconsistent(hfsmp); 2304 break; 2305 2306 case HFS_FSCTL_GET_JOURNAL_INFO: 2307 jip = (struct hfs_journal_info*)ap->a_data; 2308 2309 if (vp == NULLVP) 2310 return EINVAL; 2311 2312 if (hfsmp->jnl == NULL) { 2313 jnl_start = 0; 2314 jnl_size = 0; 2315 } else { 2316 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; 2317 jnl_size = (off_t)hfsmp->jnl_size; 2318 } 2319 2320 jip->jstart = jnl_start; 2321 jip->jsize = jnl_size; 2322 break; 2323 2324 case HFS_SET_ALWAYS_ZEROFILL: { 2325 struct cnode *cp = VTOC(vp); 2326 2327 if (*(int *)ap->a_data) { 2328 cp->c_flag |= C_ALWAYS_ZEROFILL; 2329 } else { 2330 cp->c_flag &= ~C_ALWAYS_ZEROFILL; 2331 } 2332 break; 2333 } 2334 2335 case HFS_DISABLE_METAZONE: { 2336 /* Only root can disable metadata zone */ 2337 if (!is_suser()) { 2338 return EACCES; 2339 } 2340 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2341 return (EROFS); 2342 } 2343 2344 /* Disable metadata zone now */ 2345 (void) hfs_metadatazone_init(hfsmp, true); 2346 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN); 2347 break; 2348 } 2349 2350 default: 2351 return (ENOTTY); 2352 } 2353 2354 return 0; 2355} 2356 2357/* 2358 * select 2359 */ 2360int 2361hfs_vnop_select(__unused struct vnop_select_args *ap) 2362/* 2363 struct vnop_select_args { 2364 vnode_t a_vp; 2365 int a_which; 2366 int a_fflags; 2367 void *a_wql; 2368 vfs_context_t a_context; 2369 }; 2370*/ 2371{ 2372 /* 2373 * We should really check to see if I/O is possible. 
2374 */ 2375 return (1); 2376} 2377 2378/* 2379 * Converts a logical block number to a physical block, and optionally returns 2380 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize. 2381 * The physical block number is based on the device block size, currently its 512. 2382 * The block run is returned in logical blocks, and is the REMAINING amount of blocks 2383 */ 2384int 2385hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp) 2386{ 2387 struct filefork *fp = VTOF(vp); 2388 struct hfsmount *hfsmp = VTOHFS(vp); 2389 int retval = E_NONE; 2390 u_int32_t logBlockSize; 2391 size_t bytesContAvail = 0; 2392 off_t blockposition; 2393 int lockExtBtree; 2394 int lockflags = 0; 2395 2396 /* 2397 * Check for underlying vnode requests and ensure that logical 2398 * to physical mapping is requested. 2399 */ 2400 if (vpp != NULL) 2401 *vpp = hfsmp->hfs_devvp; 2402 if (bnp == NULL) 2403 return (0); 2404 2405 logBlockSize = GetLogicalBlockSize(vp); 2406 blockposition = (off_t)bn * logBlockSize; 2407 2408 lockExtBtree = overflow_extents(fp); 2409 2410 if (lockExtBtree) 2411 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); 2412 2413 retval = MacToVFSError( 2414 MapFileBlockC (HFSTOVCB(hfsmp), 2415 (FCB*)fp, 2416 MAXPHYSIO, 2417 blockposition, 2418 bnp, 2419 &bytesContAvail)); 2420 2421 if (lockExtBtree) 2422 hfs_systemfile_unlock(hfsmp, lockflags); 2423 2424 if (retval == E_NONE) { 2425 /* Figure out how many read ahead blocks there are */ 2426 if (runp != NULL) { 2427 if (can_cluster(logBlockSize)) { 2428 /* Make sure this result never goes negative: */ 2429 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1; 2430 } else { 2431 *runp = 0; 2432 } 2433 } 2434 } 2435 return (retval); 2436} 2437 2438/* 2439 * Convert logical block number to file offset. 
2440 */ 2441int 2442hfs_vnop_blktooff(struct vnop_blktooff_args *ap) 2443/* 2444 struct vnop_blktooff_args { 2445 vnode_t a_vp; 2446 daddr64_t a_lblkno; 2447 off_t *a_offset; 2448 }; 2449*/ 2450{ 2451 if (ap->a_vp == NULL) 2452 return (EINVAL); 2453 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp); 2454 2455 return(0); 2456} 2457 2458/* 2459 * Convert file offset to logical block number. 2460 */ 2461int 2462hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap) 2463/* 2464 struct vnop_offtoblk_args { 2465 vnode_t a_vp; 2466 off_t a_offset; 2467 daddr64_t *a_lblkno; 2468 }; 2469*/ 2470{ 2471 if (ap->a_vp == NULL) 2472 return (EINVAL); 2473 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp)); 2474 2475 return(0); 2476} 2477 2478/* 2479 * Map file offset to physical block number. 2480 * 2481 * If this function is called for write operation, and if the file 2482 * had virtual blocks allocated (delayed allocation), real blocks 2483 * are allocated by calling ExtendFileC(). 2484 * 2485 * If this function is called for read operation, and if the file 2486 * had virtual blocks allocated (delayed allocation), no change 2487 * to the size of file is done, and if required, rangelist is 2488 * searched for mapping. 2489 * 2490 * System file cnodes are expected to be locked (shared or exclusive). 
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;	/* 1 while we hold an open journal transaction */
	int tooklock = 0;	/* 1 if we took the cnode lock here (must drop on exit) */

#if HFS_COMPRESSION
	/* Compressed data forks cannot be block-mapped; resource forks may. */
	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	} else {
		if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
			int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			switch(state) {
				case FILE_IS_COMPRESSED:
					return ENOTSUP;
				case FILE_IS_CONVERTING:
					/* if FILE_IS_CONVERTING, we allow blockmap */
					break;
				default:
					printf("invalid state %d for compressed file\n", state);
					/* fall through */
			}
		}
	}
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	/* Take the cnode lock unless this thread already owns it. */
	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
			tooklock = 1;
		}
	}
	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

	/*
	 * retry: re-entered (after dropping the system-file locks) when
	 * borrowed blocks appeared while we blocked acquiring the extents
	 * b-tree, so that the transaction is started before the locks are held.
	 */
retry:
	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto exit;
		} else {
			started_tr = 1;
		}
		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

	if (syslocks)
		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		int64_t actbytes;
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			if (syslocks) {
				hfs_systemfile_unlock(hfsmp, lockflags);
				syslocks = 0;
			}
			goto retry;
		}

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * aquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

		if (retval) {
			/*
			 * Allocation failed: put the loaned-block accounting back
			 * the way it was (fork, cnode, and mount-wide counters).
			 */
			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			if (started_tr) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
				started_tr = 0;
			}
			goto exit;
		}
	}

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
	                       ap->a_bpn, &bytesContAvail);
	if (syslocks) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		syslocks = 0;
	}

	if (started_tr) {
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}
	if (retval) {
		/* On write, always return error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * rangelist for zero-fills
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
			goto exit;
		}

		/* Validate if the start offset is within logical file size */
		if (ap->a_foffset >= fp->ff_size) {
			goto exit;
		}

		/*
		 * At this point, we have encountered a failure during
		 * MapFileBlockC that resulted in ERANGE, and we are not servicing
		 * a write, and there are borrowed blocks.
		 *
		 * However, the cluster layer will not call blockmap for
		 * blocks that are borrowed and in-cache.  We have to assume that
		 * because we observed ERANGE being emitted from MapFileBlockC, this
		 * extent range is not valid on-disk.  So we treat this as a
		 * mapping that needs to be zero-filled prior to reading.
		 *
		 * Note that under certain circumstances (such as non-contiguous
		 * userland VM mappings in the calling process), cluster_io
		 * may be forced to split a large I/O driven by hfs_vnop_write
		 * into multiple sub-I/Os that necessitate a RMW cycle.  If this is
		 * the case here, then we have already removed the invalid range list
		 * mapping prior to getting to this blockmap call, so we should not
		 * search the invalid rangelist for this byte range.
		 */

		bytesContAvail = fp->ff_size - ap->a_foffset;
		/*
		 * Clip the contiguous available bytes to, at most, the allowable
		 * maximum or the amount requested.
		 */

		if (bytesContAvail > ap->a_size) {
			bytesContAvail = ap->a_size;
		}

		/* -1 tells the cluster layer there is no on-disk block: zero-fill. */
		*ap->a_bpn = (daddr64_t) -1;
		retval = 0;

		goto exit;
	}

	/* MapFileC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
	                      ap->a_foffset + (off_t)bytesContAvail - 1,
	                      &invalid_range);
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}
			break;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				/* Valid data runs up to the start of the invalid range. */
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
			break;

		case RL_NOOVERLAP:
			break;
		} /* end switch */
		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;
	}

exit:
	if (retval == 0) {
		if (ap->a_run)
			*ap->a_run = bytesContAvail;

		if (ap->a_poff)
			*(int *)ap->a_poff = 0;
	}

	if (tooklock)
		hfs_unlock(cp);

	return (MacToVFSError(retval));
}

/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);
	int error = 0;

	/* Mark buffer as containing static data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_STATIC) {
		buf_markstatic(bp);
	}

#if CONFIG_PROTECT
	cnode_t *cp = NULL;

	if ((cp = cp_get_protected_cnode(vp)) != NULL) {
		/*
		 * We rely upon the truncate lock to protect the
		 * CP cache key from getting tossed prior to our IO finishing
		 * here.
		 * Nearly all cluster io calls to manipulate file payload from HFS
		 * take the truncate lock before calling into the cluster
		 * layer to ensure the file size does not change, or that they
		 * have exclusive right to change the EOF of the file.
		 * That same guarantee protects us here since the code that
		 * deals with CP lock events must now take the truncate lock
		 * before doing anything.
		 *
		 * There is 1 exception here:
		 * 1) One exception should be the VM swapfile IO, because HFS will
		 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
		 * swapfile code only without holding the truncate lock. This is because
		 * individual swapfiles are maintained at fixed-length sizes by the VM code.
		 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
		 * create our own UPL and thus take the truncate lock before calling
		 * into the cluster layer. In that case, however, we are not concerned
		 * with the CP blob being wiped out in the middle of the IO
		 * because there isn't anything to toss; the VM swapfile key stays
		 * in-core as long as the file is open.
		 *
		 * NB:
		 * For filesystem resize, we may not have access to the underlying
		 * file's cache key for whatever reason (device may be locked). However,
		 * we do not need it since we are going to use the temporary HFS-wide resize key
		 * which is generated once we start relocating file content.  If this file's I/O
		 * should be done using the resize key, it will have been supplied already, so
		 * do not attach the file's cp blob to the buffer.
		 */
		if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
			buf_setcpaddr(bp, cp->c_cpentry);
		}
	}
#endif /* CONFIG_PROTECT */

	/* Hand the request down to the underlying device vnode. */
	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);

	return error;
}

/*
 * Clear the cnode's modified/touch hints without writing timestamps.
 * Used by the 'skipupdate' truncate paths (e.g. volume resize/relocation)
 * to avoid perturbing mtime/ctime/atime for internal block moves.
 */
static int
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);
	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;

	return 0;
}

/*
 * Core truncate: grow or shrink the fork of 'vp' to 'length' bytes.
 * 'flags' carries IO_* modifiers (IO_NOZEROFILL, IO_NDELAY, IO_SYNC);
 * when 'skipupdate' is set only hfs_minorupdate() is performed instead
 * of a full catalog update.
 * NOTE(review): the cnode appears to be locked on entry (the zero-fill
 * path below drops and re-takes it around cluster_write) — confirm
 * against callers.
 */
int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);;	/* NOTE(review): stray second ';' — harmless empty statement */
	kauth_cred_t cred = vfs_context_ucred(context);
	int retval;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;	/* physical bytes currently allocated to the fork */
	u_int32_t fileblocks;
	int blksize;
	struct hfsmount *hfsmp;
	int lockflags;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if (length < 0)
		return (EINVAL);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	/* Plain HFS caps file size at 2^31-1 bytes. */
	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	hfsmp = VTOHFS(vp);

	retval = E_NONE;

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp)))
		return(retval);
#endif /* QUOTA */

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
#if QUOTA
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
				   cred, 0);
		if (retval)
			goto Err_Exit;
#endif /* QUOTA */
		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			int eflags;
			u_int32_t blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			/*
			 * Loop because ExtendFileC may extend in increments;
			 * stop if it makes no progress (actualBytesAdded == 0)
			 * and accept the shorter allocation.
			 */
			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
				                                   (FCB*)fp,
				                                   bytesToAdd,
				                                   blockHint,
				                                   eflags,
				                                   &actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
						length = filebytes;
					break;
				}
			} /* endwhile */

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				if (skipupdate) {
					(void) hfs_minorupdate(vp);
				}
				else {
					(void) hfs_update(vp, TRUE);
					(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
				}
			}

			hfs_end_transaction(hfsmp);

			if (retval)
				goto Err_Exit;

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
				(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
		}

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;
				off_t zero_limit;

				/* Zero-fill only up to the end of the page containing current EOF. */
				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {
					struct timeval tv;

					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write():	*/
						hfs_unlock(cp);
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							microuptime(&tv);
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
					/* The page containing the (current) eof is invalid: just add the
					   remainder of the page to the invalid list, along with the area
					   being newly allocated:
					 */
						microuptime(&tv);
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					};
				}
			} else {
					panic("hfs_truncate: invoked on non-UBC object?!");
			};
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			/* Return all loaned blocks, then re-borrow what the new length needs. */
			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
#if QUOTA
		  off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
		  if (hfs_start_transaction(hfsmp) != 0) {
		      retval = EINVAL;
		      goto Err_Exit;
		  }

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
				                                     FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (hfsmp->jnl) {
				if (retval == 0) {
					fp->ff_size = length;
				}
				if (skipupdate) {
					(void) hfs_minorupdate(vp);
				}
				else {
					(void) hfs_update(vp, TRUE);
					(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
				}
			}
			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;
#if QUOTA
			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		/* Non-superuser writes strip the setuid/setgid bits. */
		if (!vfs_context_issuser(context)) {
			cp->c_mode &= ~(S_ISUID | S_ISGID);
			skipupdate = 0;
		}
	}
	if (skipupdate) {
		retval = hfs_minorupdate(vp);
	}
	else {
		cp->c_touch_chgtime = TRUE;	/* status changed */
		cp->c_touch_modtime = TRUE;	/* file data was modified */
		retval = hfs_update(vp, MNT_WAIT);
	}
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
		     -1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}

/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory.  In order to make the on-disk as safe as possible,
 * we remove the catalog entry before releasing the bitmap blocks and the
 * overflow extent records.  However, some work must be done prior to deleting
 * the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */

int
hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {

	struct filefork *fp = VTOF(vp);
	struct cnode *cp = VTOC(vp);
#if QUOTA
	int retval = 0;
#endif /* QUOTA */

	/* Cannot truncate an HFS directory!
 */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}

	/*
	 * See the comment below in hfs_truncate for why we need to call
	 * setsize here.  Essentially we want to avoid pending IO if we
	 * already know that the blocks are going to be released here.
	 * This function is only called when totally removing all storage for a file, so
	 * we can take a shortcut and immediately setsize (0);
	 */
	ubc_setsize(vp, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp))) {
		return(retval);
	}
#endif /* QUOTA */

	/* Wipe out any invalid ranges which have yet to be backed by disk */
	rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);

	/*
	 * Account for any unmapped blocks. Since we're deleting the
	 * entire file, we don't have to worry about just shrinking
	 * to a smaller number of borrowed blocks.
	 * Return every loaned block: fix up the fork, cnode, and
	 * mount-wide loaned-block counters under the mount lock.
	 */
	if (fp->ff_unallocblocks > 0) {
		u_int32_t loanedBlocks;

		HFS_MOUNT_LOCK(hfsmp, TRUE);

		loanedBlocks = fp->ff_unallocblocks;
		cp->c_blocks -= loanedBlocks;
		fp->ff_blocks -= loanedBlocks;
		fp->ff_unallocblocks = 0;

		hfsmp->loanedBlocks -= loanedBlocks;

		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	return 0;
}


/*
 * Special wrapper around calling TruncateFileC.  This function is useable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file.  The simplification here is that we know
 * that we are releasing all blocks.
 *
 * Note that this function may be called when there is no vnode backing
 * the file fork in question.
We may call this from hfs_vnop_inactive 3225 * to clear out resource fork data (and may not want to clear out the data 3226 * fork yet). As a result, we pointer-check both sets of inputs before 3227 * doing anything with them. 3228 * 3229 * The caller is responsible for saving off a copy of the filefork(s) 3230 * embedded within the cnode prior to calling this function. The pointers 3231 * supplied as arguments must be valid even if the cnode is no longer valid. 3232 */ 3233 3234int 3235hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, 3236 struct filefork *rsrcfork, u_int32_t fileid) { 3237 3238 off_t filebytes; 3239 u_int32_t fileblocks; 3240 int blksize = 0; 3241 int error = 0; 3242 int lockflags; 3243 3244 blksize = hfsmp->blockSize; 3245 3246 /* Data Fork */ 3247 if ((datafork != NULL) && (datafork->ff_blocks > 0)) { 3248 fileblocks = datafork->ff_blocks; 3249 filebytes = (off_t)fileblocks * (off_t)blksize; 3250 3251 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ 3252 3253 while (filebytes > 0) { 3254 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) { 3255 filebytes -= HFS_BIGFILE_SIZE; 3256 } else { 3257 filebytes = 0; 3258 } 3259 3260 /* Start a transaction, and wipe out as many blocks as we can in this iteration */ 3261 if (hfs_start_transaction(hfsmp) != 0) { 3262 error = EINVAL; 3263 break; 3264 } 3265 3266 if (datafork->ff_unallocblocks == 0) { 3267 /* Protect extents b-tree and allocation bitmap */ 3268 lockflags = SFL_BITMAP; 3269 if (overflow_extents(datafork)) 3270 lockflags |= SFL_EXTENTS; 3271 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3272 3273 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false)); 3274 3275 hfs_systemfile_unlock(hfsmp, lockflags); 3276 } 3277 if (error == 0) { 3278 datafork->ff_size = filebytes; 3279 } 3280 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3281 3282 /* Finish the transaction 
and start over if necessary */ 3283 hfs_end_transaction(hfsmp); 3284 3285 if (error) { 3286 break; 3287 } 3288 } 3289 } 3290 3291 /* Resource fork */ 3292 if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) { 3293 fileblocks = rsrcfork->ff_blocks; 3294 filebytes = (off_t)fileblocks * (off_t)blksize; 3295 3296 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ 3297 3298 while (filebytes > 0) { 3299 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) { 3300 filebytes -= HFS_BIGFILE_SIZE; 3301 } else { 3302 filebytes = 0; 3303 } 3304 3305 /* Start a transaction, and wipe out as many blocks as we can in this iteration */ 3306 if (hfs_start_transaction(hfsmp) != 0) { 3307 error = EINVAL; 3308 break; 3309 } 3310 3311 if (rsrcfork->ff_unallocblocks == 0) { 3312 /* Protect extents b-tree and allocation bitmap */ 3313 lockflags = SFL_BITMAP; 3314 if (overflow_extents(rsrcfork)) 3315 lockflags |= SFL_EXTENTS; 3316 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3317 3318 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false)); 3319 3320 hfs_systemfile_unlock(hfsmp, lockflags); 3321 } 3322 if (error == 0) { 3323 rsrcfork->ff_size = filebytes; 3324 } 3325 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3326 3327 /* Finish the transaction and start over if necessary */ 3328 hfs_end_transaction(hfsmp); 3329 3330 if (error) { 3331 break; 3332 } 3333 } 3334 } 3335 3336 return error; 3337} 3338 3339 3340/* 3341 * Truncate a cnode to at most length size, freeing (or adding) the 3342 * disk blocks. 3343 */ 3344int 3345hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, 3346 int skipupdate, vfs_context_t context) 3347{ 3348 struct filefork *fp = VTOF(vp); 3349 off_t filebytes; 3350 u_int32_t fileblocks; 3351 int blksize, error = 0; 3352 struct cnode *cp = VTOC(vp); 3353 3354 /* Cannot truncate an HFS directory! 
*/ 3355 if (vnode_isdir(vp)) { 3356 return (EISDIR); 3357 } 3358 /* A swap file cannot change size. */ 3359 if (vnode_isswap(vp) && (length != 0)) { 3360 return (EPERM); 3361 } 3362 3363 blksize = VTOVCB(vp)->blockSize; 3364 fileblocks = fp->ff_blocks; 3365 filebytes = (off_t)fileblocks * (off_t)blksize; 3366 3367 // 3368 // Have to do this here so that we don't wind up with 3369 // i/o pending for blocks that are about to be released 3370 // if we truncate the file. 3371 // 3372 // If skipsetsize is set, then the caller is responsible 3373 // for the ubc_setsize. 3374 // 3375 // Even if skipsetsize is set, if the length is zero we 3376 // want to call ubc_setsize() because as of SnowLeopard 3377 // it will no longer cause any page-ins and it will drop 3378 // any dirty pages so that we don't do any i/o that we 3379 // don't have to. This also prevents a race where i/o 3380 // for truncated blocks may overwrite later data if the 3381 // blocks get reallocated to a different file. 3382 // 3383 if (!skipsetsize || length == 0) 3384 ubc_setsize(vp, length); 3385 3386 // have to loop truncating or growing files that are 3387 // really big because otherwise transactions can get 3388 // enormous and consume too many kernel resources. 

	/*
	 * Shrink or grow in HFS_BIGFILE_SIZE chunks when the fork has
	 * overflow extents, so each do_hfs_truncate call stays inside a
	 * bounded journal transaction.
	 */
	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			/* force the catalog record out with each chunk */
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, skipupdate, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	return (error);
}



/*
 * Preallocate file storage space.
 *
 * Implements VNOP_ALLOCATE: extends (or via hfs_truncate, shrinks) the
 * physical allocation of a regular file.  On success *a_bytesallocated
 * reports how many bytes were actually added.  Returns 0 or an errno
 * (EISDIR for non-regular vnodes, EINVAL for bad lengths/flags).
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t  a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_int32_t fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;
	time_t orig_ctime;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return (EINVAL);

	cp = VTOC(vp);

	orig_ctime = VTOC(vp)->c_ctime;

	check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);

	/* truncate lock first, then cnode lock -- the required ordering */
	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		goto Err_Exit;
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	/* non-superusers don't get to dip into the reserved space */
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;
	if (hfs_virtualmetafile(cp))
		extendFlags |= kEFMetadataMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necesary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
				cred, 0);
		if (retval)
			goto Err_Exit;

#endif /* QUOTA */
		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}


		/* grow in bounded (HFS_BIGFILE_SIZE) transaction-sized steps */
		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			if (extendFlags & kEFContigMask) {
				// if we're on a sparse device, this will force it to do a
				// full scan to find the space needed.
				hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
						(FCB*)fp,
						bytesRequested,
						blockHint,
						extendFlags,
						&actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);
		}


		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
		/* These are  bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
#endif /* QUOTA */

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			/* drop the cnode lock around ubc_setsize to avoid deadlock */
			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock_truncate(cp, 0);
	hfs_unlock(cp);
	return (retval);
}


/*
 * Pagein for HFS filesystem
 *
 * Services VNOP_PAGEIN.  With V2 paging (a_pl == NULL) we create our own
 * UPL, skip slots that are already resident, hand compressed files to
 * decmpfs, and track bytes read for hot-file recording.  Returns 0 or an
 * errno (note the per-range error handling below).
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
	   	vnode_t a_vp,
	   	upl_t 	      a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t 	vp;
	struct cnode	*cp;
	struct filefork *fp;
	int		error = 0;
	upl_t 		upl;
	upl_page_info_t	*pl;
	off_t		f_offset;
	int		offset;
	int		isize;
	int		pg_index;
	boolean_t	truncate_lock_held = FALSE;
	boolean_t 	file_converted = FALSE;
	kern_return_t	kret;

	vp = ap->a_vp;
	cp = VTOC(vp);
	fp = VTOF(vp);

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
		return error;
	}
#endif /* CONFIG_PROTECT */

	if (ap->a_pl != NULL) {
		/*
		 * this can only happen for swap files now that
		 * we're asking for V2 paging behavior...
		 * so don't need to worry about decompression, or
		 * keeping track of blocks read or taking the truncate lock
		 */
		error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
				       ap->a_size, (off_t)fp->ff_size, ap->a_flags);
		goto pagein_done;
	}

retry_pagein:
	/*
	 * take truncate lock (shared/recursive) to guard against
	 * zero-fill thru fsync interfering, but only for v2
	 *
	 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
	 * lock shared and we are allowed to recurse 1 level if this thread already
	 * owns the lock exclusively... this can legally occur
	 * if we are doing a shrinking ftruncate against a file
	 * that is mapped private, and the pages being truncated
	 * do not currently exist in the cache... in that case
	 * we will have to page-in the missing pages in order
	 * to provide them to the private mapping... we must
	 * also call hfs_unlock_truncate with a postive been_recursed
	 * arg to indicate that if we have recursed, there is no need to drop
	 * the lock. Allowing this simple recursion is necessary
	 * in order to avoid a certain deadlock... since the ftruncate
	 * already holds the truncate lock exclusively, if we try
	 * to acquire it shared to protect the pagein path, we will
	 * hang this thread
	 *
	 * NOTE: The if () block below is a workaround in order to prevent a
	 * VM deadlock. See rdar://7853471.
	 *
	 * If we are in a forced unmount, then launchd will still have the
	 * dyld_shared_cache file mapped as it is trying to reboot.  If we
	 * take the truncate lock here to service a page fault, then our
	 * thread could deadlock with the forced-unmount.  The forced unmount
	 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
	 * marked C_DELETED, it will call ubc_setsize(0).  As a result, the unmount
	 * thread will think it needs to copy all of the data out of the file
	 * and into a VM copy object.  If we hold the cnode lock here, then that
	 * VM operation will not be able to proceed, because we'll set a busy page
	 * before attempting to grab the lock.  Note that this isn't as simple as "don't
	 * call ubc_setsize" because doing that would just shift the problem to the
	 * ubc_msync done before the vnode is reclaimed.
	 *
	 * So, if a forced unmount on this volume is in flight AND the cnode is
	 * marked C_DELETED, then just go ahead and do the page in without taking
	 * the lock (thus suspending pagein_v2 semantics temporarily).  Since it's on a file
	 * that is not going to be available on the next mount, this seems like a
	 * OK solution from a correctness point of view, even though it is hacky.
	 */
	if (vfs_isforce(vp->v_mount)) {
		if (cp->c_flag & C_DELETED) {
			/* If we don't get it, then just go ahead and operate without the lock */
			truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK);
		}
	}
	else {
		hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
		truncate_lock_held = TRUE;
	}

	kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

	if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
		error = EINVAL;
		goto pagein_done;
	}
	ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

	isize = ap->a_size;

	/*
	 * Scan from the back to find the last page in the UPL, so that we
	 * aren't looking at a UPL that may have already been freed by the
	 * preceding aborts/completions.
	 */
	for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
		if (upl_page_present(pl, --pg_index))
			break;
		if (pg_index == 0) {
			/*
			 * no absent pages were found in the range specified
			 * just abort the UPL to get rid of it and then we're done
			 */
			ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
			goto pagein_done;
		}
	}
	/*
	 * initialize the offset variables before we touch the UPL.
	 * f_offset is the position into the file, in bytes
	 * offset is the position into the UPL, in bytes
	 * pg_index is the pg# of the UPL we're operating on
	 * isize is the offset into the UPL of the last page that is present.
	 */
	isize = ((pg_index + 1) * PAGE_SIZE);
	pg_index = 0;
	offset = 0;
	f_offset = ap->a_f_offset;

	while (isize) {
		int  xsize;
		int  num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_ABSENT, so it's possible
			 * to get back empty slots in the UPL.
			 * just skip over them
			 */
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;
			isize    -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		/*
		 * We know that we have at least one absent page.
		 * Now checking to see how many in a row we have
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_page_present(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;

#if HFS_COMPRESSION
		if (VNODE_IS_RSRC(vp)) {
			/* allow pageins of the resource fork */
		} else {
			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

			if (compressed) {
				if (truncate_lock_held) {
					/*
					 * can't hold the truncate lock when calling into the decmpfs layer
					 * since it calls back into this layer... even though we're only
					 * holding the lock in shared mode, and the re-entrant path only
					 * takes the lock shared, we can deadlock if some other thread
					 * tries to grab the lock exclusively in between.
					 */
					hfs_unlock_truncate(cp, 1);
					truncate_lock_held = FALSE;
				}
				ap->a_pl = upl;
				ap->a_pl_offset = offset;
				ap->a_f_offset = f_offset;
				ap->a_size = xsize;

				error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
				/*
				 * note that decpfs_pagein_compressed can change the state of
				 * 'compressed'... it will set it to 0 if the file is no longer
				 * compressed once the compression lock is successfully taken
				 * i.e. we would block on that lock while the file is being inflated
				 */
				if (compressed) {
					if (error == 0) {
						/* successful page-in, update the access time */
						VTOC(vp)->c_touch_acctime = TRUE;

						/* compressed files are not hot file candidates */
						if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
							fp->ff_bytesread = 0;
						}
					} else if (error == EAGAIN) {
						/*
						 * EAGAIN indicates someone else already holds the compression lock...
						 * to avoid deadlocking, we'll abort this range of pages with an
						 * indication that the pagein needs to be redriven
						 */
						ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
					}
					goto pagein_next_range;
				}
				else {
					/*
					 * Set file_converted only if the file became decompressed while we were
					 * paging in.  If it were still compressed, we would re-start the loop using the goto
					 * in the above block.  This avoid us overloading truncate_lock_held as our retry_pagein
					 * condition below, since we could have avoided taking the truncate lock to prevent
					 * a deadlock in the force unmount case.
					 */
					file_converted = TRUE;
				}
			}
			if (file_converted == TRUE) {
				/*
				 * the file was converted back to a regular file after we first saw it as compressed
				 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
				 * reset a_size so that we consider what remains of the original request
				 * and null out a_upl and a_pl_offset.
				 *
				 * We should only be able to get into this block if the decmpfs_pagein_compressed
				 * successfully decompressed the range in question for this file.
				 */
				ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

				ap->a_size = isize;
				ap->a_pl = NULL;
				ap->a_pl_offset = 0;

				/* Reset file_converted back to false so that we don't infinite-loop. */
				file_converted = FALSE;
				goto retry_pagein;
			}
		}
#endif
		error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

		/*
		 * Keep track of blocks read.
		 */
		if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
			int bytesread;
			int took_cnode_lock = 0;

			if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
				bytesread = fp->ff_size;
			else
				bytesread = xsize;

			/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
			if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				took_cnode_lock = 1;
			}
			/*
			 * If this file hasn't been seen since the start of
			 * the current sampling period then start over.
			 */
			if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
				struct timeval tv;

				fp->ff_bytesread = bytesread;
				microtime(&tv);
				cp->c_atime = tv.tv_sec;
			} else {
				fp->ff_bytesread += bytesread;
			}
			cp->c_touch_acctime = TRUE;
			if (took_cnode_lock)
				hfs_unlock(cp);
		}
pagein_next_range:
		f_offset += xsize;
		offset   += xsize;
		isize    -= xsize;
		pg_index += num_of_pages;

		/*
		 * NOTE(review): any error from cluster_pagein for this range is
		 * discarded here before the next range is processed; only an
		 * error from the final path out survives.  Looks intentional
		 * (each range is committed/aborted independently) -- confirm.
		 */
		error = 0;
	}

pagein_done:
	if (truncate_lock_held == TRUE) {
		/* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
		hfs_unlock_truncate(cp, 1);
	}

	return (error);
}

/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
	   vnode_t a_vp,
	   upl_t         a_pl,
	   vm_offset_t   a_pl_offset,
	   off_t         a_f_offset,
	   size_t        a_size,
	   int           a_flags
	   vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	off_t filesize;
	upl_t upl;
	upl_page_info_t* pl;
	vm_offset_t a_pl_offset;
	int a_flags;
	int is_pageoutv2 = 0;
	kern_return_t kret;

	cp = VTOC(vp);
	fp = VTOF(vp);

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {
		int request_flags;

		is_pageoutv2 = 1;
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;
		a_pl_offset = 0;

		/*
		 * For V2 semantics, we want to take the cnode truncate lock
		 * shared to guard against the file size changing via zero-filling.
		 *
		 * However, we have to be careful because we may be invoked
		 * via the ubc_msync path to write out dirty mmap'd pages
		 * in response to a lock event on a content-protected
		 * filesystem (e.g. to write out class A files).
		 * As a result, we want to take the truncate lock 'SHARED' with
		 * the mini-recursion locktype so that we don't deadlock/panic
		 * because we may be already holding the truncate lock exclusive to force any other
		 * IOs to have blocked behind us.
		 */
		hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);

		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		}
		else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
			retval = EINVAL;
			goto pageout_done;
		}
	}
	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we succesfully created it
	 */

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */

	if (is_pageoutv2) {
		off_t f_offset;
		int offset;
		int isize;
		int pg_index;
		int error;
		int error_ret = 0;

		isize = ap->a_size;
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
			if (pg_index == 0) {
				ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
				goto pageout_done;
			}
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

		offset = 0;
		pg_index = 0;

		while (isize) {
			int  xsize;
			int  num_of_pages;

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset   += PAGE_SIZE;
				isize    -= PAGE_SIZE;
				pg_index++;

				continue;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			num_of_pages = 1;
			xsize = isize - PAGE_SIZE;

			while (xsize) {
				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
					break;
				num_of_pages++;
				xsize -= PAGE_SIZE;
			}
			xsize = num_of_pages * PAGE_SIZE;

			if (!vnode_isswap(vp)) {
				off_t end_of_range;
				int tooklock;

				tooklock = 0;

				if (cp->c_lockowner != current_thread()) {
					if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
						/*
						 * we're in the v2 path, so we are the
						 * owner of the UPL... we may have already
						 * processed some of the UPL, so abort it
						 * from the current working offset to the
						 * end of the UPL
						 */
						ubc_upl_abort_range(upl,
								    offset,
								    ap->a_size - offset,
								    UPL_ABORT_FREE_ON_EMPTY);
						goto pageout_done;
					}
					tooklock = 1;
				}
				end_of_range = f_offset + xsize - 1;

				if (end_of_range >= filesize) {
					end_of_range = (off_t)(filesize - 1);
				}
				/* invalid ranges being written out are now backed by disk */
				if (f_offset < filesize) {
					rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
					cp->c_flag |= C_MODIFIED;  /* leof is dirty */
				}
				if (tooklock) {
					hfs_unlock(cp);
				}
			}
			if ((error = cluster_pageout(vp, upl, offset, f_offset,
							xsize, filesize, a_flags))) {
				if (error_ret == 0)
					error_ret = error;
			}
			f_offset += xsize;
			offset   += xsize;
			isize    -= xsize;
			pg_index += num_of_pages;
		}
		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {
			retval = error_ret;
		}
	} /* end block for v2 pageout behavior */
	else {
		if (!vnode_isswap(vp)) {
			off_t end_of_range;
			int tooklock = 0;

			if (cp->c_lockowner != current_thread()) {
				if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
					if (!(a_flags & UPL_NOCOMMIT)) {
						ubc_upl_abort_range(upl,
								    a_pl_offset,
								    ap->a_size,
								    UPL_ABORT_FREE_ON_EMPTY);
					}
					goto pageout_done;
				}
				tooklock = 1;
			}
			end_of_range = ap->a_f_offset + ap->a_size - 1;

			if (end_of_range >= filesize) {
				end_of_range = (off_t)(filesize - 1);
			}
			if (ap->a_f_offset < filesize) {
				rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
				cp->c_flag |= C_MODIFIED;  /* leof is dirty */
			}

			if (tooklock) {
				hfs_unlock(cp);
			}
		}
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
					 ap->a_size, filesize, a_flags);
	}

	/*
	 * If data was written, update the modification time of the file.
	 * If setuid or setgid bits are set and this process is not the
	 * superuser then clear the setuid and setgid bits as a precaution
	 * against tampering.
	 */
	if (retval == 0) {
		cp->c_touch_modtime = TRUE;
		cp->c_touch_chgtime = TRUE;
		if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
		    (vfs_context_suser(ap->a_context) != 0)) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cp->c_mode &= ~(S_ISUID | S_ISGID);
			hfs_unlock(cp);
		}
	}

pageout_done:
	if (is_pageoutv2) {
		/*
		 * Release the truncate lock.  Note that because
		 * we may have taken the lock recursively by
		 * being invoked via ubc_msync due to lockdown,
		 * we should release it recursively, too.
		 */
		hfs_unlock_truncate(cp, 1);
	}
	return (retval);
}

/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always be true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		/*
		 * NOTE(review): the last two bytes of a B-tree node hold the
		 * offset of the first record; 0x000e here is taken to mean the
		 * node is still in host (native) order -- confirm against
		 * hfs_swap_BTNode/TN1150 before changing.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
	        // XXXdbg
	        if (VTOHFS(vp)->jnl) {
		        panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}

/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
4371 */ 4372int 4373hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, 4374 struct proc *p) 4375{ 4376 struct cnode *cp; 4377 struct filefork *fp; 4378 struct hfsmount *hfsmp; 4379 u_int32_t headblks; 4380 u_int32_t datablks; 4381 u_int32_t blksize; 4382 u_int32_t growsize; 4383 u_int32_t nextallocsave; 4384 daddr64_t sector_a, sector_b; 4385 int eflags; 4386 off_t newbytes; 4387 int retval; 4388 int lockflags = 0; 4389 int took_trunc_lock = 0; 4390 int started_tr = 0; 4391 enum vtype vnodetype; 4392 4393 vnodetype = vnode_vtype(vp); 4394 if (vnodetype != VREG && vnodetype != VLNK) { 4395 return (EPERM); 4396 } 4397 4398 hfsmp = VTOHFS(vp); 4399 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) { 4400 return (ENOSPC); 4401 } 4402 4403 cp = VTOC(vp); 4404 fp = VTOF(vp); 4405 if (fp->ff_unallocblocks) 4406 return (EINVAL); 4407 4408#if CONFIG_PROTECT 4409 /* 4410 * <rdar://problem/9118426> 4411 * Disable HFS file relocation on content-protected filesystems 4412 */ 4413 if (cp_fs_protected (hfsmp->hfs_mp)) { 4414 return EINVAL; 4415 } 4416#endif 4417 /* If it's an SSD, also disable HFS relocation */ 4418 if (hfsmp->hfs_flags & HFS_SSD) { 4419 return EINVAL; 4420 } 4421 4422 4423 blksize = hfsmp->blockSize; 4424 if (blockHint == 0) 4425 blockHint = hfsmp->nextAllocation; 4426 4427 if ((fp->ff_size > 0x7fffffff) || 4428 ((fp->ff_size > blksize) && vnodetype == VLNK)) { 4429 return (EFBIG); 4430 } 4431 4432 // 4433 // We do not believe that this call to hfs_fsync() is 4434 // necessary and it causes a journal transaction 4435 // deadlock so we are removing it. 4436 // 4437 //if (vnodetype == VREG && !vnode_issystem(vp)) { 4438 // retval = hfs_fsync(vp, MNT_WAIT, 0, p); 4439 // if (retval) 4440 // return (retval); 4441 //} 4442 4443 if (!vnode_issystem(vp) && (vnodetype != VLNK)) { 4444 hfs_unlock(cp); 4445 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); 4446 /* Force lock since callers expects lock to be held. 
*/ 4447 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) { 4448 hfs_unlock_truncate(cp, 0); 4449 return (retval); 4450 } 4451 /* No need to continue if file was removed. */ 4452 if (cp->c_flag & C_NOEXISTS) { 4453 hfs_unlock_truncate(cp, 0); 4454 return (ENOENT); 4455 } 4456 took_trunc_lock = 1; 4457 } 4458 headblks = fp->ff_blocks; 4459 datablks = howmany(fp->ff_size, blksize); 4460 growsize = datablks * blksize; 4461 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask; 4462 if (blockHint >= hfsmp->hfs_metazone_start && 4463 blockHint <= hfsmp->hfs_metazone_end) 4464 eflags |= kEFMetadataMask; 4465 4466 if (hfs_start_transaction(hfsmp) != 0) { 4467 if (took_trunc_lock) 4468 hfs_unlock_truncate(cp, 0); 4469 return (EINVAL); 4470 } 4471 started_tr = 1; 4472 /* 4473 * Protect the extents b-tree and the allocation bitmap 4474 * during MapFileBlockC and ExtendFileC operations. 4475 */ 4476 lockflags = SFL_BITMAP; 4477 if (overflow_extents(fp)) 4478 lockflags |= SFL_EXTENTS; 4479 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 4480 4481 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, §or_a, NULL); 4482 if (retval) { 4483 retval = MacToVFSError(retval); 4484 goto out; 4485 } 4486 4487 /* 4488 * STEP 1 - acquire new allocation blocks. 
4489 */ 4490 nextallocsave = hfsmp->nextAllocation; 4491 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes); 4492 if (eflags & kEFMetadataMask) { 4493 HFS_MOUNT_LOCK(hfsmp, TRUE); 4494 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave); 4495 MarkVCBDirty(hfsmp); 4496 HFS_MOUNT_UNLOCK(hfsmp, TRUE); 4497 } 4498 4499 retval = MacToVFSError(retval); 4500 if (retval == 0) { 4501 cp->c_flag |= C_MODIFIED; 4502 if (newbytes < growsize) { 4503 retval = ENOSPC; 4504 goto restore; 4505 } else if (fp->ff_blocks < (headblks + datablks)) { 4506 printf("hfs_relocate: allocation failed"); 4507 retval = ENOSPC; 4508 goto restore; 4509 } 4510 4511 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, §or_b, NULL); 4512 if (retval) { 4513 retval = MacToVFSError(retval); 4514 } else if ((sector_a + 1) == sector_b) { 4515 retval = ENOSPC; 4516 goto restore; 4517 } else if ((eflags & kEFMetadataMask) && 4518 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) > 4519 hfsmp->hfs_metazone_end)) { 4520#if 0 4521 const char * filestr; 4522 char emptystr = '\0'; 4523 4524 if (cp->c_desc.cd_nameptr != NULL) { 4525 filestr = (const char *)&cp->c_desc.cd_nameptr[0]; 4526 } else if (vnode_name(vp) != NULL) { 4527 filestr = vnode_name(vp); 4528 } else { 4529 filestr = &emptystr; 4530 } 4531#endif 4532 retval = ENOSPC; 4533 goto restore; 4534 } 4535 } 4536 /* Done with system locks and journal for now. */ 4537 hfs_systemfile_unlock(hfsmp, lockflags); 4538 lockflags = 0; 4539 hfs_end_transaction(hfsmp); 4540 started_tr = 0; 4541 4542 if (retval) { 4543 /* 4544 * Check to see if failure is due to excessive fragmentation. 4545 */ 4546 if ((retval == ENOSPC) && 4547 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) { 4548 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE; 4549 } 4550 goto out; 4551 } 4552 /* 4553 * STEP 2 - clone file data into the new allocation blocks. 
4554 */ 4555 4556 if (vnodetype == VLNK) 4557 retval = hfs_clonelink(vp, blksize, cred, p); 4558 else if (vnode_issystem(vp)) 4559 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p); 4560 else 4561 retval = hfs_clonefile(vp, headblks, datablks, blksize); 4562 4563 /* Start transaction for step 3 or for a restore. */ 4564 if (hfs_start_transaction(hfsmp) != 0) { 4565 retval = EINVAL; 4566 goto out; 4567 } 4568 started_tr = 1; 4569 if (retval) 4570 goto restore; 4571 4572 /* 4573 * STEP 3 - switch to cloned data and remove old blocks. 4574 */ 4575 lockflags = SFL_BITMAP; 4576 if (overflow_extents(fp)) 4577 lockflags |= SFL_EXTENTS; 4578 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 4579 4580 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks); 4581 4582 hfs_systemfile_unlock(hfsmp, lockflags); 4583 lockflags = 0; 4584 if (retval) 4585 goto restore; 4586out: 4587 if (took_trunc_lock) 4588 hfs_unlock_truncate(cp, 0); 4589 4590 if (lockflags) { 4591 hfs_systemfile_unlock(hfsmp, lockflags); 4592 lockflags = 0; 4593 } 4594 4595 /* Push cnode's new extent data to disk. */ 4596 if (retval == 0) { 4597 (void) hfs_update(vp, MNT_WAIT); 4598 } 4599 if (hfsmp->jnl) { 4600 if (cp->c_cnid < kHFSFirstUserCatalogNodeID) 4601 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); 4602 else 4603 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); 4604 } 4605exit: 4606 if (started_tr) 4607 hfs_end_transaction(hfsmp); 4608 4609 return (retval); 4610 4611restore: 4612 if (fp->ff_blocks == headblks) { 4613 if (took_trunc_lock) 4614 hfs_unlock_truncate(cp, 0); 4615 goto exit; 4616 } 4617 /* 4618 * Give back any newly allocated space. 
4619 */ 4620 if (lockflags == 0) { 4621 lockflags = SFL_BITMAP; 4622 if (overflow_extents(fp)) 4623 lockflags |= SFL_EXTENTS; 4624 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 4625 } 4626 4627 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp), 4628 FTOC(fp)->c_fileid, false); 4629 4630 hfs_systemfile_unlock(hfsmp, lockflags); 4631 lockflags = 0; 4632 4633 if (took_trunc_lock) 4634 hfs_unlock_truncate(cp, 0); 4635 goto exit; 4636} 4637 4638 4639/* 4640 * Clone a symlink. 4641 * 4642 */ 4643static int 4644hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p) 4645{ 4646 struct buf *head_bp = NULL; 4647 struct buf *tail_bp = NULL; 4648 int error; 4649 4650 4651 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp); 4652 if (error) 4653 goto out; 4654 4655 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META); 4656 if (tail_bp == NULL) { 4657 error = EIO; 4658 goto out; 4659 } 4660 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize); 4661 error = (int)buf_bwrite(tail_bp); 4662out: 4663 if (head_bp) { 4664 buf_markinvalid(head_bp); 4665 buf_brelse(head_bp); 4666 } 4667 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); 4668 4669 return (error); 4670} 4671 4672/* 4673 * Clone a file's data within the file. 
4674 * 4675 */ 4676static int 4677hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) 4678{ 4679 caddr_t bufp; 4680 size_t bufsize; 4681 size_t copysize; 4682 size_t iosize; 4683 size_t offset; 4684 off_t writebase; 4685 uio_t auio; 4686 int error = 0; 4687 4688 writebase = blkstart * blksize; 4689 copysize = blkcnt * blksize; 4690 iosize = bufsize = MIN(copysize, 128 * 1024); 4691 offset = 0; 4692 4693 hfs_unlock(VTOC(vp)); 4694 4695#if CONFIG_PROTECT 4696 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { 4697 hfs_lock(VTOC(vp), HFS_FORCE_LOCK); 4698 return (error); 4699 } 4700#endif /* CONFIG_PROTECT */ 4701 4702 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { 4703 hfs_lock(VTOC(vp), HFS_FORCE_LOCK); 4704 return (ENOMEM); 4705 } 4706 4707 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); 4708 4709 while (offset < copysize) { 4710 iosize = MIN(copysize - offset, iosize); 4711 4712 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); 4713 uio_addiov(auio, (uintptr_t)bufp, iosize); 4714 4715 error = cluster_read(vp, auio, copysize, IO_NOCACHE); 4716 if (error) { 4717 printf("hfs_clonefile: cluster_read failed - %d\n", error); 4718 break; 4719 } 4720 if (uio_resid(auio) != 0) { 4721 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio)); 4722 error = EIO; 4723 break; 4724 } 4725 4726 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE); 4727 uio_addiov(auio, (uintptr_t)bufp, iosize); 4728 4729 error = cluster_write(vp, auio, writebase + offset, 4730 writebase + offset + iosize, 4731 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC); 4732 if (error) { 4733 printf("hfs_clonefile: cluster_write failed - %d\n", error); 4734 break; 4735 } 4736 if (uio_resid(auio) != 0) { 4737 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n"); 4738 error = EIO; 4739 break; 4740 } 4741 offset += iosize; 4742 } 4743 uio_free(auio); 4744 4745 if ((blksize & PAGE_MASK)) { 4746 /* 4747 * since the copy may 
not have started on a PAGE 4748 * boundary (or may not have ended on one), we 4749 * may have pages left in the cache since NOCACHE 4750 * will let partially written pages linger... 4751 * lets just flush the entire range to make sure 4752 * we don't have any pages left that are beyond 4753 * (or intersect) the real LEOF of this file 4754 */ 4755 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY); 4756 } else { 4757 /* 4758 * No need to call ubc_sync_range or hfs_invalbuf 4759 * since the file was copied using IO_NOCACHE and 4760 * the copy was done starting and ending on a page 4761 * boundary in the file. 4762 */ 4763 } 4764 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); 4765 4766 hfs_lock(VTOC(vp), HFS_FORCE_LOCK); 4767 return (error); 4768} 4769 4770/* 4771 * Clone a system (metadata) file. 4772 * 4773 */ 4774static int 4775hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, 4776 kauth_cred_t cred, struct proc *p) 4777{ 4778 caddr_t bufp; 4779 char * offset; 4780 size_t bufsize; 4781 size_t iosize; 4782 struct buf *bp = NULL; 4783 daddr64_t blkno; 4784 daddr64_t blk; 4785 daddr64_t start_blk; 4786 daddr64_t last_blk; 4787 int breadcnt; 4788 int i; 4789 int error = 0; 4790 4791 4792 iosize = GetLogicalBlockSize(vp); 4793 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1); 4794 breadcnt = bufsize / iosize; 4795 4796 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { 4797 return (ENOMEM); 4798 } 4799 start_blk = ((daddr64_t)blkstart * blksize) / iosize; 4800 last_blk = ((daddr64_t)blkcnt * blksize) / iosize; 4801 blkno = 0; 4802 4803 while (blkno < last_blk) { 4804 /* 4805 * Read up to a megabyte 4806 */ 4807 offset = bufp; 4808 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) { 4809 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp); 4810 if (error) { 4811 printf("hfs_clonesysfile: meta_bread error %d\n", error); 4812 goto out; 4813 } 4814 if (buf_count(bp) 
!= iosize) { 4815 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp)); 4816 goto out; 4817 } 4818 bcopy((char *)buf_dataptr(bp), offset, iosize); 4819 4820 buf_markinvalid(bp); 4821 buf_brelse(bp); 4822 bp = NULL; 4823 4824 offset += iosize; 4825 } 4826 4827 /* 4828 * Write up to a megabyte 4829 */ 4830 offset = bufp; 4831 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) { 4832 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META); 4833 if (bp == NULL) { 4834 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno); 4835 error = EIO; 4836 goto out; 4837 } 4838 bcopy(offset, (char *)buf_dataptr(bp), iosize); 4839 error = (int)buf_bwrite(bp); 4840 bp = NULL; 4841 if (error) 4842 goto out; 4843 offset += iosize; 4844 } 4845 } 4846out: 4847 if (bp) { 4848 buf_brelse(bp); 4849 } 4850 4851 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); 4852 4853 error = hfs_fsync(vp, MNT_WAIT, 0, p); 4854 4855 return (error); 4856} 4857