1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* @(#)hfs_readwrite.c 1.0 29 * 30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved 31 * 32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files. 
33 * 34 */ 35 36#include <sys/param.h> 37#include <sys/systm.h> 38#include <sys/resourcevar.h> 39#include <sys/kernel.h> 40#include <sys/fcntl.h> 41#include <sys/filedesc.h> 42#include <sys/stat.h> 43#include <sys/buf.h> 44#include <sys/buf_internal.h> 45#include <sys/proc.h> 46#include <sys/kauth.h> 47#include <sys/vnode.h> 48#include <sys/vnode_internal.h> 49#include <sys/uio.h> 50#include <sys/vfs_context.h> 51#include <sys/fsevents.h> 52#include <kern/kalloc.h> 53#include <sys/disk.h> 54#include <sys/sysctl.h> 55#include <sys/fsctl.h> 56#include <sys/mount_internal.h> 57#include <sys/file_internal.h> 58 59#include <miscfs/specfs/specdev.h> 60 61#include <sys/ubc.h> 62#include <sys/ubc_internal.h> 63 64#include <vm/vm_pageout.h> 65#include <vm/vm_kern.h> 66 67#include <sys/kdebug.h> 68 69#include "hfs.h" 70#include "hfs_attrlist.h" 71#include "hfs_endian.h" 72#include "hfs_fsctl.h" 73#include "hfs_quota.h" 74#include "hfscommon/headers/FileMgrInternal.h" 75#include "hfscommon/headers/BTreesInternal.h" 76#include "hfs_cnode.h" 77#include "hfs_dbg.h" 78 79#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2))) 80 81enum { 82 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */ 83}; 84 85/* from bsd/hfs/hfs_vfsops.c */ 86extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); 87 88static int hfs_clonefile(struct vnode *, int, int, int); 89static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *); 90static int hfs_minorupdate(struct vnode *vp); 91static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context); 92 93/* from bsd/hfs/hfs_vnops.c */ 94extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp); 95 96 97 98int flush_cache_on_write = 0; 99SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to 
uncached files");

/*
 * hfs_vnop_read - Read data from a file (VNOP_READ entry point).
 *
 * Only regular files may be read: directories return EISDIR, all other
 * vnode types EPERM.  A zero-length request succeeds immediately and a
 * negative offset returns EINVAL.  The cnode truncate lock is taken
 * SHARED to protect against a concurrent size change for the duration
 * of the transfer, which is performed by cluster_read().
 *
 * Returns 0 on success or an errno.  If cluster_read() returns EAGAIN
 * (throttled I/O), the thread is throttled via throttle_lowpri_io()
 * and the read is retried from "read_again".
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	/*
	   struct vnop_read_args {
	   struct vnodeop_desc *a_desc;
	   vnode_t a_vp;
	   struct uio *a_uio;
	   int a_ioflag;
	   vfs_context_t a_context;
	   };
	 */

	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	off_t filesize;
	off_t filebytes;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int retval = 0;
	int took_truncate_lock = 0;
	int io_throttle = 0;

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
		if (vnode_isdir(vp))
			return (EISDIR);
		else
			return (EPERM);
	}
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
	if (offset < 0)
		return (EINVAL);	/* cant read from a negative offset */

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
			return 0;
		}
		/* otherwise read the resource fork normally */
	} else {
		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
		if (compressed) {
			retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
			if (compressed) {
				if (retval == 0) {
					/* successful read, update the access time */
					VTOC(vp)->c_touch_acctime = TRUE;

					/* compressed files are not hot file candidates */
					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
						VTOF(vp)->ff_bytesread = 0;
					}
				}
				return retval;
			}
			/* otherwise the file was converted back to a regular file while we were reading it */
			retval = 0;
		} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
			int error;

			/* dataless file: give the namespace handler a chance to materialize it */
			error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
			if (error) {
				return error;
			}

		}
	}
#endif /* HFS_COMPRESSION */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
	/* content protection: verify we may read this file's (possibly encrypted) data */
	if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
		goto exit;
	}
#endif

	/*
	 * If this read request originated from a syscall (as opposed to
	 * an in-kernel page fault or something), then set it up for
	 * throttle checks
	 */
	if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
		io_throttle = IO_RETURN_ON_THROTTLE;
	}

read_again:

	/* Protect against a size change. */
	hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
	took_truncate_lock = 1;

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/*
	 * Reading past EOF is not an error (returns 0 bytes), except that an
	 * HFS standard volume caps file size at MAXHFSFILESIZE (2^31 - 1).
	 */
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {
			retval = EFBIG;
		}
		goto exit;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track blocks read (for the hot-file clustering recorder).
	 */
	if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;
		off_t bytesread;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < hfsmp->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
exit:
	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}
	if (retval == EAGAIN) {
		/* cluster_read was throttled; wait our turn and retry */
		throttle_lowpri_io(1);

		retval = 0;
		goto read_again;
	}
	return (retval);
}

/*
 * hfs_vnop_write - Write data to a file (VNOP_WRITE entry point).
 *
 * Only regular files may be written (EPERM otherwise); negative offsets
 * return EINVAL and zero-length writes succeed trivially.  The cnode
 * truncate lock is normally taken SHARED, and upgraded to EXCLUSIVE when
 * the write may expose uninitialized block contents (delayed allocations,
 * file extension, or overlap with invalid ranges — see the long comment
 * at the upgrade site below).  Allocation of new blocks happens inside a
 * journal transaction; the data transfer itself is done by cluster_write().
 *
 * Returns 0 on success or an errno.  EAGAIN from a throttled
 * cluster_write causes a retry from "again"; ENOSPC during extension may
 * be converted into a successful partial write (POSIX semantics).
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t origFileSize;
	off_t writelimit;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	off_t filebytes;
	off_t offset;
	ssize_t resid;
	int eflags;
	int ioflag = ap->a_ioflag;
	int retval = 0;
	int lockflags;
	int cnode_locked = 0;
	int partialwrite = 0;
	int do_snapshot = 1;
	time_t orig_ctime=VTOC(vp)->c_ctime;
	int took_truncate_lock = 0;
	int io_return_on_throttle = 0;
	struct rl_entry *invalid_range;

#if HFS_COMPRESSION
	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
		switch(state) {
			case FILE_IS_COMPRESSED:
				return EACCES;
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow writes but do not
				   bother with snapshots or else we will deadlock.
				*/
				do_snapshot = 0;
				break;
			default:
				printf("invalid state %d for compressed file\n", state);
				/* fall through */
		}
	} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		int error;

		/* dataless file: give the namespace handler a chance to materialize it */
		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
		if (error != 0) {
			return error;
		}
	}

	if (do_snapshot) {
		check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
	}

#endif

	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (offset < 0)
		return (EINVAL);
	if (resid == 0)
		return (E_NONE);
	if (!vnode_isreg(vp))
		return (EPERM);  /* Can only write regular files */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
	/* content protection: verify we may write this file's data */
	if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
		goto exit;
	}
#endif

	eflags = kEFDeferMask;	/* defer file block allocations */
#if HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
		ioflag |= IO_SYNC;
	}
#endif /* HFS_SPARSE_DEV */

	/* Only honor throttling for syscall-originated, single-writer I/O */
	if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
			(IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
		io_return_on_throttle = IO_RETURN_ON_THROTTLE;
	}

again:
	/*
	 * Protect against a size change.
	 *
	 * Note: If took_truncate_lock is true, then we previously got the lock shared
	 * but needed to upgrade to exclusive.  So try getting it exclusive from the
	 * start.
	 */
	if (ioflag & IO_APPEND || took_truncate_lock) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	}
	else {
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
	}
	took_truncate_lock = 1;

	/* Update UIO */
	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	}
	/* append-only files may only be written at EOF */
	if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
		retval = EPERM;
		goto exit;
	}

	origFileSize = fp->ff_size;
	writelimit = offset + resid;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/*
	 * We may need an exclusive truncate lock for several reasons, all
	 * of which are because we may be writing to a (portion of a) block
	 * for the first time, and we need to make sure no readers see the
	 * prior, uninitialized contents of the block.  The cases are:
	 *
	 * 1. We have unallocated (delayed allocation) blocks.  We may be
	 *    allocating new blocks to the file and writing to them.
	 *    (A more precise check would be whether the range we're writing
	 *    to contains delayed allocation blocks.)
	 * 2. We need to extend the file.  The bytes between the old EOF
	 *    and the new EOF are not yet initialized.  This is important
	 *    even if we're not allocating new blocks to the file.  If the
	 *    old EOF and new EOF are in the same block, we still need to
	 *    protect that range of bytes until they are written for the
	 *    first time.
	 * 3. The write overlaps some invalid ranges (delayed zero fill; that
	 *    part of the file has been allocated, but not yet written).
	 *
	 * If we had a shared lock with the above cases, we need to try to upgrade
	 * to an exclusive lock.  If the upgrade fails, we will lose the shared
	 * lock, and will need to take the truncate lock again; the took_truncate_lock
	 * flag will still be set, causing us to try for an exclusive lock next time.
	 *
	 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
	 * lock is held, since it protects the range lists.
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    ((fp->ff_unallocblocks != 0) ||
	     (writelimit > origFileSize))) {
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			/*
			 * Lock upgrade failed and we lost our shared lock, try again.
			 * Note: we do not set took_truncate_lock=0 here.  Leaving it
			 * set to 1 will cause us to try to get the lock exclusive.
			 */
			goto again;
		}
		else {
			/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
			cp->c_truncatelockowner = current_thread();
		}
	}

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
		goto exit;
	}
	cnode_locked = 1;

	/* bump the generation count on every data modification */
	if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
		hfs_incr_gencount (cp);
	}

	/*
	 * Now that we have the cnode lock, see if there are delayed zero fill ranges
	 * overlapping our write.  If so, we need the truncate lock exclusive (see above).
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
		/*
		 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
		 * a deadlock, rather than simply returning failure.  (That is, it apparently does
		 * not behave like a "try_lock").  Since this condition is rare, just drop the
		 * cnode lock and try again.  Since took_truncate_lock is set, we will
		 * automatically take the truncate lock exclusive.
		 */
		hfs_unlock(cp);
		cnode_locked = 0;
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		goto again;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
	             (int)offset, uio_resid(uio), (int)fp->ff_size,
	             (int)filebytes, 0);

	/* Check if we do not need to extend the file */
	if (writelimit <= filebytes) {
		goto sizeok;
	}

	cred = vfs_context_ucred(ap->a_context);
	bytesToAdd = writelimit - filebytes;

#if QUOTA
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
			   cred, 0);
	if (retval)
		goto exit;
#endif /* QUOTA */

	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto exit;
	}

	/* Allocate blocks until the write range is fully backed (or we run out) */
	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		/* non-superusers may not dip into the reserved space */
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
	}
	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	/*
	 * If we didn't grow the file enough try a partial write.
	 * POSIX expects this behavior.
	 */
	if ((retval == ENOSPC) && (filebytes > offset)) {
		retval = 0;
		partialwrite = 1;
		uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
		resid -= bytesToAdd;
		writelimit = filebytes;
	}
sizeok:
	if (retval == E_NONE) {
		off_t filesize;
		off_t zero_off;
		off_t tail_off;
		off_t inval_start;
		off_t inval_end;
		off_t io_start;
		int lflag;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check to see whether the area between the zero_offset and the start
			   of the transfer to see whether is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			}
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
			             eof_page_base,
			             fp->ff_size - 1,
			             &invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				};
			};

			if (inval_start < inval_end) {
				struct timeval tv;
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					hfs_unlock(cp);
					cnode_locked = 0;
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							zero_off, (off_t)0,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
					cnode_locked = 1;
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);
				};

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1, &fp->ff_invalidranges);
				microuptime(&tv);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			};

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;
		};

		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			};
		};

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 * before the current EOF it might be marked as invalid now and must be
		 * made readable (removed from the invalid ranges) before cluster_write
		 * tries to write it:
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			off_t io_end;

			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		};

		hfs_unlock(cp);
		cnode_locked = 0;

		/*
		 * We need to tell UBC the fork's new size BEFORE calling
		 * cluster_write, in case any of the new pages need to be
		 * paged out before cluster_write completes (which does happen
		 * in embedded systems due to extreme memory pressure).
		 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
		 * will be, so that it can pass that on to cluster_pageout, and
		 * allow those pageouts.
		 *
		 * We don't update ff_size yet since we don't want pageins to
		 * be able to see uninitialized data between the old and new
		 * EOF, until cluster_write has completed and initialized that
		 * part of the file.
		 *
		 * The vnode pager relies on the file size last given to UBC via
		 * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
		 * ff_size (whichever is larger).  NOTE: ff_new_size is always
		 * zero, unless we are extending the file via write.
		 */
		if (filesize > fp->ff_size) {
			fp->ff_new_size = filesize;
			ubc_setsize(vp, filesize);
		}
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
		if (retval) {
			fp->ff_new_size = 0;	/* no longer extending; use ff_size */

			if (retval == EAGAIN) {
				/*
				 * EAGAIN indicates that we still have I/O to do, but
				 * that we now need to be throttled
				 */
				if (resid != uio_resid(uio)) {
					/*
					 * did manage to do some I/O before returning EAGAIN
					 */
					resid = uio_resid(uio);
					offset = uio_offset(uio);

					cp->c_touch_chgtime = TRUE;
					cp->c_touch_modtime = TRUE;
				}
				if (filesize > fp->ff_size) {
					/*
					 * we called ubc_setsize before the call to
					 * cluster_write... since we only partially
					 * completed the I/O, we need to
					 * re-adjust our idea of the filesize based
					 * on our interim EOF
					 */
					ubc_setsize(vp, offset);

					fp->ff_size = offset;
				}
				goto exit;
			}
			if (filesize > origFileSize) {
				ubc_setsize(vp, origFileSize);
			}
			goto ioerr_exit;
		}

		if (filesize > origFileSize) {
			fp->ff_size = filesize;

			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING) {
				fp->ff_bytesread = 0;
			}
		}
		fp->ff_new_size = 0;	/* ff_size now has the correct size */

		/* If we wrote some bytes, then touch the change and mod times */
		if (resid > uio_resid(uio)) {
			cp->c_touch_chgtime = TRUE;
			cp->c_touch_modtime = TRUE;
		}
	}
	if (partialwrite) {
		/* restore the resid we held back so the caller sees a short write */
		uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
		resid += bytesToAdd;
	}

	// XXXdbg - see radar 4871353 for more info
	{
	    if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
		    VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
	    }
	}

ioerr_exit:
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
				cnode_locked = 1;
			}
			cp->c_mode &= ~(S_ISUID | S_ISGID);
		}
	}
	if (retval) {
		/* IO_UNIT: undo the partial write, restoring the original uio state */
		if (ioflag & IO_UNIT) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
				cnode_locked = 1;
			}
			(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
					   0, 0, ap->a_context);
			uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
			uio_setresid(uio, resid);
			filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		}
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
		if (!cnode_locked) {
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			cnode_locked = 1;
		}
		retval = hfs_update(vp, TRUE);
	}
	/* Updating vcbWrCnt doesn't need to be atomic. */
	hfsmp->vcbWrCnt++;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
	if (cnode_locked)
		hfs_unlock(cp);

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}
	if (retval == EAGAIN) {
		/* cluster_write was throttled; wait our turn and retry */
		throttle_lowpri_io(1);

		retval = 0;
		goto again;
	}
	return (retval);
}

/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

/*
 * Cache of directory-access results keyed by cnid, kept sorted so it
 * can be binary-searched (see cache_binSearch / lookup_bucket / add_node).
 */
struct access_cache {
	int numcached;           /* number of valid entries in acache/haveaccess */
	int cachehits;           /* these two for statistics gathering */
	int lookups;
	unsigned int *acache;    /* sorted array of cnids */
	unsigned char *haveaccess; /* per-entry access result (0 == has access, or an errno) */
};

struct access_t {
	uid_t     uid;              /* IN: effective user id */
	short     flags;            /* IN: access requested (i.e. R_OK) */
	short     num_groups;       /* IN: number of groups user belongs to */
	int       num_files;        /* IN: number of files to process */
	int       *file_ids;        /* IN: array of file ids */
	gid_t     *groups;          /* IN: array of groups */
	short     *access;          /* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

/* 32-bit userland layout of access_t (pointers become user32_addr_t) */
struct user32_access_t {
	uid_t     uid;              /* IN: effective user id */
	short     flags;            /* IN: access requested (i.e. R_OK) */
	short     num_groups;       /* IN: number of groups user belongs to */
	int       num_files;        /* IN: number of files to process */
	user32_addr_t file_ids;     /* IN: array of file ids */
	user32_addr_t groups;       /* IN: array of groups */
	user32_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
};

/* 64-bit userland layout of access_t */
struct user64_access_t {
	uid_t     uid;              /* IN: effective user id */
	short     flags;            /* IN: access requested (i.e. R_OK) */
	short     num_groups;       /* IN: number of groups user belongs to */
	int       num_files;        /* IN: number of files to process */
	user64_addr_t file_ids;     /* IN: array of file ids */
	user64_addr_t groups;       /* IN: array of groups */
	user64_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
	uint32_t   flags;           /* IN: access requested (i.e. R_OK) */
	uint32_t   num_files;       /* IN: number of files to process */
	uint32_t   map_size;        /* IN: size of the bit map */
	uint32_t  *file_ids;        /* IN: Array of file ids */
	char      *bitmap;          /* OUT: hash-bitmap of interesting directory ids */
	short     *access;          /* OUT: access info for each file (0 for 'has access') */
	uint32_t   num_parents;     /* future use */
	cnid_t    *parents;         /* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

/* 32-bit userland layout of ext_access_t */
struct user32_ext_access_t {
	uint32_t   flags;           /* IN: access requested (i.e. R_OK) */
	uint32_t   num_files;       /* IN: number of files to process */
	uint32_t   map_size;        /* IN: size of the bit map */
	user32_addr_t file_ids;     /* IN: Array of file ids */
	user32_addr_t bitmap;       /* OUT: hash-bitmap of interesting directory ids */
	user32_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
	uint32_t   num_parents;     /* future use */
	user32_addr_t parents;      /* future use */
};

/* 64-bit userland layout of ext_access_t */
struct user64_ext_access_t {
	uint32_t   flags;           /* IN: access requested (i.e. R_OK) */
	uint32_t   num_files;       /* IN: number of files to process */
	uint32_t   map_size;        /* IN: size of the bit map */
	user64_addr_t file_ids;     /* IN: array of file ids */
	user64_addr_t bitmap;       /* OUT: hash-bitmap of interesting directory ids */
	user64_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
	uint32_t   num_parents;     /* future use */
	user64_addr_t parents;      /* future use */
};


/*
 * Perform a binary search for the given parent_id.  Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
/*
 * array must be sorted in ascending order; hi is the index of the last
 * valid entry.  Returns the matching index, or -1 if parent_id is not
 * present.  On return, hi (and *no_match_indexp) is the slot where the
 * search converged — the insertion point used by add_node().
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
	int index=-1;
	unsigned int lo=0;

	do {
		unsigned int mid = ((hi - lo)/2) + lo;
		unsigned int this_id = array[mid];

		if (parent_id == this_id) {
			hi = mid;
			break;
		}

		if (parent_id < this_id) {
			hi = mid;
			continue;
		}

		if (parent_id > this_id) {
			lo = mid + 1;
			continue;
		}
	} while(lo < hi);

	/* check if lo and hi converged on the match */
	if (parent_id == array[hi]) {
		index = hi;
	}

	if (no_match_indexp) {
		*no_match_indexp = hi;
	}

	return index;
}


/*
 * Look up parent_id in the access cache.  Returns 1 on a hit (with
 * *indexp set to the entry's slot) or 0 on a miss (with *indexp set to
 * the slot where a new entry should be inserted).  numcached is
 * defensively clamped to NUM_CACHE_ENTRIES before searching.
 */
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
	unsigned int hi;
	int matches = 0;
	int index, no_match_index;

	if (cache->numcached == 0) {
		*indexp = 0;
		return 0;		// table is empty, so insert at index=0 and report no match
	}

	if (cache->numcached > NUM_CACHE_ENTRIES) {
		cache->numcached = NUM_CACHE_ENTRIES;
	}

	hi = cache->numcached - 1;

	index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

	/* if no existing entry found, find index for new one */
	if (index == -1) {
		index = no_match_index;
		matches = 0;
	} else {
		matches = 1;
	}

	*indexp = index;
	return matches;
}

/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (index == -1) {
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
				// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
				cache->haveaccess[lookup_index] = access;
			}

			/* mission accomplished */
			return;
		} else {
			index = lookup_index;
		}

	}

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= NUM_CACHE_ENTRIES) {
		cache->numcached = NUM_CACHE_ENTRIES-1;

		if (index > cache->numcached) {
			index = cache->numcached;
		}
	}

	/* keep the array sorted: step past a smaller neighbor at the insert slot */
	if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
		index++;
	}

	if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		/* NOTE(review): source/destination overlap — relies on bcopy's
		   memmove-like overlap handling to shift entries up by one */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
	}

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
	cache->numcached++;
}


/* Snapshot of the catalog info we need from an in-core cnode. */
struct cinfo {
	uid_t uid;          /* owner */
	gid_t gid;          /* group */
	mode_t mode;        /* file mode bits */
	cnid_t parentcnid;  /* parent directory cnid */
	u_int16_t recflags; /* catalog record flags */
};

/*
 * Callback for hfs_chash_snoop(): copies the cnode's ownership, mode,
 * parent cnid and record flags into the caller's struct cinfo (arg).
 * Always returns 0.
 */
static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;
	cip->recflags = attrp->ca_recflags;

	return (0);
}

/*
 * Lookup the cnid's attr info (uid,
gid, and mode) as well as its parent id. If the item 1050 * isn't incore, then go to the catalog. 1051 */ 1052static int 1053do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid, 1054 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp) 1055{ 1056 int error = 0; 1057 1058 /* if this id matches the one the fsctl was called with, skip the lookup */ 1059 if (cnid == skip_cp->c_cnid) { 1060 cnattrp->ca_uid = skip_cp->c_uid; 1061 cnattrp->ca_gid = skip_cp->c_gid; 1062 cnattrp->ca_mode = skip_cp->c_mode; 1063 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; 1064 keyp->hfsPlus.parentID = skip_cp->c_parentcnid; 1065 } else { 1066 struct cinfo c_info; 1067 1068 /* otherwise, check the cnode hash incase the file/dir is incore */ 1069 if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) { 1070 cnattrp->ca_uid = c_info.uid; 1071 cnattrp->ca_gid = c_info.gid; 1072 cnattrp->ca_mode = c_info.mode; 1073 cnattrp->ca_recflags = c_info.recflags; 1074 keyp->hfsPlus.parentID = c_info.parentcnid; 1075 } else { 1076 int lockflags; 1077 1078 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp))) 1079 throttle_lowpri_io(1); 1080 1081 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); 1082 1083 /* lookup this cnid in the catalog */ 1084 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); 1085 1086 hfs_systemfile_unlock(hfsmp, lockflags); 1087 1088 cache->lookups++; 1089 } 1090 } 1091 1092 return (error); 1093} 1094 1095 1096/* 1097 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache 1098 * up to CACHE_LEVELS as we progress towards the root. 
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t* parents,
    uint32_t num_parents)
{
	int myErr = 0;
	int myResult;
	HFSCatalogNodeID thisNodeID;
	unsigned int myPerms;
	struct cat_attr cnattr;
	int cache_index = -1, scope_index = -1, scope_idx_start = -1;
	CatalogKey catkey;

	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];

	/* walk up the directory hierarchy until we pass the root dir */
	thisNodeID = nodeID;
	while (thisNodeID >= kRootDirID) {
		myResult = 0;	/* default to "no access" */

		/* check the cache before resorting to hitting the catalog */

		/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
		 * to look any further after hitting cached dir */

		if (lookup_bucket(cache, &cache_index, thisNodeID)) {
			cache->cachehits++;
			/* haveaccess[] holds an errno: 0 == access granted,
			 * ESRCH == "outside the parent scope" sentinel */
			myErr = cache->haveaccess[cache_index];
			if (scope_index != -1) {
				if (myErr == ESRCH) {
					myErr = 0;
				}
			} else {
				scope_index = 0;   // so we'll just use the cache result
				scope_idx_start = ids_to_cache;
			}
			myResult = (myErr == 0) ? 1 : 0;
			goto ExitThisRoutine;
		}


		if (parents) {
			/* see if this directory is one of the caller-supplied scopes */
			int tmp;
			tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
			if (scope_index == -1)
				scope_index = tmp;
			if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
				scope_idx_start = ids_to_cache;
			}
		}

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;
			ids_to_cache++;
		}
		// Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
		if (bitmap && map_size) {
			bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
		}


		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
		if (myErr) {
			goto ExitThisRoutine; /* no access */
		}

		/* Root always gets access. */
		if (suser(myp_ucred, NULL) == 0) {
			thisNodeID = catkey.hfsPlus.parentID;
			myResult = 1;
			continue;
		}

		// if the thing has acl's, do the full permission check
		if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
			struct vnode *vp;

			/* get the vnode for this cnid */
			myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
			if ( myErr ) {
				myResult = 0;
				goto ExitThisRoutine;
			}

			/* capture the parent before dropping the cnode lock */
			thisNodeID = VTOC(vp)->c_parentcnid;

			hfs_unlock(VTOC(vp));

			if (vnode_vtype(vp) == VDIR) {
				myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
			} else {
				myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
			}

			vnode_put(vp);
			if (myErr) {
				myResult = 0;
				goto ExitThisRoutine;
			}
		} else {
			/* no ACLs: derive classic mode-bit permissions instead */
			unsigned int flags;
			int mode = cnattr.ca_mode & S_IFMT;
			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);

			/* directories need read+search; leaves only need read */
			if (mode == S_IFDIR) {
				flags = R_OK | X_OK;
			} else {
				flags = R_OK;
			}
			if ( (myPerms & flags) != flags) {
				myResult = 0;
				myErr = EACCES;
				goto ExitThisRoutine;   /* no access */
			}

			/* up the hierarchy we go */
			thisNodeID = catkey.hfsPlus.parentID;
		}
	}

	/* if here, we have access to this node */
	myResult = 1;

  ExitThisRoutine:
	/* scoped check requested but no ancestor matched a scope: report ESRCH */
	if (parents && myErr == 0 && scope_index == -1) {
		myErr = ESRCH;
	}

	if (myErr) {
		myResult = 0;
	}
	*err = myErr;

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		/* ancestors above the matched scope are cached as ESRCH so a later
		 * query can distinguish "out of scope" from a real denial */
		if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
			add_node(cache, -1, parent_ids[i], ESRCH);
		} else {
			add_node(cache, -1, parent_ids[i], myErr);
		}
	}

	return (myResult);
}

/*
 * Handle the HFS_BULKACCESS-style fsctls: check access to a user-supplied
 * list of file ids (or parent ids), writing one errno per id back out.
 * Returns 0 or an errno for the operation as a whole.
 */
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
	boolean_t is64bit;

	/*
	 * NOTE: on entry, the vnode has an io_ref. In case this vnode
	 * happens to be in our list of file_ids, we'll note it
	 * avoid calling hfs_chashget_nowait() on that id as that
	 * will cause a "locking against myself" panic.
	 */
	Boolean check_leaf = true;

	struct user64_ext_access_t *user_access_structp;
	struct user64_ext_access_t tmp_user_access;
	struct access_cache cache;

	int error = 0, prev_parent_check_ok=1;
	unsigned int i;

	short flags;
	unsigned int num_files = 0;
	int map_size = 0;
	int num_parents = 0;
	int *file_ids=NULL;
	short *access=NULL;
	char *bitmap=NULL;
	cnid_t *parents=NULL;
	int leaf_index;

	cnid_t cnid;
	cnid_t prevParent_cnid = 0;
	unsigned int myPerms;
	short myaccess = 0;
	struct cat_attr cnattr;
	CatalogKey catkey;
	struct cnode *skip_cp = VTOC(vp);
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);

	is64bit = proc_is64bit(p);

	/* initialize the local cache and buffers */
	cache.numcached = 0;
	cache.cachehits = 0;
	cache.lookups = 0;
	cache.acache = NULL;
	cache.haveaccess = NULL;

	/* struct copyin done during dispatch...
need to copy file_id array separately */
	if (ap->a_data == NULL) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	/*
	 * Normalize the three supported argument layouts (64-bit ext, 32-bit
	 * legacy, 32-bit ext) into a single user64_ext_access_t view.
	 */
	if (is64bit) {
		if (arg_size != sizeof(struct user64_ext_access_t)) {
			error = EINVAL;
			goto err_exit_bulk_access;
		}

		user_access_structp = (struct user64_ext_access_t *)ap->a_data;

	} else if (arg_size == sizeof(struct user32_access_t)) {
		struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

		// convert an old style bulk-access struct to the new style
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = 0;
		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = USER_ADDR_NULL;
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.num_parents = 0;
		user_access_structp = &tmp_user_access;

	} else if (arg_size == sizeof(struct user32_ext_access_t)) {
		struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

		// up-cast from a 32-bit version of the struct
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = accessp->map_size;
		tmp_user_access.num_parents = accessp->num_parents;

		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

		user_access_structp = &tmp_user_access;
	} else {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	map_size = user_access_structp->map_size;

	num_files = user_access_structp->num_files;

	num_parents= user_access_structp->num_parents;

	/* NOTE(review): num_files < 1 exits with error still 0, i.e. success
	 * with no work done — presumably intentional; confirm with callers */
	if (num_files < 1) {
		goto err_exit_bulk_access;
	}
	/* cap request sizes to bound the kalloc'd buffers below */
	if (num_files > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (num_parents > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	file_ids = (int *) kalloc(sizeof(int) * num_files);
	access = (short *) kalloc(sizeof(short) * num_files);
	if (map_size) {
		bitmap = (char *) kalloc(sizeof(char) * map_size);
	}

	if (num_parents) {
		parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
	}

	cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
	cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	/* if any required allocation failed, free whatever did succeed and bail */
	if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
		if (file_ids) {
			kfree(file_ids, sizeof(int) * num_files);
		}
		if (bitmap) {
			kfree(bitmap, sizeof(char) * map_size);
		}
		if (access) {
			kfree(access, sizeof(short) * num_files);
		}
		if (cache.acache) {
			kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
		}
		if (cache.haveaccess) {
			kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
		}
		if (parents) {
			kfree(parents, sizeof(cnid_t) * num_parents);
		}
		return ENOMEM;
	}

	// make sure the bitmap is zero'ed out...
	if (bitmap) {
		bzero(bitmap, (sizeof(char) * map_size));
	}

	/* pull the id arrays in from user space */
	if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
	            num_files * sizeof(int)))) {
		goto err_exit_bulk_access;
	}

	if (num_parents) {
		if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
		            num_parents * sizeof(cnid_t)))) {
			goto err_exit_bulk_access;
		}
	}

	flags = user_access_structp->flags;
	if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
		flags = R_OK;
	}

	/* check if we've been passed leaf node ids or parent ids */
	if (flags & PARENT_IDS_FLAG) {
		check_leaf = false;
	}

	/* Check access to each file_id passed in */
	for (i = 0; i < num_files; i++) {
		leaf_index=-1;
		cnid = (cnid_t) file_ids[i];

		/* root always has access */
		if ((!parents) && (!suser(cred, NULL))) {
			access[i] = 0;
			continue;
		}

		if (check_leaf) {
			/* do the lookup (checks the cnode hash, then the catalog) */
			error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
			if (error) {
				access[i] = (short) error;
				continue;
			}

			if (parents) {
				// Check if the leaf matches one of the parent scopes
				leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
				if (leaf_index >= 0 && parents[leaf_index] == cnid)
					prev_parent_check_ok = 0;
				else if (leaf_index >= 0)
					prev_parent_check_ok = 1;
			}

			// if the thing has acl's, do the full permission check
			if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
				struct vnode *cvp;
				int myErr = 0;
				/* get the vnode for this cnid */
				myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
				if ( myErr ) {
					access[i] = myErr;
					continue;
				}

				hfs_unlock(VTOC(cvp));

				if (vnode_vtype(cvp) == VDIR) {
					myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
				} else {
					myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
				}

				vnode_put(cvp);
				if (myErr) {
					access[i] = myErr;
					continue;
				}
			} else {
				/* before calling CheckAccess(), check the target file for read access */
				myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
				    cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

				/* fail fast if no access */
				if ((myPerms & flags) == 0) {
					access[i] = EACCES;
					continue;
				}
			}
		} else {
			/* we were passed an array of parent ids */
			catkey.hfsPlus.parentID = cnid;
		}

		/* if the last guy had the same parent and had access, we're done */
		if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
			cache.cachehits++;
			access[i] = 0;
			continue;
		}

		/* walk this entry's parent chain, consulting/filling the cache */
		myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
		    skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);

		if (myaccess || (error == ESRCH && leaf_index != -1)) {
			access[i] = 0;	// have access.. no errors to report
		} else {
			access[i] = (error != 0 ? (short) error : EACCES);
		}

		prevParent_cnid = catkey.hfsPlus.parentID;
	}

	/* copyout the access array */
	if ((error = copyout((caddr_t)access, user_access_structp->access,
	            num_files * sizeof (short)))) {
		goto err_exit_bulk_access;
	}
	if (map_size && bitmap) {
		if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
		            map_size * sizeof (char)))) {
			goto err_exit_bulk_access;
		}
	}


	/* common exit: free each buffer with the same size it was kalloc'd with */
  err_exit_bulk_access:

	if (file_ids)
		kfree(file_ids, sizeof(int) * num_files);
	if (parents)
		kfree(parents, sizeof(cnid_t) * num_parents);
	if (bitmap)
		kfree(bitmap, sizeof(char) * map_size);
	if (access)
		kfree(access, sizeof(short) * num_files);
	if (cache.acache)
		kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
	if (cache.haveaccess)
		kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	return (error);
}


/* end "bulk-access" support */


/*
 * Callback for use with freeze ioctl.
 * Blocks until all pending writes on vp have drained; always returns 0 so
 * the vnode iteration continues.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

	return 0;
}

/*
 * Control filesystem operating characteristics.
1564 */ 1565int 1566hfs_vnop_ioctl( struct vnop_ioctl_args /* { 1567 vnode_t a_vp; 1568 int a_command; 1569 caddr_t a_data; 1570 int a_fflag; 1571 vfs_context_t a_context; 1572 } */ *ap) 1573{ 1574 struct vnode * vp = ap->a_vp; 1575 struct hfsmount *hfsmp = VTOHFS(vp); 1576 vfs_context_t context = ap->a_context; 1577 kauth_cred_t cred = vfs_context_ucred(context); 1578 proc_t p = vfs_context_proc(context); 1579 struct vfsstatfs *vfsp; 1580 boolean_t is64bit; 1581 off_t jnl_start, jnl_size; 1582 struct hfs_journal_info *jip; 1583#if HFS_COMPRESSION 1584 int compressed = 0; 1585 off_t uncompressed_size = -1; 1586 int decmpfs_error = 0; 1587 1588 if (ap->a_command == F_RDADVISE) { 1589 /* we need to inspect the decmpfs state of the file as early as possible */ 1590 compressed = hfs_file_is_compressed(VTOC(vp), 0); 1591 if (compressed) { 1592 if (VNODE_IS_RSRC(vp)) { 1593 /* if this is the resource fork, treat it as if it were empty */ 1594 uncompressed_size = 0; 1595 } else { 1596 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0); 1597 if (decmpfs_error != 0) { 1598 /* failed to get the uncompressed size, we'll check for this later */ 1599 uncompressed_size = -1; 1600 } 1601 } 1602 } 1603 } 1604#endif /* HFS_COMPRESSION */ 1605 1606 is64bit = proc_is64bit(p); 1607 1608#if CONFIG_PROTECT 1609 { 1610 int error = 0; 1611 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { 1612 return error; 1613 } 1614 } 1615#endif /* CONFIG_PROTECT */ 1616 1617 switch (ap->a_command) { 1618 1619 case HFS_GETPATH: 1620 { 1621 struct vnode *file_vp; 1622 cnid_t cnid; 1623 int outlen; 1624 char *bufptr; 1625 int error; 1626 int flags = 0; 1627 1628 /* Caller must be owner of file system. */ 1629 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1630 if (suser(cred, NULL) && 1631 kauth_cred_getuid(cred) != vfsp->f_owner) { 1632 return (EACCES); 1633 } 1634 /* Target vnode must be file system's root. 
*/ 1635 if (!vnode_isvroot(vp)) { 1636 return (EINVAL); 1637 } 1638 bufptr = (char *)ap->a_data; 1639 cnid = strtoul(bufptr, NULL, 10); 1640 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) { 1641 flags |= BUILDPATH_VOLUME_RELATIVE; 1642 } 1643 1644 /* We need to call hfs_vfs_vget to leverage the code that will 1645 * fix the origin list for us if needed, as opposed to calling 1646 * hfs_vget, since we will need the parent for build_path call. 1647 */ 1648 1649 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) { 1650 return (error); 1651 } 1652 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context); 1653 vnode_put(file_vp); 1654 1655 return (error); 1656 } 1657 1658 case HFS_GET_WRITE_GEN_COUNTER: 1659 { 1660 struct cnode *cp = NULL; 1661 int error; 1662 u_int32_t *counter = (u_int32_t *)ap->a_data; 1663 1664 cp = VTOC(vp); 1665 1666 if (!vnode_isdir(vp) && !(vnode_isreg(vp)) && 1667 !(vnode_islnk(vp))) { 1668 error = EBADF; 1669 *counter = 0; 1670 return error; 1671 } 1672 1673 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 1674 if (error == 0) { 1675 struct ubc_info *uip; 1676 int is_mapped_writable = 0; 1677 1678 if (UBCINFOEXISTS(vp)) { 1679 uip = vp->v_ubcinfo; 1680 if ((uip->ui_flags & UI_ISMAPPED) && (uip->ui_flags & UI_MAPPEDWRITE)) { 1681 is_mapped_writable = 1; 1682 } 1683 } 1684 1685 1686 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { 1687 uint32_t gcount = hfs_get_gencount(cp); 1688 // 1689 // Even though we return EBUSY for files that are mmap'ed 1690 // we also want to bump the value so that the write-gen 1691 // counter will always be different once the file is unmapped 1692 // (since the file may be unmapped but the pageouts have not 1693 // yet happened). 
1694 // 1695 if (is_mapped_writable) { 1696 hfs_incr_gencount (cp); 1697 gcount = hfs_get_gencount(cp); 1698 } 1699 1700 *counter = gcount; 1701 } else if (S_ISDIR(cp->c_attr.ca_mode)) { 1702 *counter = hfs_get_gencount(cp); 1703 } else { 1704 /* not a file or dir? silently return */ 1705 *counter = 0; 1706 } 1707 hfs_unlock (cp); 1708 1709 if (is_mapped_writable) { 1710 error = EBUSY; 1711 } 1712 } 1713 1714 return error; 1715 } 1716 1717 case HFS_GET_DOCUMENT_ID: 1718 { 1719 struct cnode *cp = NULL; 1720 int error=0; 1721 u_int32_t *document_id = (u_int32_t *)ap->a_data; 1722 1723 cp = VTOC(vp); 1724 1725 if (cp->c_desc.cd_cnid == kHFSRootFolderID) { 1726 // the root-dir always has document id '2' (aka kHFSRootFolderID) 1727 *document_id = kHFSRootFolderID; 1728 1729 } else if ((S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode))) { 1730 int mark_it = 0; 1731 uint32_t tmp_doc_id; 1732 1733 // 1734 // we can use the FndrExtendedFileInfo because the doc-id is the first 1735 // thing in both it and the FndrExtendedDirInfo struct which is fixed 1736 // in format and can not change layout 1737 // 1738 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16); 1739 1740 hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); 1741 1742 // 1743 // if the cnode isn't UF_TRACKED and the doc-id-allocate flag isn't set 1744 // then just return a zero for the doc-id 1745 // 1746 if (!(cp->c_bsdflags & UF_TRACKED) && !(ap->a_fflag & HFS_DOCUMENT_ID_ALLOCATE)) { 1747 *document_id = 0; 1748 hfs_unlock(cp); 1749 return 0; 1750 } 1751 1752 // 1753 // if the cnode isn't UF_TRACKED and the doc-id-allocate flag IS set, 1754 // then set mark_it so we know to set the UF_TRACKED flag once the 1755 // cnode is locked. 
1756 // 1757 if (!(cp->c_bsdflags & UF_TRACKED) && (ap->a_fflag & HFS_DOCUMENT_ID_ALLOCATE)) { 1758 mark_it = 1; 1759 } 1760 1761 tmp_doc_id = extinfo->document_id; // get a copy of this 1762 1763 hfs_unlock(cp); // in case we have to call hfs_generate_document_id() 1764 1765 // 1766 // If the document_id isn't set, get a new one and then set it. 1767 // Note: we first get the document id, then lock the cnode to 1768 // avoid any deadlock potential between cp and the root vnode. 1769 // 1770 uint32_t new_id; 1771 if (tmp_doc_id == 0 && (error = hfs_generate_document_id(hfsmp, &new_id)) == 0) { 1772 1773 if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) { 1774 extinfo->document_id = tmp_doc_id = new_id; 1775 //printf("ASSIGNING: doc-id %d to ino %d\n", extinfo->document_id, cp->c_fileid); 1776 1777 if (mark_it) { 1778 cp->c_bsdflags |= UF_TRACKED; 1779 } 1780 1781 // mark the cnode dirty 1782 cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; 1783 1784 int lockflags; 1785 if ((error = hfs_start_transaction(hfsmp)) == 0) { 1786 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); 1787 1788 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); 1789 1790 hfs_systemfile_unlock (hfsmp, lockflags); 1791 (void) hfs_end_transaction(hfsmp); 1792 } 1793 1794#if CONFIG_FSE 1795 add_fsevent(FSE_DOCID_CHANGED, context, 1796 FSE_ARG_DEV, hfsmp->hfs_raw_dev, 1797 FSE_ARG_INO, (ino64_t)0, // src inode # 1798 FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # 1799 FSE_ARG_INT32, extinfo->document_id, 1800 FSE_ARG_DONE); 1801 1802 hfs_unlock (cp); // so we can send the STAT_CHANGED event without deadlocking 1803 1804 if (need_fsevent(FSE_STAT_CHANGED, vp)) { 1805 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE); 1806 } 1807#else 1808 hfs_unlock (cp); 1809#endif 1810 } 1811 } 1812 1813 *document_id = tmp_doc_id; 1814 } else { 1815 *document_id = 0; 1816 } 1817 1818 return error; 1819 } 1820 1821 case 
HFS_TRANSFER_DOCUMENT_ID: 1822 { 1823 struct cnode *cp = NULL; 1824 int error; 1825 u_int32_t to_fd = *(u_int32_t *)ap->a_data; 1826 struct fileproc *to_fp; 1827 struct vnode *to_vp; 1828 struct cnode *to_cp; 1829 1830 cp = VTOC(vp); 1831 1832 if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) { 1833 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error); 1834 return error; 1835 } 1836 if ( (error = vnode_getwithref(to_vp)) ) { 1837 file_drop(to_fd); 1838 return error; 1839 } 1840 1841 if (VTOHFS(to_vp) != hfsmp) { 1842 error = EXDEV; 1843 goto transfer_cleanup; 1844 } 1845 1846 int need_unlock = 1; 1847 to_cp = VTOC(to_vp); 1848 error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK); 1849 if (error != 0) { 1850 //printf("could not lock the pair of cnodes (error %d)\n", error); 1851 goto transfer_cleanup; 1852 } 1853 1854 if (!(cp->c_bsdflags & UF_TRACKED)) { 1855 error = EINVAL; 1856 } else if (to_cp->c_bsdflags & UF_TRACKED) { 1857 // 1858 // if the destination is already tracked, return an error 1859 // as otherwise it's a silent deletion of the target's 1860 // document-id 1861 // 1862 error = EEXIST; 1863 } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { 1864 // 1865 // we can use the FndrExtendedFileInfo because the doc-id is the first 1866 // thing in both it and the ExtendedDirInfo struct which is fixed in 1867 // format and can not change layout 1868 // 1869 struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16); 1870 struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16); 1871 1872 if (f_extinfo->document_id == 0) { 1873 uint32_t new_id; 1874 1875 hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id 1876 1877 if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) { 1878 // 1879 // re-lock the pair now that we have the document-id 1880 // 1881 
hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK); 1882 f_extinfo->document_id = new_id; 1883 } else { 1884 goto transfer_cleanup; 1885 } 1886 } 1887 1888 to_extinfo->document_id = f_extinfo->document_id; 1889 f_extinfo->document_id = 0; 1890 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid); 1891 1892 // make sure the destination is also UF_TRACKED 1893 to_cp->c_bsdflags |= UF_TRACKED; 1894 cp->c_bsdflags &= ~UF_TRACKED; 1895 1896 // mark the cnodes dirty 1897 cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; 1898 to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; 1899 1900 int lockflags; 1901 if ((error = hfs_start_transaction(hfsmp)) == 0) { 1902 1903 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); 1904 1905 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); 1906 (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL); 1907 1908 hfs_systemfile_unlock (hfsmp, lockflags); 1909 (void) hfs_end_transaction(hfsmp); 1910 } 1911 1912#if CONFIG_FSE 1913 add_fsevent(FSE_DOCID_CHANGED, context, 1914 FSE_ARG_DEV, hfsmp->hfs_raw_dev, 1915 FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode # 1916 FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode # 1917 FSE_ARG_INT32, to_extinfo->document_id, 1918 FSE_ARG_DONE); 1919 1920 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents 1921 need_unlock = 0; 1922 1923 if (need_fsevent(FSE_STAT_CHANGED, vp)) { 1924 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE); 1925 } 1926 if (need_fsevent(FSE_STAT_CHANGED, to_vp)) { 1927 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE); 1928 } 1929#else 1930 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents 1931 need_unlock = 0; 1932#endif 1933 } 1934 1935 if (need_unlock) { 1936 hfs_unlockpair(cp, to_cp); 1937 } 1938 1939 transfer_cleanup: 1940 vnode_put(to_vp); 1941 file_drop(to_fd); 1942 1943 return error; 1944 } 1945 
1946 case HFS_PREV_LINK: 1947 case HFS_NEXT_LINK: 1948 { 1949 cnid_t linkfileid; 1950 cnid_t nextlinkid; 1951 cnid_t prevlinkid; 1952 int error; 1953 1954 /* Caller must be owner of file system. */ 1955 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1956 if (suser(cred, NULL) && 1957 kauth_cred_getuid(cred) != vfsp->f_owner) { 1958 return (EACCES); 1959 } 1960 /* Target vnode must be file system's root. */ 1961 if (!vnode_isvroot(vp)) { 1962 return (EINVAL); 1963 } 1964 linkfileid = *(cnid_t *)ap->a_data; 1965 if (linkfileid < kHFSFirstUserCatalogNodeID) { 1966 return (EINVAL); 1967 } 1968 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) { 1969 return (error); 1970 } 1971 if (ap->a_command == HFS_NEXT_LINK) { 1972 *(cnid_t *)ap->a_data = nextlinkid; 1973 } else { 1974 *(cnid_t *)ap->a_data = prevlinkid; 1975 } 1976 return (0); 1977 } 1978 1979 case HFS_RESIZE_PROGRESS: { 1980 1981 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 1982 if (suser(cred, NULL) && 1983 kauth_cred_getuid(cred) != vfsp->f_owner) { 1984 return (EACCES); /* must be owner of file system */ 1985 } 1986 if (!vnode_isvroot(vp)) { 1987 return (EINVAL); 1988 } 1989 /* file system must not be mounted read-only */ 1990 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 1991 return (EROFS); 1992 } 1993 1994 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data); 1995 } 1996 1997 case HFS_RESIZE_VOLUME: { 1998 u_int64_t newsize; 1999 u_int64_t cursize; 2000 2001 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 2002 if (suser(cred, NULL) && 2003 kauth_cred_getuid(cred) != vfsp->f_owner) { 2004 return (EACCES); /* must be owner of file system */ 2005 } 2006 if (!vnode_isvroot(vp)) { 2007 return (EINVAL); 2008 } 2009 2010 /* filesystem must not be mounted read only */ 2011 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2012 return (EROFS); 2013 } 2014 newsize = *(u_int64_t *)ap->a_data; 2015 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; 2016 2017 if (newsize > cursize) { 2018 return 
hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context); 2019 } else if (newsize < cursize) { 2020 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context); 2021 } else { 2022 return (0); 2023 } 2024 } 2025 case HFS_CHANGE_NEXT_ALLOCATION: { 2026 int error = 0; /* Assume success */ 2027 u_int32_t location; 2028 2029 if (vnode_vfsisrdonly(vp)) { 2030 return (EROFS); 2031 } 2032 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 2033 if (suser(cred, NULL) && 2034 kauth_cred_getuid(cred) != vfsp->f_owner) { 2035 return (EACCES); /* must be owner of file system */ 2036 } 2037 if (!vnode_isvroot(vp)) { 2038 return (EINVAL); 2039 } 2040 hfs_lock_mount(hfsmp); 2041 location = *(u_int32_t *)ap->a_data; 2042 if ((location >= hfsmp->allocLimit) && 2043 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) { 2044 error = EINVAL; 2045 goto fail_change_next_allocation; 2046 } 2047 /* Return previous value. */ 2048 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation; 2049 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) { 2050 /* On magic value for location, set nextAllocation to next block 2051 * after metadata zone and set flag in mount structure to indicate 2052 * that nextAllocation should not be updated again. 
2053 */ 2054 if (hfsmp->hfs_metazone_end != 0) { 2055 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); 2056 } 2057 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION; 2058 } else { 2059 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION; 2060 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location); 2061 } 2062 MarkVCBDirty(hfsmp); 2063fail_change_next_allocation: 2064 hfs_unlock_mount(hfsmp); 2065 return (error); 2066 } 2067 2068#if HFS_SPARSE_DEV 2069 case HFS_SETBACKINGSTOREINFO: { 2070 struct vnode * bsfs_rootvp; 2071 struct vnode * di_vp; 2072 struct hfs_backingstoreinfo *bsdata; 2073 int error = 0; 2074 2075 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2076 return (EROFS); 2077 } 2078 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { 2079 return (EALREADY); 2080 } 2081 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 2082 if (suser(cred, NULL) && 2083 kauth_cred_getuid(cred) != vfsp->f_owner) { 2084 return (EACCES); /* must be owner of file system */ 2085 } 2086 bsdata = (struct hfs_backingstoreinfo *)ap->a_data; 2087 if (bsdata == NULL) { 2088 return (EINVAL); 2089 } 2090 if ((error = file_vnode(bsdata->backingfd, &di_vp))) { 2091 return (error); 2092 } 2093 if ((error = vnode_getwithref(di_vp))) { 2094 file_drop(bsdata->backingfd); 2095 return(error); 2096 } 2097 2098 if (vnode_mount(vp) == vnode_mount(di_vp)) { 2099 (void)vnode_put(di_vp); 2100 file_drop(bsdata->backingfd); 2101 return (EINVAL); 2102 } 2103 2104 /* 2105 * Obtain the backing fs root vnode and keep a reference 2106 * on it. This reference will be dropped in hfs_unmount. 2107 */ 2108 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */ 2109 if (error) { 2110 (void)vnode_put(di_vp); 2111 file_drop(bsdata->backingfd); 2112 return (error); 2113 } 2114 vnode_ref(bsfs_rootvp); 2115 vnode_put(bsfs_rootvp); 2116 2117 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp; 2118 2119 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; 2120 /* The free extent cache is managed differently for sparse devices. 
2121 * There is a window between which the volume is mounted and the 2122 * device is marked as sparse, so the free extent cache for this 2123 * volume is currently initialized as normal volume (sorted by block 2124 * count). Reset the cache so that it will be rebuilt again 2125 * for sparse device (sorted by start block). 2126 */ 2127 ResetVCBFreeExtCache(hfsmp); 2128 2129 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; 2130 hfsmp->hfs_sparsebandblks *= 4; 2131 2132 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */ 2133 2134 /* 2135 * If the sparse image is on a sparse image file (as opposed to a sparse 2136 * bundle), then we may need to limit the free space to the maximum size 2137 * of a file on that volume. So we query (using pathconf), and if we get 2138 * a meaningful result, we cache the number of blocks for later use in 2139 * hfs_freeblks(). 2140 */ 2141 hfsmp->hfs_backingfs_maxblocks = 0; 2142 if (vnode_vtype(di_vp) == VREG) { 2143 int terr; 2144 int hostbits; 2145 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context); 2146 if (terr == 0 && hostbits != 0 && hostbits < 64) { 2147 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits; 2148 2149 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize; 2150 } 2151 } 2152 2153 (void)vnode_put(di_vp); 2154 file_drop(bsdata->backingfd); 2155 return (0); 2156 } 2157 case HFS_CLRBACKINGSTOREINFO: { 2158 struct vnode * tmpvp; 2159 2160 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 2161 if (suser(cred, NULL) && 2162 kauth_cred_getuid(cred) != vfsp->f_owner) { 2163 return (EACCES); /* must be owner of file system */ 2164 } 2165 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2166 return (EROFS); 2167 } 2168 2169 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && 2170 hfsmp->hfs_backingfs_rootvp) { 2171 2172 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; 2173 tmpvp = hfsmp->hfs_backingfs_rootvp; 2174 hfsmp->hfs_backingfs_rootvp = NULLVP; 2175 
hfsmp->hfs_sparsebandblks = 0; 2176 vnode_rele(tmpvp); 2177 } 2178 return (0); 2179 } 2180#endif /* HFS_SPARSE_DEV */ 2181 2182 /* Change the next CNID stored in the VH */ 2183 case HFS_CHANGE_NEXTCNID: { 2184 int error = 0; /* Assume success */ 2185 u_int32_t fileid; 2186 int wraparound = 0; 2187 int lockflags = 0; 2188 2189 if (vnode_vfsisrdonly(vp)) { 2190 return (EROFS); 2191 } 2192 vfsp = vfs_statfs(HFSTOVFS(hfsmp)); 2193 if (suser(cred, NULL) && 2194 kauth_cred_getuid(cred) != vfsp->f_owner) { 2195 return (EACCES); /* must be owner of file system */ 2196 } 2197 2198 fileid = *(u_int32_t *)ap->a_data; 2199 2200 /* Must have catalog lock excl. to advance the CNID pointer */ 2201 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK); 2202 2203 hfs_lock_mount(hfsmp); 2204 2205 /* If it is less than the current next CNID, force the wraparound bit to be set */ 2206 if (fileid < hfsmp->vcbNxtCNID) { 2207 wraparound=1; 2208 } 2209 2210 /* Return previous value. */ 2211 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID; 2212 2213 hfsmp->vcbNxtCNID = fileid; 2214 2215 if (wraparound) { 2216 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; 2217 } 2218 2219 MarkVCBDirty(hfsmp); 2220 hfs_unlock_mount(hfsmp); 2221 hfs_systemfile_unlock (hfsmp, lockflags); 2222 2223 return (error); 2224 } 2225 2226 case F_FREEZE_FS: { 2227 struct mount *mp; 2228 2229 mp = vnode_mount(vp); 2230 hfsmp = VFSTOHFS(mp); 2231 2232 if (!(hfsmp->jnl)) 2233 return (ENOTSUP); 2234 2235 vfsp = vfs_statfs(mp); 2236 2237 if (kauth_cred_getuid(cred) != vfsp->f_owner && 2238 !kauth_cred_issuser(cred)) 2239 return (EACCES); 2240 2241 lck_rw_lock_exclusive(&hfsmp->hfs_insync); 2242 2243 // flush things before we get started to try and prevent 2244 // dirty data from being paged out while we're frozen. 2245 // note: can't do this after taking the lock as it will 2246 // deadlock against ourselves. 
2247 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); 2248 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); 2249 2250 // DO NOT call hfs_journal_flush() because that takes a 2251 // shared lock on the global exclusive lock! 2252 journal_flush(hfsmp->jnl, TRUE); 2253 2254 // don't need to iterate on all vnodes, we just need to 2255 // wait for writes to the system files and the device vnode 2256 // 2257 // Now that journal flush waits for all metadata blocks to 2258 // be written out, waiting for btree writes is probably no 2259 // longer required. 2260 if (HFSTOVCB(hfsmp)->extentsRefNum) 2261 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze"); 2262 if (HFSTOVCB(hfsmp)->catalogRefNum) 2263 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze"); 2264 if (HFSTOVCB(hfsmp)->allocationsRefNum) 2265 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze"); 2266 if (hfsmp->hfs_attribute_vp) 2267 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze"); 2268 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze"); 2269 2270 hfsmp->hfs_freezing_proc = current_proc(); 2271 2272 return (0); 2273 } 2274 2275 case F_THAW_FS: { 2276 vfsp = vfs_statfs(vnode_mount(vp)); 2277 if (kauth_cred_getuid(cred) != vfsp->f_owner && 2278 !kauth_cred_issuser(cred)) 2279 return (EACCES); 2280 2281 // if we're not the one who froze the fs then we 2282 // can't thaw it. 
2283 if (hfsmp->hfs_freezing_proc != current_proc()) { 2284 return EPERM; 2285 } 2286 2287 // NOTE: if you add code here, also go check the 2288 // code that "thaws" the fs in hfs_vnop_close() 2289 // 2290 hfsmp->hfs_freezing_proc = NULL; 2291 hfs_unlock_global (hfsmp); 2292 lck_rw_unlock_exclusive(&hfsmp->hfs_insync); 2293 2294 return (0); 2295 } 2296 2297 case HFS_BULKACCESS_FSCTL: { 2298 int size; 2299 2300 if (hfsmp->hfs_flags & HFS_STANDARD) { 2301 return EINVAL; 2302 } 2303 2304 if (is64bit) { 2305 size = sizeof(struct user64_access_t); 2306 } else { 2307 size = sizeof(struct user32_access_t); 2308 } 2309 2310 return do_bulk_access_check(hfsmp, vp, ap, size, context); 2311 } 2312 2313 case HFS_EXT_BULKACCESS_FSCTL: { 2314 int size; 2315 2316 if (hfsmp->hfs_flags & HFS_STANDARD) { 2317 return EINVAL; 2318 } 2319 2320 if (is64bit) { 2321 size = sizeof(struct user64_ext_access_t); 2322 } else { 2323 size = sizeof(struct user32_ext_access_t); 2324 } 2325 2326 return do_bulk_access_check(hfsmp, vp, ap, size, context); 2327 } 2328 2329 case HFS_SET_XATTREXTENTS_STATE: { 2330 int state; 2331 2332 if (ap->a_data == NULL) { 2333 return (EINVAL); 2334 } 2335 2336 state = *(int *)ap->a_data; 2337 2338 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2339 return (EROFS); 2340 } 2341 2342 /* Super-user can enable or disable extent-based extended 2343 * attribute support on a volume 2344 * Note: Starting Mac OS X 10.7, extent-based extended attributes 2345 * are enabled by default, so any change will be transient only 2346 * till the volume is remounted. 2347 */ 2348 if (!kauth_cred_issuser(kauth_cred_get())) { 2349 return (EPERM); 2350 } 2351 if (state == 0 || state == 1) 2352 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state); 2353 else 2354 return (EINVAL); 2355 } 2356 2357 case F_SETSTATICCONTENT: { 2358 int error; 2359 int enable_static = 0; 2360 struct cnode *cp = NULL; 2361 /* 2362 * lock the cnode, decorate the cnode flag, and bail out. 
2363 * VFS should have already authenticated the caller for us. 2364 */ 2365 2366 if (ap->a_data) { 2367 /* 2368 * Note that even though ap->a_data is of type caddr_t, 2369 * the fcntl layer at the syscall handler will pass in NULL 2370 * or 1 depending on what the argument supplied to the fcntl 2371 * was. So it is in fact correct to check the ap->a_data 2372 * argument for zero or non-zero value when deciding whether or not 2373 * to enable the static bit in the cnode. 2374 */ 2375 enable_static = 1; 2376 } 2377 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2378 return EROFS; 2379 } 2380 cp = VTOC(vp); 2381 2382 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2383 if (error == 0) { 2384 if (enable_static) { 2385 cp->c_flag |= C_SSD_STATIC; 2386 } 2387 else { 2388 cp->c_flag &= ~C_SSD_STATIC; 2389 } 2390 hfs_unlock (cp); 2391 } 2392 return error; 2393 } 2394 2395 case F_SET_GREEDY_MODE: { 2396 int error; 2397 int enable_greedy_mode = 0; 2398 struct cnode *cp = NULL; 2399 /* 2400 * lock the cnode, decorate the cnode flag, and bail out. 2401 * VFS should have already authenticated the caller for us. 2402 */ 2403 2404 if (ap->a_data) { 2405 /* 2406 * Note that even though ap->a_data is of type caddr_t, 2407 * the fcntl layer at the syscall handler will pass in NULL 2408 * or 1 depending on what the argument supplied to the fcntl 2409 * was. So it is in fact correct to check the ap->a_data 2410 * argument for zero or non-zero value when deciding whether or not 2411 * to enable the greedy mode bit in the cnode. 
2412 */ 2413 enable_greedy_mode = 1; 2414 } 2415 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2416 return EROFS; 2417 } 2418 cp = VTOC(vp); 2419 2420 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2421 if (error == 0) { 2422 if (enable_greedy_mode) { 2423 cp->c_flag |= C_SSD_GREEDY_MODE; 2424 } 2425 else { 2426 cp->c_flag &= ~C_SSD_GREEDY_MODE; 2427 } 2428 hfs_unlock (cp); 2429 } 2430 return error; 2431 } 2432 2433 case F_MAKECOMPRESSED: { 2434 int error = 0; 2435 uint32_t gen_counter; 2436 struct cnode *cp = NULL; 2437 int reset_decmp = 0; 2438 2439 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2440 return EROFS; 2441 } 2442 2443 /* 2444 * acquire & lock the cnode. 2445 * VFS should have already authenticated the caller for us. 2446 */ 2447 2448 if (ap->a_data) { 2449 /* 2450 * Cast the pointer into a uint32_t so we can extract the 2451 * supplied generation counter. 2452 */ 2453 gen_counter = *((uint32_t*)ap->a_data); 2454 } 2455 else { 2456 return EINVAL; 2457 } 2458 2459#if HFS_COMPRESSION 2460 cp = VTOC(vp); 2461 /* Grab truncate lock first; we may truncate the file */ 2462 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2463 2464 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2465 if (error) { 2466 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); 2467 return error; 2468 } 2469 2470 /* Are there any other usecounts/FDs? */ 2471 if (vnode_isinuse(vp, 1)) { 2472 hfs_unlock(cp); 2473 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); 2474 return EBUSY; 2475 } 2476 2477 2478 /* now we have the cnode locked down; Validate arguments */ 2479 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) { 2480 /* EINVAL if you are trying to manipulate an IMMUTABLE file */ 2481 hfs_unlock(cp); 2482 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); 2483 return EINVAL; 2484 } 2485 2486 if ((hfs_get_gencount (cp)) == gen_counter) { 2487 /* 2488 * OK, the gen_counter matched. 
Go for it: 2489 * Toggle state bits, truncate file, and suppress mtime update 2490 */ 2491 reset_decmp = 1; 2492 cp->c_bsdflags |= UF_COMPRESSED; 2493 2494 error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context); 2495 } 2496 else { 2497 error = ESTALE; 2498 } 2499 2500 /* Unlock cnode before executing decmpfs ; they may need to get an EA */ 2501 hfs_unlock(cp); 2502 2503 /* 2504 * Reset the decmp state while still holding the truncate lock. We need to 2505 * serialize here against a listxattr on this node which may occur at any 2506 * time. 2507 * 2508 * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed, 2509 * that will still potentially require getting the com.apple.decmpfs EA. If the 2510 * EA is required, then we can't hold the cnode lock, because the getxattr call is 2511 * generic(through VFS), and can't pass along any info telling it that we're already 2512 * holding it (the lock). If we don't serialize, then we risk listxattr stopping 2513 * and trying to fill in the hfs_file_is_compressed info during the callback 2514 * operation, which will result in deadlock against the b-tree node. 2515 * 2516 * So, to serialize against listxattr (which will grab buf_t meta references on 2517 * the b-tree blocks), we hold the truncate lock as we're manipulating the 2518 * decmpfs payload. 
2519 */ 2520 if ((reset_decmp) && (error == 0)) { 2521 decmpfs_cnode *dp = VTOCMP (vp); 2522 if (dp != NULL) { 2523 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); 2524 } 2525 2526 /* Initialize the decmpfs node as needed */ 2527 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */ 2528 } 2529 2530 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); 2531 2532#endif 2533 return error; 2534 } 2535 2536 case F_SETBACKINGSTORE: { 2537 2538 int error = 0; 2539 2540 /* 2541 * See comment in F_SETSTATICCONTENT re: using 2542 * a null check for a_data 2543 */ 2544 if (ap->a_data) { 2545 error = hfs_set_backingstore (vp, 1); 2546 } 2547 else { 2548 error = hfs_set_backingstore (vp, 0); 2549 } 2550 2551 return error; 2552 } 2553 2554 case F_GETPATH_MTMINFO: { 2555 int error = 0; 2556 2557 int *data = (int*) ap->a_data; 2558 2559 /* Ask if this is a backingstore vnode */ 2560 error = hfs_is_backingstore (vp, data); 2561 2562 return error; 2563 } 2564 2565 case F_FULLFSYNC: { 2566 int error; 2567 2568 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2569 return (EROFS); 2570 } 2571 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2572 if (error == 0) { 2573 error = hfs_fsync(vp, MNT_WAIT, TRUE, p); 2574 hfs_unlock(VTOC(vp)); 2575 } 2576 2577 return error; 2578 } 2579 2580 case F_CHKCLEAN: { 2581 register struct cnode *cp; 2582 int error; 2583 2584 if (!vnode_isreg(vp)) 2585 return EINVAL; 2586 2587 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2588 if (error == 0) { 2589 cp = VTOC(vp); 2590 /* 2591 * used by regression test to determine if 2592 * all the dirty pages (via write) have been cleaned 2593 * after a call to 'fsysnc'. 
2594 */ 2595 error = is_file_clean(vp, VTOF(vp)->ff_size); 2596 hfs_unlock(cp); 2597 } 2598 return (error); 2599 } 2600 2601 case F_RDADVISE: { 2602 register struct radvisory *ra; 2603 struct filefork *fp; 2604 int error; 2605 2606 if (!vnode_isreg(vp)) 2607 return EINVAL; 2608 2609 ra = (struct radvisory *)(ap->a_data); 2610 fp = VTOF(vp); 2611 2612 /* Protect against a size change. */ 2613 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); 2614 2615#if HFS_COMPRESSION 2616 if (compressed && (uncompressed_size == -1)) { 2617 /* fetching the uncompressed size failed above, so return the error */ 2618 error = decmpfs_error; 2619 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) || 2620 (!compressed && (ra->ra_offset >= fp->ff_size))) { 2621 error = EFBIG; 2622 } 2623#else /* HFS_COMPRESSION */ 2624 if (ra->ra_offset >= fp->ff_size) { 2625 error = EFBIG; 2626 } 2627#endif /* HFS_COMPRESSION */ 2628 else { 2629 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); 2630 } 2631 2632 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); 2633 return (error); 2634 } 2635 2636 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */ 2637 { 2638 if (is64bit) { 2639 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); 2640 } 2641 else { 2642 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); 2643 } 2644 return 0; 2645 } 2646 2647 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME: 2648 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time; 2649 break; 2650 2651 case SPOTLIGHT_FSCTL_GET_LAST_MTIME: 2652 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime; 2653 break; 2654 2655 case HFS_FSCTL_GET_VERY_LOW_DISK: 2656 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit; 2657 break; 2658 2659 case HFS_FSCTL_SET_VERY_LOW_DISK: 2660 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) { 2661 return EINVAL; 2662 } 2663 2664 
hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data; 2665 break; 2666 2667 case HFS_FSCTL_GET_LOW_DISK: 2668 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit; 2669 break; 2670 2671 case HFS_FSCTL_SET_LOW_DISK: 2672 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel 2673 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) { 2674 2675 return EINVAL; 2676 } 2677 2678 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data; 2679 break; 2680 2681 case HFS_FSCTL_GET_DESIRED_DISK: 2682 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel; 2683 break; 2684 2685 case HFS_FSCTL_SET_DESIRED_DISK: 2686 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) { 2687 return EINVAL; 2688 } 2689 2690 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data; 2691 break; 2692 2693 case HFS_VOLUME_STATUS: 2694 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions; 2695 break; 2696 2697 case HFS_SET_BOOT_INFO: 2698 if (!vnode_isvroot(vp)) 2699 return(EINVAL); 2700 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner)) 2701 return(EACCES); /* must be superuser or owner of filesystem */ 2702 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2703 return (EROFS); 2704 } 2705 hfs_lock_mount (hfsmp); 2706 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); 2707 hfs_unlock_mount (hfsmp); 2708 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); 2709 break; 2710 2711 case HFS_GET_BOOT_INFO: 2712 if (!vnode_isvroot(vp)) 2713 return(EINVAL); 2714 hfs_lock_mount (hfsmp); 2715 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo)); 2716 hfs_unlock_mount(hfsmp); 2717 break; 2718 2719 case HFS_MARK_BOOT_CORRUPT: 2720 /* Mark the boot volume corrupt by setting 2721 * kHFSVolumeInconsistentBit in the volume header. This will 2722 * force fsck_hfs on next mount. 
2723 */ 2724 if (!kauth_cred_issuser(kauth_cred_get())) { 2725 return EACCES; 2726 } 2727 2728 /* Allowed only on the root vnode of the boot volume */ 2729 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) || 2730 !vnode_isvroot(vp)) { 2731 return EINVAL; 2732 } 2733 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2734 return (EROFS); 2735 } 2736 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n"); 2737 hfs_mark_volume_inconsistent(hfsmp); 2738 break; 2739 2740 case HFS_FSCTL_GET_JOURNAL_INFO: 2741 jip = (struct hfs_journal_info*)ap->a_data; 2742 2743 if (vp == NULLVP) 2744 return EINVAL; 2745 2746 if (hfsmp->jnl == NULL) { 2747 jnl_start = 0; 2748 jnl_size = 0; 2749 } else { 2750 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; 2751 jnl_size = (off_t)hfsmp->jnl_size; 2752 } 2753 2754 jip->jstart = jnl_start; 2755 jip->jsize = jnl_size; 2756 break; 2757 2758 case HFS_SET_ALWAYS_ZEROFILL: { 2759 struct cnode *cp = VTOC(vp); 2760 2761 if (*(int *)ap->a_data) { 2762 cp->c_flag |= C_ALWAYS_ZEROFILL; 2763 } else { 2764 cp->c_flag &= ~C_ALWAYS_ZEROFILL; 2765 } 2766 break; 2767 } 2768 2769 case HFS_DISABLE_METAZONE: { 2770 /* Only root can disable metadata zone */ 2771 if (!kauth_cred_issuser(kauth_cred_get())) { 2772 return EACCES; 2773 } 2774 if (hfsmp->hfs_flags & HFS_READ_ONLY) { 2775 return (EROFS); 2776 } 2777 2778 /* Disable metadata zone now */ 2779 (void) hfs_metadatazone_init(hfsmp, true); 2780 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN); 2781 break; 2782 } 2783 2784 default: 2785 return (ENOTTY); 2786 } 2787 2788 return 0; 2789} 2790 2791/* 2792 * select 2793 */ 2794int 2795hfs_vnop_select(__unused struct vnop_select_args *ap) 2796/* 2797 struct vnop_select_args { 2798 vnode_t a_vp; 2799 int a_which; 2800 int a_fflags; 2801 void *a_wql; 2802 vfs_context_t a_context; 2803 }; 2804*/ 2805{ 2806 /* 2807 * We should really check to see if I/O is possible. 
 */
	return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently its 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 *
 * Parameters:
 *	vp	file vnode whose logical block is being mapped
 *	bn	logical block number (units of GetLogicalBlockSize(vp))
 *	vpp	if non-NULL, set to the underlying device vnode
 *	bnp	out: physical (device) block number; if NULL, the mapping
 *		is skipped entirely and 0 is returned
 *	runp	if non-NULL, out: remaining contiguous logical blocks in
 *		the run after 'bn' (0 if the block size can't be clustered)
 *
 * Returns 0 (E_NONE) on success, or a VFS error mapped from MapFileBlockC.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags = 0;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	/*
	 * If the fork has overflow extents, the extents B-tree must be
	 * locked while MapFileBlockC walks the extent records.
	 */
	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),
				(FCB*)fp,
				MAXPHYSIO,
				blockposition,
				bnp,
				&bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}

/*
 * Convert logical block number to file offset.
2874 */ 2875int 2876hfs_vnop_blktooff(struct vnop_blktooff_args *ap) 2877/* 2878 struct vnop_blktooff_args { 2879 vnode_t a_vp; 2880 daddr64_t a_lblkno; 2881 off_t *a_offset; 2882 }; 2883*/ 2884{ 2885 if (ap->a_vp == NULL) 2886 return (EINVAL); 2887 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp); 2888 2889 return(0); 2890} 2891 2892/* 2893 * Convert file offset to logical block number. 2894 */ 2895int 2896hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap) 2897/* 2898 struct vnop_offtoblk_args { 2899 vnode_t a_vp; 2900 off_t a_offset; 2901 daddr64_t *a_lblkno; 2902 }; 2903*/ 2904{ 2905 if (ap->a_vp == NULL) 2906 return (EINVAL); 2907 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp)); 2908 2909 return(0); 2910} 2911 2912/* 2913 * Map file offset to physical block number. 2914 * 2915 * If this function is called for write operation, and if the file 2916 * had virtual blocks allocated (delayed allocation), real blocks 2917 * are allocated by calling ExtendFileC(). 2918 * 2919 * If this function is called for read operation, and if the file 2920 * had virtual blocks allocated (delayed allocation), no change 2921 * to the size of file is done, and if required, rangelist is 2922 * searched for mapping. 2923 * 2924 * System file cnodes are expected to be locked (shared or exclusive). 
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;
	int tooklock = 0;

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	} else {
		if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
			int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			switch(state) {
				case FILE_IS_COMPRESSED:
					return ENOTSUP;
				case FILE_IS_CONVERTING:
					/* if FILE_IS_CONVERTING, we allow blockmap */
					break;
				default:
					printf("invalid state %d for compressed file\n", state);
					/* fall through */
			}
		}
	}
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	/*
	 * Take the cnode lock for ordinary files unless this thread already
	 * owns it; system files, symlinks and swapfiles are mapped without it.
	 */
	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			tooklock = 1;
		}
	}
	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

retry:
	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto exit;
		} else {
			started_tr = 1;
		}
		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

	if (syslocks)
		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		int64_t actbytes;
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			if (syslocks) {
				hfs_systemfile_unlock(hfsmp, lockflags);
				syslocks = 0;
			}
			goto retry;
		}

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * aquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

		if (retval) {
			/* Allocation failed: restore the loaned-block accounting
			 * on the fork, cnode and mount before bailing out. */
			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			hfs_lock_mount (hfsmp);
			hfsmp->loanedBlocks += loanedBlocks;
			hfs_unlock_mount (hfsmp);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			if (started_tr) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
				started_tr = 0;
			}
			goto exit;
		}
	}

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
	                       ap->a_bpn, &bytesContAvail);
	if (syslocks) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		syslocks = 0;
	}

	if (started_tr) {
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}
	if (retval) {
		/* On write, always return error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * rangelist for zero-fills
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
			goto exit;
		}

		/* Validate if the start offset is within logical file size */
		if (ap->a_foffset >= fp->ff_size) {
			goto exit;
		}

		/*
		 * At this point, we have encountered a failure during
		 * MapFileBlockC that resulted in ERANGE, and we are not servicing
		 * a write, and there are borrowed blocks.
		 *
		 * However, the cluster layer will not call blockmap for
		 * blocks that are borrowed and in-cache.  We have to assume that
		 * because we observed ERANGE being emitted from MapFileBlockC, this
		 * extent range is not valid on-disk.  So we treat this as a
		 * mapping that needs to be zero-filled prior to reading.
		 *
		 * Note that under certain circumstances (such as non-contiguous
		 * userland VM mappings in the calling process), cluster_io
		 * may be forced to split a large I/O driven by hfs_vnop_write
		 * into multiple sub-I/Os that necessitate a RMW cycle.  If this is
		 * the case here, then we have already removed the invalid range list
		 * mapping prior to getting to this blockmap call, so we should not
		 * search the invalid rangelist for this byte range.
		 */

		bytesContAvail = fp->ff_size - ap->a_foffset;
		/*
		 * Clip the contiguous available bytes to, at most, the allowable
		 * maximum or the amount requested.
		 */

		if (bytesContAvail > ap->a_size) {
			bytesContAvail = ap->a_size;
		}

		/* -1 tells the caller there is no backing block: zero-fill. */
		*ap->a_bpn = (daddr64_t) -1;
		retval = 0;

		goto exit;
	}

	/* MapFileC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
	                      ap->a_foffset + (off_t)bytesContAvail - 1,
	                      &invalid_range);
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}
			break;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
			break;

		case RL_NOOVERLAP:
			break;
		} /* end switch */
		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;
	}

exit:
	if (retval == 0) {
		if (ap->a_run)
			*ap->a_run = bytesContAvail;

		if (ap->a_poff)
			*(int *)ap->a_poff = 0;
	}

	if (tooklock)
		hfs_unlock(cp);

	return (MacToVFSError(retval));
}

/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);
	int error = 0;

	/* Mark buffer as containing static data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_STATIC) {
		buf_markstatic(bp);
	}

	/* Mark buffer for SSD greedy mode if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
		bufattr_markgreedymode((bufattr_t)(&bp->b_attr));
	}

#if CONFIG_PROTECT
	cnode_t *cp = NULL;

	if ((cp = cp_get_protected_cnode(vp)) != NULL) {
		/*
		 * We rely upon the truncate lock to protect the
		 * CP cache key from getting tossed prior to our IO finishing here.
		 * Nearly all cluster io calls to manipulate file payload from HFS
		 * take the truncate lock before calling into the cluster
		 * layer to ensure the file size does not change, or that they
		 * have exclusive right to change the EOF of the file.
		 * That same guarantee protects us here since the code that
		 * deals with CP lock events must now take the truncate lock
		 * before doing anything.
		 *
		 * There is 1 exception here:
		 * 1) One exception should be the VM swapfile IO, because HFS will
		 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
		 * swapfile code only without holding the truncate lock.  This is because
		 * individual swapfiles are maintained at fixed-length sizes by the VM code.
		 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
		 * create our own UPL and thus take the truncate lock before calling
		 * into the cluster layer.  In that case, however, we are not concerned
		 * with the CP blob being wiped out in the middle of the IO
		 * because there isn't anything to toss; the VM swapfile key stays
		 * in-core as long as the file is open.
		 *
		 * NB:
		 * For filesystem resize, we may not have access to the underlying
		 * file's cache key for whatever reason (device may be locked). However,
		 * we do not need it since we are going to use the temporary HFS-wide resize key
		 * which is generated once we start relocating file content.  If this file's I/O
		 * should be done using the resize key, it will have been supplied already, so
		 * do not attach the file's cp blob to the buffer.
		 */
		if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
			buf_setcpaddr(bp, cp->c_cpentry);
		}
	}
#endif /* CONFIG_PROTECT */

	/* Hand the buffer to the device vnode; buf_strategy deals with
	 * any fragmentation of the request. */
	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);

	return error;
}

/*
 * Clear the cnode's dirty state without recording any timestamp updates:
 * drops C_MODIFIED and cancels pending access/change/modification time
 * touches.  Always returns 0.
 */
static int
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);
	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;

	return 0;
}

/*
 * Truncate (shrink or lengthen) the file fork of 'vp' to 'length' bytes.
 *
 * truncateflags may carry HFS_TRUNCATE_SKIPUPDATE and/or
 * HFS_TRUNCATE_SKIPTIMES to suppress the catalog update and the
 * time-stamp updates respectively.
 *
 * Returns EINVAL for a negative length or a corrupt (negative) fork size,
 * EFBIG when a plain-HFS file would exceed MAXHFSFILESIZE, otherwise an
 * error from the underlying allocation/deallocation machinery or 0.
 */
int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	/* NOTE(review): the doubled ';' below is a harmless empty statement */
	struct proc *p = vfs_context_proc(context);;
	kauth_cred_t cred = vfs_context_ucred(context);
	int retval;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	u_int32_t fileblocks;
	int blksize;
	struct hfsmount *hfsmp;
	int lockflags;
	int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
	int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if (length < 0)
		return (EINVAL);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	hfsmp = VTOHFS(vp);

	retval = E_NONE;

	/* Files that are changing size are not hot file candidates.
*/ 3316 if (hfsmp->hfc_stage == HFC_RECORDING) { 3317 fp->ff_bytesread = 0; 3318 } 3319 3320 /* 3321 * We cannot just check if fp->ff_size == length (as an optimization) 3322 * since there may be extra physical blocks that also need truncation. 3323 */ 3324#if QUOTA 3325 if ((retval = hfs_getinoquota(cp))) 3326 return(retval); 3327#endif /* QUOTA */ 3328 3329 /* 3330 * Lengthen the size of the file. We must ensure that the 3331 * last byte of the file is allocated. Since the smallest 3332 * value of ff_size is 0, length will be at least 1. 3333 */ 3334 if (length > (off_t)fp->ff_size) { 3335#if QUOTA 3336 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)), 3337 cred, 0); 3338 if (retval) 3339 goto Err_Exit; 3340#endif /* QUOTA */ 3341 /* 3342 * If we don't have enough physical space then 3343 * we need to extend the physical size. 3344 */ 3345 if (length > filebytes) { 3346 int eflags; 3347 u_int32_t blockHint = 0; 3348 3349 /* All or nothing and don't round up to clumpsize. */ 3350 eflags = kEFAllMask | kEFNoClumpMask; 3351 3352 if (cred && suser(cred, NULL) != 0) 3353 eflags |= kEFReserveMask; /* keep a reserve */ 3354 3355 /* 3356 * Allocate Journal and Quota files in metadata zone. 
3357 */ 3358 if (filebytes == 0 && 3359 hfsmp->hfs_flags & HFS_METADATA_ZONE && 3360 hfs_virtualmetafile(cp)) { 3361 eflags |= kEFMetadataMask; 3362 blockHint = hfsmp->hfs_metazone_start; 3363 } 3364 if (hfs_start_transaction(hfsmp) != 0) { 3365 retval = EINVAL; 3366 goto Err_Exit; 3367 } 3368 3369 /* Protect extents b-tree and allocation bitmap */ 3370 lockflags = SFL_BITMAP; 3371 if (overflow_extents(fp)) 3372 lockflags |= SFL_EXTENTS; 3373 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3374 3375 while ((length > filebytes) && (retval == E_NONE)) { 3376 bytesToAdd = length - filebytes; 3377 retval = MacToVFSError(ExtendFileC(VTOVCB(vp), 3378 (FCB*)fp, 3379 bytesToAdd, 3380 blockHint, 3381 eflags, 3382 &actualBytesAdded)); 3383 3384 filebytes = (off_t)fp->ff_blocks * (off_t)blksize; 3385 if (actualBytesAdded == 0 && retval == E_NONE) { 3386 if (length > filebytes) 3387 length = filebytes; 3388 break; 3389 } 3390 } /* endwhile */ 3391 3392 hfs_systemfile_unlock(hfsmp, lockflags); 3393 3394 if (hfsmp->jnl) { 3395 if (skipupdate) { 3396 (void) hfs_minorupdate(vp); 3397 } 3398 else { 3399 (void) hfs_update(vp, TRUE); 3400 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3401 } 3402 } 3403 3404 hfs_end_transaction(hfsmp); 3405 3406 if (retval) 3407 goto Err_Exit; 3408 3409 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, 3410 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); 3411 } 3412 3413 if (!(flags & IO_NOZEROFILL)) { 3414 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) { 3415 struct rl_entry *invalid_range; 3416 off_t zero_limit; 3417 3418 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; 3419 if (length < zero_limit) zero_limit = length; 3420 3421 if (length > (off_t)fp->ff_size) { 3422 struct timeval tv; 3423 3424 /* Extending the file: time to fill out the current last page w. zeroes? 
*/ 3425 if ((fp->ff_size & PAGE_MASK_64) && 3426 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64, 3427 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) { 3428 3429 /* There's some valid data at the start of the (current) last page 3430 of the file, so zero out the remainder of that page to ensure the 3431 entire page contains valid data. Since there is no invalid range 3432 possible past the (current) eof, there's no need to remove anything 3433 from the invalid range list before calling cluster_write(): */ 3434 hfs_unlock(cp); 3435 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit, 3436 fp->ff_size, (off_t)0, 3437 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY); 3438 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); 3439 if (retval) goto Err_Exit; 3440 3441 /* Merely invalidate the remaining area, if necessary: */ 3442 if (length > zero_limit) { 3443 microuptime(&tv); 3444 rl_add(zero_limit, length - 1, &fp->ff_invalidranges); 3445 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; 3446 } 3447 } else { 3448 /* The page containing the (current) eof is invalid: just add the 3449 remainder of the page to the invalid list, along with the area 3450 being newly allocated: 3451 */ 3452 microuptime(&tv); 3453 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges); 3454 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; 3455 }; 3456 } 3457 } else { 3458 panic("hfs_truncate: invoked on non-UBC object?!"); 3459 }; 3460 } 3461 if (suppress_times == 0) { 3462 cp->c_touch_modtime = TRUE; 3463 } 3464 fp->ff_size = length; 3465 3466 } else { /* Shorten the size of the file */ 3467 3468 if ((off_t)fp->ff_size > length) { 3469 /* Any space previously marked as invalid is now irrelevant: */ 3470 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges); 3471 } 3472 3473 /* 3474 * Account for any unmapped blocks. Note that the new 3475 * file length can still end up with unmapped blocks. 
3476 */ 3477 if (fp->ff_unallocblocks > 0) { 3478 u_int32_t finalblks; 3479 u_int32_t loanedBlocks; 3480 3481 hfs_lock_mount(hfsmp); 3482 loanedBlocks = fp->ff_unallocblocks; 3483 cp->c_blocks -= loanedBlocks; 3484 fp->ff_blocks -= loanedBlocks; 3485 fp->ff_unallocblocks = 0; 3486 3487 hfsmp->loanedBlocks -= loanedBlocks; 3488 3489 finalblks = (length + blksize - 1) / blksize; 3490 if (finalblks > fp->ff_blocks) { 3491 /* calculate required unmapped blocks */ 3492 loanedBlocks = finalblks - fp->ff_blocks; 3493 hfsmp->loanedBlocks += loanedBlocks; 3494 3495 fp->ff_unallocblocks = loanedBlocks; 3496 cp->c_blocks += loanedBlocks; 3497 fp->ff_blocks += loanedBlocks; 3498 } 3499 hfs_unlock_mount (hfsmp); 3500 } 3501 3502 /* 3503 * For a TBE process the deallocation of the file blocks is 3504 * delayed until the file is closed. And hfs_close calls 3505 * truncate with the IO_NDELAY flag set. So when IO_NDELAY 3506 * isn't set, we make sure this isn't a TBE process. 3507 */ 3508 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) { 3509#if QUOTA 3510 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); 3511#endif /* QUOTA */ 3512 if (hfs_start_transaction(hfsmp) != 0) { 3513 retval = EINVAL; 3514 goto Err_Exit; 3515 } 3516 3517 if (fp->ff_unallocblocks == 0) { 3518 /* Protect extents b-tree and allocation bitmap */ 3519 lockflags = SFL_BITMAP; 3520 if (overflow_extents(fp)) 3521 lockflags |= SFL_EXTENTS; 3522 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3523 3524 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, 3525 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false)); 3526 3527 hfs_systemfile_unlock(hfsmp, lockflags); 3528 } 3529 if (hfsmp->jnl) { 3530 if (retval == 0) { 3531 fp->ff_size = length; 3532 } 3533 if (skipupdate) { 3534 (void) hfs_minorupdate(vp); 3535 } 3536 else { 3537 (void) hfs_update(vp, TRUE); 3538 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3539 } 3540 } 3541 hfs_end_transaction(hfsmp); 3542 3543 filebytes = 
(off_t)fp->ff_blocks * (off_t)blksize; 3544 if (retval) 3545 goto Err_Exit; 3546#if QUOTA 3547 /* These are bytesreleased */ 3548 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); 3549#endif /* QUOTA */ 3550 } 3551 /* 3552 * Only set update flag if the logical length changes & we aren't 3553 * suppressing modtime updates. 3554 */ 3555 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) { 3556 cp->c_touch_modtime = TRUE; 3557 } 3558 fp->ff_size = length; 3559 } 3560 if (cp->c_mode & (S_ISUID | S_ISGID)) { 3561 if (!vfs_context_issuser(context)) { 3562 cp->c_mode &= ~(S_ISUID | S_ISGID); 3563 skipupdate = 0; 3564 } 3565 } 3566 if (skipupdate) { 3567 retval = hfs_minorupdate(vp); 3568 } 3569 else { 3570 cp->c_touch_chgtime = TRUE; /* status changed */ 3571 if (suppress_times == 0) { 3572 cp->c_touch_modtime = TRUE; /* file data was modified */ 3573 3574 /* 3575 * If we are not suppressing the modtime update, then 3576 * update the gen count as well. 3577 */ 3578 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) { 3579 hfs_incr_gencount(cp); 3580 } 3581 } 3582 3583 retval = hfs_update(vp, MNT_WAIT); 3584 } 3585 if (retval) { 3586 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, 3587 -1, -1, -1, retval, 0); 3588 } 3589 3590Err_Exit: 3591 3592 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END, 3593 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0); 3594 3595 return (retval); 3596} 3597 3598/* 3599 * Preparation which must be done prior to deleting the catalog record 3600 * of a file or directory. In order to make the on-disk as safe as possible, 3601 * we remove the catalog entry before releasing the bitmap blocks and the 3602 * overflow extent records. However, some work must be done prior to deleting 3603 * the catalog record. 3604 * 3605 * When calling this function, the cnode must exist both in memory and on-disk. 
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */

int
hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {

	struct filefork *fp = VTOF(vp);
	struct cnode *cp = VTOC(vp);
#if QUOTA
	int retval = 0;
#endif /* QUOTA */

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}

	/*
	 * See the comment below in hfs_truncate for why we need to call
	 * setsize here.  Essentially we want to avoid pending IO if we
	 * already know that the blocks are going to be released here.
	 * This function is only called when totally removing all storage for a file, so
	 * we can take a shortcut and immediately setsize (0);
	 */
	ubc_setsize(vp, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp))) {
		return(retval);
	}
#endif /* QUOTA */

	/*
	 * Wipe out any invalid ranges which have yet to be backed by disk.
	 * NOTE(review): when ff_size == 0 this is rl_remove(0, -1, ...) —
	 * presumably rl_remove tolerates an empty/inverted range; confirm.
	 */
	rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);

	/*
	 * Account for any unmapped blocks. Since we're deleting the
	 * entire file, we don't have to worry about just shrinking
	 * to a smaller number of borrowed blocks.
	 */
	if (fp->ff_unallocblocks > 0) {
		u_int32_t loanedBlocks;

		/* Return all loaned blocks under the mount lock: the cnode's
		 * block counts stop including them and the mount-wide loan
		 * tally is reduced by the same amount. */
		hfs_lock_mount (hfsmp);
		loanedBlocks = fp->ff_unallocblocks;
		cp->c_blocks -= loanedBlocks;
		fp->ff_blocks -= loanedBlocks;
		fp->ff_unallocblocks = 0;

		hfsmp->loanedBlocks -= loanedBlocks;

		hfs_unlock_mount (hfsmp);
	}

	return 0;
}


/*
 * Special wrapper around calling TruncateFileC.  This function is useable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file.  The simplification here is that we know
 * that we are releasing all blocks.
 *
 * Note that this function may be called when there is no vnode backing
 * the file fork in question.  We may call this from hfs_vnop_inactive
 * to clear out resource fork data (and may not want to clear out the data
 * fork yet).  As a result, we pointer-check both sets of inputs before
 * doing anything with them.
 *
 * The caller is responsible for saving off a copy of the filefork(s)
 * embedded within the cnode prior to calling this function.  The pointers
 * supplied as arguments must be valid even if the cnode is no longer valid.
3688 */ 3689 3690int 3691hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, 3692 struct filefork *rsrcfork, u_int32_t fileid) { 3693 3694 off_t filebytes; 3695 u_int32_t fileblocks; 3696 int blksize = 0; 3697 int error = 0; 3698 int lockflags; 3699 3700 blksize = hfsmp->blockSize; 3701 3702 /* Data Fork */ 3703 if ((datafork != NULL) && (datafork->ff_blocks > 0)) { 3704 fileblocks = datafork->ff_blocks; 3705 filebytes = (off_t)fileblocks * (off_t)blksize; 3706 3707 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ 3708 3709 while (filebytes > 0) { 3710 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) { 3711 filebytes -= HFS_BIGFILE_SIZE; 3712 } else { 3713 filebytes = 0; 3714 } 3715 3716 /* Start a transaction, and wipe out as many blocks as we can in this iteration */ 3717 if (hfs_start_transaction(hfsmp) != 0) { 3718 error = EINVAL; 3719 break; 3720 } 3721 3722 if (datafork->ff_unallocblocks == 0) { 3723 /* Protect extents b-tree and allocation bitmap */ 3724 lockflags = SFL_BITMAP; 3725 if (overflow_extents(datafork)) 3726 lockflags |= SFL_EXTENTS; 3727 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3728 3729 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false)); 3730 3731 hfs_systemfile_unlock(hfsmp, lockflags); 3732 } 3733 if (error == 0) { 3734 datafork->ff_size = filebytes; 3735 } 3736 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3737 3738 /* Finish the transaction and start over if necessary */ 3739 hfs_end_transaction(hfsmp); 3740 3741 if (error) { 3742 break; 3743 } 3744 } 3745 } 3746 3747 /* Resource fork */ 3748 if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) { 3749 fileblocks = rsrcfork->ff_blocks; 3750 filebytes = (off_t)fileblocks * (off_t)blksize; 3751 3752 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ 3753 3754 while (filebytes > 0) { 3755 if (filebytes > 
HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) { 3756 filebytes -= HFS_BIGFILE_SIZE; 3757 } else { 3758 filebytes = 0; 3759 } 3760 3761 /* Start a transaction, and wipe out as many blocks as we can in this iteration */ 3762 if (hfs_start_transaction(hfsmp) != 0) { 3763 error = EINVAL; 3764 break; 3765 } 3766 3767 if (rsrcfork->ff_unallocblocks == 0) { 3768 /* Protect extents b-tree and allocation bitmap */ 3769 lockflags = SFL_BITMAP; 3770 if (overflow_extents(rsrcfork)) 3771 lockflags |= SFL_EXTENTS; 3772 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); 3773 3774 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false)); 3775 3776 hfs_systemfile_unlock(hfsmp, lockflags); 3777 } 3778 if (error == 0) { 3779 rsrcfork->ff_size = filebytes; 3780 } 3781 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); 3782 3783 /* Finish the transaction and start over if necessary */ 3784 hfs_end_transaction(hfsmp); 3785 3786 if (error) { 3787 break; 3788 } 3789 } 3790 } 3791 3792 return error; 3793} 3794 3795 3796/* 3797 * Truncate a cnode to at most length size, freeing (or adding) the 3798 * disk blocks. 3799 */ 3800int 3801hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, 3802 int truncateflags, vfs_context_t context) 3803{ 3804 struct filefork *fp = VTOF(vp); 3805 off_t filebytes; 3806 u_int32_t fileblocks; 3807 int blksize, error = 0; 3808 struct cnode *cp = VTOC(vp); 3809 3810 /* Cannot truncate an HFS directory! */ 3811 if (vnode_isdir(vp)) { 3812 return (EISDIR); 3813 } 3814 /* A swap file cannot change size. */ 3815 if (vnode_isswap(vp) && (length != 0)) { 3816 return (EPERM); 3817 } 3818 3819 blksize = VTOVCB(vp)->blockSize; 3820 fileblocks = fp->ff_blocks; 3821 filebytes = (off_t)fileblocks * (off_t)blksize; 3822 3823 // 3824 // Have to do this here so that we don't wind up with 3825 // i/o pending for blocks that are about to be released 3826 // if we truncate the file. 
3827 // 3828 // If skipsetsize is set, then the caller is responsible 3829 // for the ubc_setsize. 3830 // 3831 // Even if skipsetsize is set, if the length is zero we 3832 // want to call ubc_setsize() because as of SnowLeopard 3833 // it will no longer cause any page-ins and it will drop 3834 // any dirty pages so that we don't do any i/o that we 3835 // don't have to. This also prevents a race where i/o 3836 // for truncated blocks may overwrite later data if the 3837 // blocks get reallocated to a different file. 3838 // 3839 if (!skipsetsize || length == 0) 3840 ubc_setsize(vp, length); 3841 3842 // have to loop truncating or growing files that are 3843 // really big because otherwise transactions can get 3844 // enormous and consume too many kernel resources. 3845 3846 if (length < filebytes) { 3847 while (filebytes > length) { 3848 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) { 3849 filebytes -= HFS_BIGFILE_SIZE; 3850 } else { 3851 filebytes = length; 3852 } 3853 cp->c_flag |= C_FORCEUPDATE; 3854 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); 3855 if (error) 3856 break; 3857 } 3858 } else if (length > filebytes) { 3859 while (filebytes < length) { 3860 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) { 3861 filebytes += HFS_BIGFILE_SIZE; 3862 } else { 3863 filebytes = length; 3864 } 3865 cp->c_flag |= C_FORCEUPDATE; 3866 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); 3867 if (error) 3868 break; 3869 } 3870 } else /* Same logical size */ { 3871 3872 error = do_hfs_truncate(vp, length, flags, truncateflags, context); 3873 } 3874 /* Files that are changing size are not hot file candidates. */ 3875 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { 3876 fp->ff_bytesread = 0; 3877 } 3878 3879 return (error); 3880} 3881 3882 3883 3884/* 3885 * Preallocate file storage space. 
 *
 * VNOP entry point: extends the file's physical allocation (possibly
 * contiguously, from the volume offset, or from the physical EOF per
 * a_flags), or shrinks it via hfs_truncate() for lengths below the
 * current physical EOF.  The logical file size is not grown here.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t  a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_int32_t fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;
	time_t orig_ctime;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return (EINVAL);

	cp = VTOC(vp);

	orig_ctime = VTOC(vp)->c_ctime;

	/* Notify any namespace handler (tracked files) before changing storage. */
	check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);

	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
		goto Err_Exit;
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	/* ALLOCATEFROMVOL may only grow the allocation, never shrink it. */
	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;	/* non-root callers keep a reserve */
	if (hfs_virtualmetafile(cp))
		extendFlags |= kEFMetadataMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
				cred, 0);
		if (retval)
			goto Err_Exit;

#endif /* QUOTA */
		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}


		/* Grow in HFS_BIGFILE_SIZE chunks, one transaction per iteration. */
		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			if (extendFlags & kEFContigMask) {
				// if we're on a sparse device, this will force it to do a
				// full scan to find the space needed.
				hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
							(FCB*)fp,
							bytesRequested,
							blockHint,
							extendFlags,
							&actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);
		}


		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
4086 */ 4087 } 4088 4089 retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context); 4090 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; 4091 4092 /* 4093 * if we get an error and no changes were made then exit 4094 * otherwise we must do the hfs_update to reflect the changes 4095 */ 4096 if (retval && (startingPEOF == filebytes)) goto Err_Exit; 4097#if QUOTA 4098 /* These are bytesreleased */ 4099 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0); 4100#endif /* QUOTA */ 4101 4102 if (fp->ff_size > filebytes) { 4103 fp->ff_size = filebytes; 4104 4105 hfs_unlock(cp); 4106 ubc_setsize(vp, fp->ff_size); 4107 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); 4108 } 4109 } 4110 4111Std_Exit: 4112 cp->c_touch_chgtime = TRUE; 4113 cp->c_touch_modtime = TRUE; 4114 retval2 = hfs_update(vp, MNT_WAIT); 4115 4116 if (retval == 0) 4117 retval = retval2; 4118Err_Exit: 4119 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); 4120 hfs_unlock(cp); 4121 return (retval); 4122} 4123 4124 4125/* 4126 * Pagein for HFS filesystem 4127 */ 4128int 4129hfs_vnop_pagein(struct vnop_pagein_args *ap) 4130/* 4131 struct vnop_pagein_args { 4132 vnode_t a_vp, 4133 upl_t a_pl, 4134 vm_offset_t a_pl_offset, 4135 off_t a_f_offset, 4136 size_t a_size, 4137 int a_flags 4138 vfs_context_t a_context; 4139 }; 4140*/ 4141{ 4142 vnode_t vp; 4143 struct cnode *cp; 4144 struct filefork *fp; 4145 int error = 0; 4146 upl_t upl; 4147 upl_page_info_t *pl; 4148 off_t f_offset; 4149 int offset; 4150 int isize; 4151 int pg_index; 4152 boolean_t truncate_lock_held = FALSE; 4153 boolean_t file_converted = FALSE; 4154 kern_return_t kret; 4155 4156 vp = ap->a_vp; 4157 cp = VTOC(vp); 4158 fp = VTOF(vp); 4159 4160#if CONFIG_PROTECT 4161 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) { 4162 /* 4163 * If we errored here, then this means that one of two things occurred: 4164 * 1. there was a problem with the decryption of the key. 4165 * 2. 
the device is locked and we are not allowed to access this particular file. 4166 * 4167 * Either way, this means that we need to shut down this upl now. As long as 4168 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves) 4169 * then we create a upl and immediately abort it. 4170 */ 4171 if (ap->a_pl == NULL) { 4172 /* create the upl */ 4173 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl, 4174 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT); 4175 /* mark the range as needed so it doesn't immediately get discarded upon abort */ 4176 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1); 4177 4178 /* Abort the range */ 4179 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); 4180 } 4181 4182 4183 return error; 4184 } 4185#endif /* CONFIG_PROTECT */ 4186 4187 if (ap->a_pl != NULL) { 4188 /* 4189 * this can only happen for swap files now that 4190 * we're asking for V2 paging behavior... 4191 * so don't need to worry about decompression, or 4192 * keeping track of blocks read or taking the truncate lock 4193 */ 4194 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, 4195 ap->a_size, (off_t)fp->ff_size, ap->a_flags); 4196 goto pagein_done; 4197 } 4198 4199retry_pagein: 4200 /* 4201 * take truncate lock (shared/recursive) to guard against 4202 * zero-fill thru fsync interfering, but only for v2 4203 * 4204 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the 4205 * lock shared and we are allowed to recurse 1 level if this thread already 4206 * owns the lock exclusively... this can legally occur 4207 * if we are doing a shrinking ftruncate against a file 4208 * that is mapped private, and the pages being truncated 4209 * do not currently exist in the cache... in that case 4210 * we will have to page-in the missing pages in order 4211 * to provide them to the private mapping... 
we must 4212 * also call hfs_unlock_truncate with a postive been_recursed 4213 * arg to indicate that if we have recursed, there is no need to drop 4214 * the lock. Allowing this simple recursion is necessary 4215 * in order to avoid a certain deadlock... since the ftruncate 4216 * already holds the truncate lock exclusively, if we try 4217 * to acquire it shared to protect the pagein path, we will 4218 * hang this thread 4219 * 4220 * NOTE: The if () block below is a workaround in order to prevent a 4221 * VM deadlock. See rdar://7853471. 4222 * 4223 * If we are in a forced unmount, then launchd will still have the 4224 * dyld_shared_cache file mapped as it is trying to reboot. If we 4225 * take the truncate lock here to service a page fault, then our 4226 * thread could deadlock with the forced-unmount. The forced unmount 4227 * thread will try to reclaim the dyld_shared_cache vnode, but since it's 4228 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount 4229 * thread will think it needs to copy all of the data out of the file 4230 * and into a VM copy object. If we hold the cnode lock here, then that 4231 * VM operation will not be able to proceed, because we'll set a busy page 4232 * before attempting to grab the lock. Note that this isn't as simple as "don't 4233 * call ubc_setsize" because doing that would just shift the problem to the 4234 * ubc_msync done before the vnode is reclaimed. 4235 * 4236 * So, if a forced unmount on this volume is in flight AND the cnode is 4237 * marked C_DELETED, then just go ahead and do the page in without taking 4238 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file 4239 * that is not going to be available on the next mount, this seems like a 4240 * OK solution from a correctness point of view, even though it is hacky. 
4241 */ 4242 if (vfs_isforce(vp->v_mount)) { 4243 if (cp->c_flag & C_DELETED) { 4244 /* If we don't get it, then just go ahead and operate without the lock */ 4245 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE); 4246 } 4247 } 4248 else { 4249 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE); 4250 truncate_lock_held = TRUE; 4251 } 4252 4253 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT); 4254 4255 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) { 4256 error = EINVAL; 4257 goto pagein_done; 4258 } 4259 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1); 4260 4261 isize = ap->a_size; 4262 4263 /* 4264 * Scan from the back to find the last page in the UPL, so that we 4265 * aren't looking at a UPL that may have already been freed by the 4266 * preceding aborts/completions. 4267 */ 4268 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) { 4269 if (upl_page_present(pl, --pg_index)) 4270 break; 4271 if (pg_index == 0) { 4272 /* 4273 * no absent pages were found in the range specified 4274 * just abort the UPL to get rid of it and then we're done 4275 */ 4276 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY); 4277 goto pagein_done; 4278 } 4279 } 4280 /* 4281 * initialize the offset variables before we touch the UPL. 4282 * f_offset is the position into the file, in bytes 4283 * offset is the position into the UPL, in bytes 4284 * pg_index is the pg# of the UPL we're operating on 4285 * isize is the offset into the UPL of the last page that is present. 4286 */ 4287 isize = ((pg_index + 1) * PAGE_SIZE); 4288 pg_index = 0; 4289 offset = 0; 4290 f_offset = ap->a_f_offset; 4291 4292 while (isize) { 4293 int xsize; 4294 int num_of_pages; 4295 4296 if ( !upl_page_present(pl, pg_index)) { 4297 /* 4298 * we asked for RET_ONLY_ABSENT, so it's possible 4299 * to get back empty slots in the UPL. 
4300 * just skip over them 4301 */ 4302 f_offset += PAGE_SIZE; 4303 offset += PAGE_SIZE; 4304 isize -= PAGE_SIZE; 4305 pg_index++; 4306 4307 continue; 4308 } 4309 /* 4310 * We know that we have at least one absent page. 4311 * Now checking to see how many in a row we have 4312 */ 4313 num_of_pages = 1; 4314 xsize = isize - PAGE_SIZE; 4315 4316 while (xsize) { 4317 if ( !upl_page_present(pl, pg_index + num_of_pages)) 4318 break; 4319 num_of_pages++; 4320 xsize -= PAGE_SIZE; 4321 } 4322 xsize = num_of_pages * PAGE_SIZE; 4323 4324#if HFS_COMPRESSION 4325 if (VNODE_IS_RSRC(vp)) { 4326 /* allow pageins of the resource fork */ 4327 } else { 4328 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ 4329 4330 if (compressed) { 4331 if (truncate_lock_held) { 4332 /* 4333 * can't hold the truncate lock when calling into the decmpfs layer 4334 * since it calls back into this layer... even though we're only 4335 * holding the lock in shared mode, and the re-entrant path only 4336 * takes the lock shared, we can deadlock if some other thread 4337 * tries to grab the lock exclusively in between. 4338 */ 4339 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE); 4340 truncate_lock_held = FALSE; 4341 } 4342 ap->a_pl = upl; 4343 ap->a_pl_offset = offset; 4344 ap->a_f_offset = f_offset; 4345 ap->a_size = xsize; 4346 4347 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp)); 4348 /* 4349 * note that decpfs_pagein_compressed can change the state of 4350 * 'compressed'... it will set it to 0 if the file is no longer 4351 * compressed once the compression lock is successfully taken 4352 * i.e. 
we would block on that lock while the file is being inflated 4353 */ 4354 if (compressed) { 4355 if (error == 0) { 4356 /* successful page-in, update the access time */ 4357 VTOC(vp)->c_touch_acctime = TRUE; 4358 4359 /* compressed files are not hot file candidates */ 4360 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { 4361 fp->ff_bytesread = 0; 4362 } 4363 } else if (error == EAGAIN) { 4364 /* 4365 * EAGAIN indicates someone else already holds the compression lock... 4366 * to avoid deadlocking, we'll abort this range of pages with an 4367 * indication that the pagein needs to be redriven 4368 */ 4369 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART); 4370 } 4371 goto pagein_next_range; 4372 } 4373 else { 4374 /* 4375 * Set file_converted only if the file became decompressed while we were 4376 * paging in. If it were still compressed, we would re-start the loop using the goto 4377 * in the above block. This avoid us overloading truncate_lock_held as our retry_pagein 4378 * condition below, since we could have avoided taking the truncate lock to prevent 4379 * a deadlock in the force unmount case. 4380 */ 4381 file_converted = TRUE; 4382 } 4383 } 4384 if (file_converted == TRUE) { 4385 /* 4386 * the file was converted back to a regular file after we first saw it as compressed 4387 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over 4388 * reset a_size so that we consider what remains of the original request 4389 * and null out a_upl and a_pl_offset. 4390 * 4391 * We should only be able to get into this block if the decmpfs_pagein_compressed 4392 * successfully decompressed the range in question for this file. 4393 */ 4394 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY); 4395 4396 ap->a_size = isize; 4397 ap->a_pl = NULL; 4398 ap->a_pl_offset = 0; 4399 4400 /* Reset file_converted back to false so that we don't infinite-loop. 
*/ 4401 file_converted = FALSE; 4402 goto retry_pagein; 4403 } 4404 } 4405#endif 4406 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags); 4407 4408 /* 4409 * Keep track of blocks read. 4410 */ 4411 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) { 4412 int bytesread; 4413 int took_cnode_lock = 0; 4414 4415 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE) 4416 bytesread = fp->ff_size; 4417 else 4418 bytesread = xsize; 4419 4420 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ 4421 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) { 4422 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); 4423 took_cnode_lock = 1; 4424 } 4425 /* 4426 * If this file hasn't been seen since the start of 4427 * the current sampling period then start over. 4428 */ 4429 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) { 4430 struct timeval tv; 4431 4432 fp->ff_bytesread = bytesread; 4433 microtime(&tv); 4434 cp->c_atime = tv.tv_sec; 4435 } else { 4436 fp->ff_bytesread += bytesread; 4437 } 4438 cp->c_touch_acctime = TRUE; 4439 if (took_cnode_lock) 4440 hfs_unlock(cp); 4441 } 4442pagein_next_range: 4443 f_offset += xsize; 4444 offset += xsize; 4445 isize -= xsize; 4446 pg_index += num_of_pages; 4447 4448 error = 0; 4449 } 4450 4451pagein_done: 4452 if (truncate_lock_held == TRUE) { 4453 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */ 4454 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE); 4455 } 4456 4457 return (error); 4458} 4459 4460/* 4461 * Pageout for HFS filesystem. 
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
	   vnode_t       a_vp,
	   upl_t         a_pl,
	   vm_offset_t   a_pl_offset,
	   off_t         a_f_offset,
	   size_t        a_size,
	   int           a_flags
	   vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	off_t filesize;
	upl_t upl;
	upl_page_info_t* pl;
	vm_offset_t a_pl_offset;
	int a_flags;
	int is_pageoutv2 = 0;		/* true when we created the UPL ourselves (a_pl == NULL) */
	kern_return_t kret;

	cp = VTOC(vp);
	fp = VTOF(vp);

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/* A pageout dirties content, so bump the generation count. */
	if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
		hfs_incr_gencount (cp);
	}

	/*
	 * we can tell if we're getting the new or old behavior from the UPL:
	 * a NULL a_pl means VNOP_PAGEOUTV2 semantics (we create/commit the
	 * UPL ourselves); otherwise the VM system handed us one (pre-V2 /
	 * swapfile path).
	 */
	if ((upl = ap->a_pl) == NULL) {
		int request_flags;

		is_pageoutv2 = 1;
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;
		a_pl_offset = 0;

		/*
		 * For V2 semantics, we want to take the cnode truncate lock
		 * shared to guard against the file size changing via zero-filling.
		 *
		 * However, we have to be careful because we may be invoked
		 * via the ubc_msync path to write out dirty mmap'd pages
		 * in response to a lock event on a content-protected
		 * filesystem (e.g. to write out class A files).
		 * As a result, we want to take the truncate lock 'SHARED' with
		 * the mini-recursion locktype so that we don't deadlock/panic
		 * because we may be already holding the truncate lock exclusive to force any other
		 * IOs to have blocked behind us.
		 */
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);

		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		}
		else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
			retval = EINVAL;
			goto pageout_done;
		}
	}
	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we succesfully created it
	 */

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */

	if (is_pageoutv2) {
		off_t f_offset;
		int offset;
		int isize;
		int pg_index;
		int error;
		int error_ret = 0;

		isize = ap->a_size;
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
			if (pg_index == 0) {
				/* no dirty pages at all: discard the UPL and bail */
				ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
				goto pageout_done;
			}
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

		offset = 0;
		pg_index = 0;

		while (isize) {
			int xsize;
			int num_of_pages;

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset += PAGE_SIZE;
				isize -= PAGE_SIZE;
				pg_index++;

				continue;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				/* RET_ONLY_DIRTY guarantees present pages are dirty */
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			num_of_pages = 1;
			xsize = isize - PAGE_SIZE;

			while (xsize) {
				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
					break;
				num_of_pages++;
				xsize -= PAGE_SIZE;
			}
			xsize = num_of_pages * PAGE_SIZE;

			if (!vnode_isswap(vp)) {
				off_t end_of_range;
				int tooklock;

				tooklock = 0;

				/* Take the cnode lock (unless this thread already owns it)
				 * to safely trim invalid ranges below. */
				if (cp->c_lockowner != current_thread()) {
					if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
						/*
						 * we're in the v2 path, so we are the
						 * owner of the UPL... we may have already
						 * processed some of the UPL, so abort it
						 * from the current working offset to the
						 * end of the UPL
						 */
						ubc_upl_abort_range(upl,
								    offset,
								    ap->a_size - offset,
								    UPL_ABORT_FREE_ON_EMPTY);
						goto pageout_done;
					}
					tooklock = 1;
				}
				end_of_range = f_offset + xsize - 1;

				if (end_of_range >= filesize) {
					end_of_range = (off_t)(filesize - 1);
				}
				if (f_offset < filesize) {
					rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
					cp->c_flag |= C_MODIFIED;  /* leof is dirty */
				}
				if (tooklock) {
					hfs_unlock(cp);
				}
			}
			if ((error = cluster_pageout(vp, upl, offset, f_offset,
							xsize, filesize, a_flags))) {
				/* remember the first error; keep pushing remaining ranges */
				if (error_ret == 0)
					error_ret = error;
			}
			f_offset += xsize;
			offset += xsize;
			isize -= xsize;
			pg_index += num_of_pages;
		}
		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {
			retval = error_ret;
		}
	} /* end block for v2 pageout behavior */
	else {
		if (!vnode_isswap(vp)) {
			off_t end_of_range;
			int tooklock = 0;

			if (cp->c_lockowner != current_thread()) {
				if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
					/* VM owns this UPL; only abort if it allows commits */
					if (!(a_flags & UPL_NOCOMMIT)) {
						ubc_upl_abort_range(upl,
								    a_pl_offset,
								    ap->a_size,
								    UPL_ABORT_FREE_ON_EMPTY);
					}
					goto pageout_done;
				}
				tooklock = 1;
			}
			end_of_range = ap->a_f_offset + ap->a_size - 1;

			if (end_of_range >= filesize) {
				end_of_range = (off_t)(filesize - 1);
			}
			if (ap->a_f_offset < filesize) {
				rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
				cp->c_flag |= C_MODIFIED;  /* leof is dirty */
			}

			if (tooklock) {
				hfs_unlock(cp);
			}
		}
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
					 ap->a_size, filesize, a_flags);
	}

	/*
	 * If data was written, update the modification time of the file.
	 * If setuid or setgid bits are set and this process is not the
	 * superuser then clear the setuid and setgid bits as a precaution
	 * against tampering.
	 */
	if (retval == 0) {
		cp->c_touch_modtime = TRUE;
		cp->c_touch_chgtime = TRUE;
		if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
		    (vfs_context_suser(ap->a_context) != 0)) {
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			cp->c_mode &= ~(S_ISUID | S_ISGID);
			hfs_unlock(cp);
		}
	}

pageout_done:
	if (is_pageoutv2) {
		/*
		 * Release the truncate lock.  Note that because
		 * we may have taken the lock recursively by
		 * being invoked via ubc_msync due to lockdown,
		 * we should release it recursively, too.
		 */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	}
	return (retval);
}

/*
 * Intercept B-Tree node writes to unswap them if necessary.
4768 */ 4769int 4770hfs_vnop_bwrite(struct vnop_bwrite_args *ap) 4771{ 4772 int retval = 0; 4773 register struct buf *bp = ap->a_bp; 4774 register struct vnode *vp = buf_vnode(bp); 4775 BlockDescriptor block; 4776 4777 /* Trap B-Tree writes */ 4778 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) || 4779 (VTOC(vp)->c_fileid == kHFSCatalogFileID) || 4780 (VTOC(vp)->c_fileid == kHFSAttributesFileID) || 4781 (vp == VTOHFS(vp)->hfc_filevp)) { 4782 4783 /* 4784 * Swap and validate the node if it is in native byte order. 4785 * This is always be true on big endian, so we always validate 4786 * before writing here. On little endian, the node typically has 4787 * been swapped and validated when it was written to the journal, 4788 * so we won't do anything here. 4789 */ 4790 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) { 4791 /* Prepare the block pointer */ 4792 block.blockHeader = bp; 4793 block.buffer = (char *)buf_dataptr(bp); 4794 block.blockNum = buf_lblkno(bp); 4795 /* not found in cache ==> came from disk */ 4796 block.blockReadFromDisk = (buf_fromcache(bp) == 0); 4797 block.blockSize = buf_count(bp); 4798 4799 /* Endian un-swap B-Tree node */ 4800 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false); 4801 if (retval) 4802 panic("hfs_vnop_bwrite: about to write corrupt node!\n"); 4803 } 4804 } 4805 4806 /* This buffer shouldn't be locked anymore but if it is clear it */ 4807 if ((buf_flags(bp) & B_LOCKED)) { 4808 // XXXdbg 4809 if (VTOHFS(vp)->jnl) { 4810 panic("hfs: CLEARING the lock bit on bp %p\n", bp); 4811 } 4812 buf_clearflags(bp, B_LOCKED); 4813 } 4814 retval = vn_bwrite (ap); 4815 4816 return (retval); 4817} 4818 4819/* 4820 * Relocate a file to a new location on disk 4821 * cnode must be locked on entry 4822 * 4823 * Relocation occurs by cloning the file's data from its 4824 * current set of blocks to a new set of blocks. During 4825 * the relocation all of the blocks (old and new) are 4826 * owned by the file. 
 *
 *      -----------------
 *      |///////////////|
 *      -----------------
 *      0               N (file offset)
 *
 *      -----------------     -----------------
 *      |///////////////|     |               |     STEP 1 (acquire new blocks)
 *      -----------------     -----------------
 *      0               N     N+1              2N
 *
 *      -----------------     -----------------
 *      |///////////////|     |///////////////|     STEP 2 (clone data)
 *      -----------------     -----------------
 *      0               N     N+1              2N
 *
 *                            -----------------
 *                            |///////////////|     STEP 3 (head truncate blocks)
 *                            -----------------
 *                            0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;		/* block count before cloning (old blocks) */
	u_int32_t datablks;		/* block count of live data to clone */
	u_int32_t blksize;
	u_int32_t growsize;		/* bytes of new space to acquire */
	u_int32_t nextallocsave;	/* saved allocation pointer (metadata case) */
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG) {
		/* Not allowed to move symlinks. */
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	/* Free space already known to be too fragmented for relocation. */
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	/* Delayed-allocation blocks have no on-disk home to relocate from. */
	if (fp->ff_unallocblocks)
		return (EINVAL);

#if CONFIG_PROTECT
	/*
	 * <rdar://problem/9118426>
	 * Disable HFS file relocation on content-protected filesystems
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		return EINVAL;
	}
#endif
	/* If it's an SSD, also disable HFS relocation */
	if (hfsmp->hfs_flags & HFS_SSD) {
		return EINVAL;
	}


	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	/* Relocation only supports files up to 2 GiB - 1. */
	if (fp->ff_size > 0x7fffffff) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		/*
		 * Lock-ordering: the truncate lock must be acquired before the
		 * cnode lock, so drop and re-take the cnode lock around it.
		 */
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		/* Force lock since callers expects lock to be held. */
		if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	/* Keep metadata-zone files inside the metadata zone. */
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	/* Remember the device sector of the file's current last block. */
	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		/* Restore the allocation pointer so user data doesn't
		 * start landing in the metadata zone. */
		hfs_lock_mount(hfsmp);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		hfs_unlock_mount(hfsmp);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			/* Couldn't get a full second copy's worth of space. */
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			/* New space is physically adjacent to the old:
			 * relocation would accomplish nothing. */
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		              hfsmp->hfs_metazone_end)) {
			/* Metadata clone landed outside the metadata zone. */
#if 0
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
#endif
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = EPERM;
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		/* Nothing was actually allocated; nothing to give back. */
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
						 FTOC(fp)->c_fileid, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	goto exit;
}


/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;		/* total bytes to clone */
	size_t iosize;			/* bytes per copy pass */
	size_t offset;			/* progress through the copy, in bytes */
	off_t writebase;		/* file offset where the clone region begins */
	uio_t auio;
	int error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	/* Copy through a bounce buffer of at most 128 KiB per pass. */
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	/* Drop the cnode lock during the copy; re-taken before returning. */
	hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		return (error);
	}
#endif /* CONFIG_PROTECT */

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		return (ENOMEM);
	}

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		/* Read the next chunk from the original data... */
		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			/* Short read: treat partial transfer as an I/O error. */
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
			error = EIO;
			break;
		}

		/* ...and write it into the clone region (synchronously). */
		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
		                      writebase + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * lets just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_sync_range or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	/* Re-take the cnode lock the caller expects to still hold. */
	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	return (error);
}

/*
 * Clone a system (metadata) file.
5220 * 5221 */ 5222static int 5223hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, 5224 kauth_cred_t cred, struct proc *p) 5225{ 5226 caddr_t bufp; 5227 char * offset; 5228 size_t bufsize; 5229 size_t iosize; 5230 struct buf *bp = NULL; 5231 daddr64_t blkno; 5232 daddr64_t blk; 5233 daddr64_t start_blk; 5234 daddr64_t last_blk; 5235 int breadcnt; 5236 int i; 5237 int error = 0; 5238 5239 5240 iosize = GetLogicalBlockSize(vp); 5241 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1); 5242 breadcnt = bufsize / iosize; 5243 5244 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { 5245 return (ENOMEM); 5246 } 5247 start_blk = ((daddr64_t)blkstart * blksize) / iosize; 5248 last_blk = ((daddr64_t)blkcnt * blksize) / iosize; 5249 blkno = 0; 5250 5251 while (blkno < last_blk) { 5252 /* 5253 * Read up to a megabyte 5254 */ 5255 offset = bufp; 5256 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) { 5257 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp); 5258 if (error) { 5259 printf("hfs_clonesysfile: meta_bread error %d\n", error); 5260 goto out; 5261 } 5262 if (buf_count(bp) != iosize) { 5263 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp)); 5264 goto out; 5265 } 5266 bcopy((char *)buf_dataptr(bp), offset, iosize); 5267 5268 buf_markinvalid(bp); 5269 buf_brelse(bp); 5270 bp = NULL; 5271 5272 offset += iosize; 5273 } 5274 5275 /* 5276 * Write up to a megabyte 5277 */ 5278 offset = bufp; 5279 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) { 5280 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META); 5281 if (bp == NULL) { 5282 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno); 5283 error = EIO; 5284 goto out; 5285 } 5286 bcopy(offset, (char *)buf_dataptr(bp), iosize); 5287 error = (int)buf_bwrite(bp); 5288 bp = NULL; 5289 if (error) 5290 goto out; 5291 offset += iosize; 5292 } 5293 } 5294out: 5295 if (bp) { 5296 buf_brelse(bp); 5297 } 
5298 5299 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); 5300 5301 error = hfs_fsync(vp, MNT_WAIT, 0, p); 5302 5303 return (error); 5304} 5305