1/* 2 * ntfs_vnops.c - NTFS kernel vnode operations. 3 * 4 * Copyright (c) 2006-2011 Anton Altaparmakov. All Rights Reserved. 5 * Portions Copyright (c) 2006-2011 Apple Inc. All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 * 30 * ALTERNATIVELY, provided that this notice and licensing terms are retained in 31 * full, this file may be redistributed and/or modified under the terms of the 32 * GNU General Public License (GPL) Version 2, in which case the provisions of 33 * that version of the GPL will apply to you instead of the license terms 34 * above. You can obtain a copy of the GPL Version 2 at 35 * http://developer.apple.com/opensource/licenses/gpl-2.txt. 36 */ 37 38#include <sys/attr.h> 39#include <sys/buf.h> 40#include <sys/errno.h> 41#include <sys/param.h> 42#include <sys/stat.h> 43#include <sys/syslimits.h> 44#include <sys/time.h> 45#include <sys/ubc.h> 46#include <sys/ucred.h> 47#include <sys/uio.h> 48#include <sys/unistd.h> 49#include <sys/vnode.h> 50#include <sys/vnode_if.h> 51#include <sys/xattr.h> 52 53#include <string.h> 54 55#include <mach/kern_return.h> 56#include <mach/memory_object_types.h> 57 58#include <kern/debug.h> 59#include <kern/locks.h> 60 61#include <vfs/vfs_support.h> 62 63#include "ntfs.h" 64#include "ntfs_attr.h" 65#include "ntfs_bitmap.h" 66#include "ntfs_compress.h" 67#include "ntfs_debug.h" 68#include "ntfs_dir.h" 69#include "ntfs_endian.h" 70#include "ntfs_hash.h" 71#include "ntfs_inode.h" 72#include "ntfs_layout.h" 73#include "ntfs_lcnalloc.h" 74#include "ntfs_mft.h" 75#include "ntfs_mst.h" 76#include "ntfs_page.h" 77#include "ntfs_sfm.h" 78#include "ntfs_time.h" 79#include "ntfs_unistr.h" 80#include "ntfs_vnops.h" 81#include "ntfs_volume.h" 82 83/* Global ntfs vnode operations. */ 84vnop_t **ntfs_vnodeop_p; 85 86/** 87 * ntfs_cluster_iodone - complete i/o on a memory region 88 * @cbp: cluster head buffer for which i/o is being completed 89 * @arg: callback argument, we do not use it at present 90 * 91 * In the read case: 92 * 93 * For an mst protected attribute we do the post read mst deprotection and for 94 * an encrypted attribute we do the decryption (not supported at present). 
95 * Note we ignore mst fixup errors as those are detected when 96 * ntfs_mft_record_map() is called later which gives us per record granularity. 97 * 98 * In the write case: 99 * 100 * For an mst protected attribute we do the post write mst deprotection. 101 * Writing to encrypted attributes is not supported at present. 102 * 103 * Return 0 on success and errno on error. 104 */ 105int ntfs_cluster_iodone(buf_t cbp, void *arg __unused) 106{ 107 long size; 108 ntfs_inode *ni; 109 u8 *kend, *kaddr; 110 errno_t err, err2; 111 BOOL is_read = buf_flags(cbp) & B_READ; 112 113 ni = NTFS_I(buf_vnode(cbp)); 114 size = buf_count(cbp); 115 if (size & (ni->block_size - 1)) 116 panic("%s(): Called with size not a multiple of the inode " 117 "block size.\n", __FUNCTION__); 118 err = buf_map(cbp, (caddr_t*)&kaddr); 119 if (err) { 120 ntfs_error(ni->vol->mp, "Failed to map buffer (error %d).", 121 err); 122 goto err; 123 } 124 kend = kaddr + size; 125 if (NInoMstProtected(ni)) { 126 s64 ofs, data_size, init_size; 127 u32 rec_size = ni->block_size; 128 NTFS_RECORD_TYPE magic = 0; 129 130 if (!is_read) { 131 if (ni->type == AT_INDEX_ALLOCATION) 132 magic = magic_INDX; 133 else 134 panic("%s(): Unknown mst protected inode " 135 "0x%llx, type 0x%x, name_len " 136 "0x%x.", __FUNCTION__, 137 (unsigned long long)ni->mft_no, 138 (unsigned)le32_to_cpu(ni->type), 139 (unsigned)ni->name_len); 140 } 141 /* The offset in the attribute at which this buffer begins. */ 142 ofs = (s64)buf_lblkno(cbp) << PAGE_SHIFT; 143 lck_spin_lock(&ni->size_lock); 144 data_size = ni->data_size; 145 init_size = ni->initialized_size; 146 lck_spin_unlock(&ni->size_lock); 147 /* 148 * Limit mst deprotection to the initialized size as beyond 149 * that the data is zero and deprotection will fail. And worse 150 * in the write case it will lead to a kernel panic. 
151 */ 152 if (ofs + size > init_size) { 153 if (ofs > data_size) { 154 ntfs_error(ni->vol->mp, "Buffer begins past " 155 "the end of the data of the " 156 "attribute (mft_no 0x%llx).", 157 (unsigned long long)ni->mft_no); 158 err = EINVAL; 159 goto unm_err; 160 } 161 if (ofs > init_size) { 162 ntfs_debug("Buffer begins past the end of the " 163 "initialized data of the " 164 "attribute (mft_no 0x%llx).", 165 (unsigned long long)ni->mft_no); 166 goto unm_err; 167 } 168 size = init_size - ofs; 169 kend = kaddr + size; 170 } 171 /* 172 * Do the mst deprotection ignoring errors and make sure we do 173 * not go past the initialized size should an error somehow 174 * have caused the last record to straddle the initialized 175 * size. 176 */ 177 while (kaddr + rec_size <= kend) { 178 if (is_read) 179 (void)ntfs_mst_fixup_post_read( 180 (NTFS_RECORD*)kaddr, rec_size); 181 else if (__ntfs_is_magic(((NTFS_RECORD*)kaddr)->magic, 182 magic)) 183 ntfs_mst_fixup_post_write((NTFS_RECORD*)kaddr); 184 kaddr += rec_size; 185 } 186 } else if (NInoEncrypted(ni)) { 187 // TODO: Need to decrypt the encrypted sectors here. This 188 // cannot happen at present as we deny opening/reading/writing/ 189 // paging encrypted vnodes. 190 panic("%s(): Called for encrypted vnode.\n", __FUNCTION__); 191 } else 192 panic("%s(): Called for normal vnode.\n", __FUNCTION__); 193unm_err: 194 err2 = buf_unmap(cbp); 195 if (err2) { 196 if (!err) 197 err = err2; 198 ntfs_error(ni->vol->mp, "Failed to unmap buffer (error %d).", 199 err2); 200 } 201err: 202 return err; 203} 204 205/** 206 * ntfs_buf_iodone - remove the MST fixups when i/o is complete on a buffer 207 * @buf: buffer for which to remove the MST fixups 208 * @arg: unused, always NULL 209 * 210 * ntfs_buf_iodone() is an i/o completion handler which is called when i/o is 211 * completed on a buffer belonging to $MFT/$DATA. 
It removes the MST fixups 212 * and returns after which the buffer busy state (BL_BUSY flag) is cleared and 213 * others can access the buffer again. 214 * 215 * ntfs_buf_iodone() is called both when the i/o was successful and when it 216 * failed thus we have to deal with that as appropriate. 217 * 218 * Note that ntfs_buf_iodone() is called deep from within the driver stack and 219 * thus there are limitations on what it is allowed to do. In particular it is 220 * not allowed to initiate new i/o operations nor to allocate/free memory. 221 * 222 * WARNING: This function can be called whilst an unmount is in progress and 223 * thus it may not look up nor use the ntfs_volume structure to which the inode 224 * belongs. 225 */ 226static void ntfs_buf_iodone(buf_t buf, void *arg __unused) 227{ 228 s64 ofs, data_size, init_size; 229 vnode_t vn; 230 mount_t mp; 231 ntfs_inode *ni; 232 unsigned size, b_flags; 233 errno_t err; 234 235 vn = buf_vnode(buf); 236 mp = vnode_mount(vn); 237 ni = NTFS_I(vn); 238 ntfs_debug("Entering for mft_no 0x%llx, lblkno 0x%llx.", 239 (unsigned long long)ni->mft_no, 240 (unsigned long long)buf_lblkno(buf)); 241 if (!NInoMstProtected(ni) || ni->mft_no || NInoAttr(ni)) 242 panic("%s(): Called not for $MFT!\n", __FUNCTION__); 243 /* The size and offset in the attribute at which this buffer begins. */ 244 size = buf_count(buf); 245 if (size != ni->block_size) 246 panic("%s(): size != ni->block_size\n", __FUNCTION__); 247 ofs = (s64)buf_lblkno(buf) << ni->block_size_shift; 248 lck_spin_lock(&ni->size_lock); 249 data_size = ni->data_size; 250 init_size = ni->initialized_size; 251 lck_spin_unlock(&ni->size_lock); 252 b_flags = buf_flags(buf); 253 /* 254 * Limit mst deprotection to the initialized size as beyond that the 255 * data is zero and deprotection will fail. And worse in the write 256 * case it will lead to a kernel panic. 
257 */ 258 if (ofs + size > init_size) { 259 if (ofs > data_size) { 260 ntfs_error(mp, "Buffer begins past the end of the " 261 "data of the attribute (mft_no " 262 "0x%llx).", 263 (unsigned long long)ni->mft_no); 264 err = EINVAL; 265 goto err; 266 } 267 if (ofs > init_size) { 268 ntfs_error(mp, "Buffer begins past the end of the " 269 "initialized data of the attribute " 270 "(mft_no 0x%llx).", 271 (unsigned long long)ni->mft_no); 272 err = EINVAL; 273 goto err; 274 } 275 } 276 /* 277 * Do not try to remove the fixups if a read failed as there will be 278 * nothing to remove. 279 */ 280 if (!buf_error(buf) || !(b_flags & B_READ)) { 281 NTFS_RECORD *rec; 282 283 err = buf_map(buf, (caddr_t*)&rec); 284 if (err) { 285 ntfs_error(mp, "Failed to map buffer (error %d).", 286 err); 287 goto err; 288 } 289 if (b_flags & B_READ) { 290 err = ntfs_mst_fixup_post_read(rec, size); 291 if (err) { 292 ntfs_error(mp, "Multi sector transfer error " 293 "detected in mft_no 0x%llx " 294 "(error %d). Run chkdsk", 295 (unsigned long long)ni->mft_no, 296 err); 297 buf_seterror(buf, err); 298 } 299 } else 300 ntfs_mst_fixup_post_write(rec); 301 err = buf_unmap(buf); 302 if (err) { 303 ntfs_error(mp, "Failed to unmap buffer (error %d).", 304 err); 305 goto err; 306 } 307 } 308 ntfs_debug("Done."); 309 return; 310err: 311 if (!buf_error(buf)) 312 buf_seterror(buf, err); 313 ntfs_debug("Failed."); 314 return; 315} 316 317/** 318 * ntfs_vnop_strategy - prepare and issue the i/o described by a buffer 319 * @a: arguments to strategy function 320 * 321 * @a contains: 322 * buf_t a_bp; buffer for which to prepare and issue the i/o 323 * 324 * Prepare and issue the i/o described by the buffer @a->a_bp. Adapted from 325 * buf_strategy(). 326 * 327 * In NTFS, we only ever get called for buffers which have a page list 328 * attached. The page list is mapped and the address of the mapping is stored 329 * in (u8*)buf_dataptr(@a->a_bp). 
The exception to this is i/o for $MFT/$DATA 330 * and $MFTMirr/$DATA which is issued via buf_meta_bread(), etc, and thus does 331 * not involve a page list at all. 332 * 333 * Return 0 on success and errno on error. 334 */ 335static int ntfs_vnop_strategy(struct vnop_strategy_args *a) 336{ 337 s64 ofs, max_end_io; 338 daddr64_t lblkno; 339 buf_t buf = a->a_bp; 340 vnode_t vn = buf_vnode(buf); 341 ntfs_inode *ni; 342 ntfs_volume *vol; 343 void (*old_iodone)(buf_t, void *); 344 void *old_transact; 345 unsigned b_flags; 346 errno_t err, err2; 347 BOOL do_fixup; 348 349 /* Same checks as in buf_strategy(). */ 350 if (!vn || vnode_ischr(vn) || vnode_isblk(vn)) 351 panic("%s(): !vn || vnode_ischr(vn) || vnode_isblk(vn)\n", 352 __FUNCTION__); 353 ni = NTFS_I(vn); 354 if (!ni) { 355 err = EIO; 356 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 357 goto err; 358 } 359 ntfs_debug("Entering for mft_no 0x%llx, type 0x%x, name_len 0x%x, " 360 "logical block 0x%llx.", (unsigned long long)ni->mft_no, 361 le32_to_cpu(ni->type), (unsigned)ni->name_len, 362 (unsigned long long)buf_lblkno(buf)); 363 if (S_ISDIR(ni->mode)) 364 panic("%s(): Called for directory vnode.\n", __FUNCTION__); 365 vol = ni->vol; 366 b_flags = buf_flags(buf); 367 /* 368 * If we are called from cluster_io() then pass the request down to the 369 * underlying device containing the NTFS volume. We have no KPI way of 370 * doing this directly so we invoke buf_strategy() and rely on the fact 371 * that it does not do anything other than associate the physical 372 * device with the buffer and then pass the buffer down to the device. 373 */ 374 if (b_flags & B_CLUSTER) 375 goto done; 376 /* 377 * If this i/o is for $MFTMirr/$DATA send it through straight without 378 * modifications. This is because we keep the $MFTMirr/$DATA buffers 379 * in memory with the fixups applied for simplicity. 
380 */ 381 if (ni->mft_no == FILE_MFTMirr && !NInoAttr(ni)) 382 goto done; 383 /* 384 * Except for $MFT/$DATA we never do i/o via file system buffers thus 385 * we should never get here. 386 */ 387 if (ni->mft_no != FILE_MFT || NInoAttr(ni)) 388 panic("%s(): Called for non-cluster i/o buffer.\n", 389 __FUNCTION__); 390 /* 391 * We are reading/writing $MFT/$DATA. 392 * 393 * For reads, i/o is allowed up to the data_size whilst for writes, i/o 394 * is only allowed up to the initialized_size. 395 * 396 * Further when reading past the initialized size we do not need to do 397 * i/o at all as we can simply clear the buffer and return success. 398 */ 399 lblkno = buf_lblkno(buf); 400 ofs = lblkno << ni->block_size_shift; 401 lck_spin_lock(&ni->size_lock); 402 max_end_io = ni->initialized_size; 403 do_fixup = FALSE; 404 if (b_flags & B_READ) { 405 if (ofs >= max_end_io) { 406 if (max_end_io > ni->data_size) 407 panic("%s() initialized_size > data_size\n", 408 __FUNCTION__); 409 if (ofs < ni->data_size) { 410 lck_spin_unlock(&ni->size_lock); 411 buf_clear(buf); 412 buf_biodone(buf); 413 ntfs_debug("Read past initialized size. " 414 "Clearing buffer."); 415 return 0; 416 } 417 } 418 max_end_io = ni->data_size; 419 do_fixup = TRUE; 420 } 421 lck_spin_unlock(&ni->size_lock); 422 if (ofs >= max_end_io) { 423 /* I/o is out of range. This should never happen. */ 424 ntfs_error(vol->mp, "Trying to %s buffer for $MFT/$DATA which " 425 "is out of range, aborting.", 426 b_flags & B_READ ? "read" : "write"); 427 err = EIO; 428 goto err; 429 } 430 /* 431 * For writes we need to apply the MST fixups before calling 432 * buf_strategy() which will perform the i/o and if the write is for an 433 * mft record that is also in the mft mirror we now need to write it to 434 * the mft mirror as well. 435 * 436 * Note B_WRITE is a pseudo flag and cannot be used for checking thus 437 * check that B_READ is not set which implies it is a write. 
438 */ 439 if (!(b_flags & B_READ)) { 440 NTFS_RECORD *rec; 441 NTFS_RECORD_TYPE magic; 442 BOOL need_mirr_sync; 443 444 err = buf_map(buf, (caddr_t*)&rec); 445 if (err) { 446 ntfs_error(vol->mp, "Failed to map buffer (error %d).", 447 err); 448 goto err; 449 } 450 if (!rec) 451 panic("%s(): buf_map() returned NULL.\n", __FUNCTION__); 452#if 0 453 need_mirr_sync = FALSE; 454 if (ni->type == AT_INDEX_ALLOCATION) 455 magic = magic_INDX; 456 else if (ni == mft_ni || ni == vol->mftmirr_ni) { 457 magic = magic_FILE; 458 if (ni == mft_ni) 459 need_mirr_sync = (lblkno < vol->mftmirr_size); 460 } else 461 panic("%s(): Unknown mst protected inode 0x%llx, type " 462 "0x%x, name_len 0x%x.", __FUNCTION__, 463 (unsigned long long)ni->mft_no, 464 (unsigned)le32_to_cpu(ni->type), 465 (unsigned)ni->name_len); 466#else 467 need_mirr_sync = (lblkno < vol->mftmirr_size); 468 magic = magic_FILE; 469#endif 470 /* 471 * Only apply fixups if the record has the correct magic. We 472 * may have detected a multi sector transfer error and are thus 473 * now writing a BAAD record in which case we do not want to 474 * touch its contents. 475 * 476 * Further, if there is an error do not sync the record to the 477 * mft mirror as that may still be intact and we do not want to 478 * overwrite the correct data with corrupt data. 479 */ 480 if (__ntfs_is_magic(rec->magic, magic)) { 481 err = ntfs_mst_fixup_pre_write(rec, ni->block_size); 482 if (err) { 483 /* The record is corrupt, do not write it. */ 484 ntfs_error(vol->mp, "Failed to apply mst " 485 "fixups (mft_no 0x%llx, type " 486 "0x%x, offset 0x%llx).", 487 (unsigned long long)ni->mft_no, 488 (unsigned)le32_to_cpu(ni->type), 489 (unsigned long long)ofs); 490 err = EIO; 491 goto unm_err; 492 } 493 do_fixup = TRUE; 494 if (need_mirr_sync) { 495 /* 496 * Note we continue despite an error as we may 497 * succeed to write the actual mft record. 
498 */ 499 err = ntfs_mft_mirror_sync(vol, lblkno, 500 (MFT_RECORD*)rec, 501 !(b_flags & B_ASYNC)); 502 if (err) 503 ntfs_error(vol->mp, "Failed to sync " 504 "mft mirror (error " 505 "%d). Run chkdsk.", 506 err); 507 } 508 } 509 err = buf_unmap(buf); 510 if (err) 511 ntfs_error(vol->mp, "Failed to unmap buffer (error " 512 "%d).", err); 513 } 514 /* 515 * For both reads and writes we need to register our i/o completion 516 * handler which will be called after i/o is complete (including on i/o 517 * failure) and in which we will remove the MST fixups so the buffer in 518 * memory never has MST fixups applied unless it is under i/o in which 519 * case it is BL_BUSY and thus cannot be accessed by anyone so it is 520 * safe to have the MST fixups applied whilst i/o is in flight. 521 */ 522 if (do_fixup) { 523 buf_setfilter(buf, ntfs_buf_iodone, NULL, &old_iodone, 524 &old_transact); 525 if (old_iodone || old_transact) 526 panic("%s(): Buffer for $MFT/$DATA already had an i/o " 527 "completion handler assigned!\n", 528 __FUNCTION__); 529 } 530 /* 531 * Everything is set up. Pass the i/o onto the buffer layer. 532 * 533 * When the i/o is done it will call our i/o completion handler which 534 * will remove the mst fixups. 
535 */ 536done: 537 return buf_strategy(vol->dev_vn, a); 538unm_err: 539 err2 = buf_unmap(buf); 540 if (err2) 541 ntfs_error(vol->mp, "Failed to unmap buffer in error code " 542 "path (error %d).", err2); 543err: 544 buf_seterror(buf, err); 545 buf_biodone(buf); 546 return err; 547} 548 549/** 550 * ntfs_vnop_lookup - find a vnode inside an ntfs directory given its name 551 * @a: arguments to lookup function 552 * 553 * @a contains: 554 * vnode_t a_dvp; directory vnode in which to search 555 * vnode_t *a_vpp; destination pointer for the found vnode 556 * struct componentname *a_cnp; name to find in the directory vnode 557 * vfs_context_t a_context; 558 * 559 * In short, ntfs_vnop_lookup() looks for the vnode represented by the name 560 * @a->a_cnp in the directory vnode @a->a_dvp and if found returns the vnode in 561 * *@a->a_vpp. 562 * 563 * Return 0 on success and the error code on error. A return value of ENOENT 564 * does not signify an error as such but merely the fact that the name 565 * @a->a_cnp is not present in the directory @a->a_dvp. When the lookup is 566 * done for purposes of create, including for the destination of a rename, we 567 * return EJUSTRETURNED instead of ENOENT when the name is not found. This 568 * allows the VFS to proceed with the create/rename. 569 * 570 * To simplify matters for us, we do not treat the DOS and WIN32 filenames as 571 * two hard links but instead if the lookup matches a DOS filename, we return 572 * the corresponding WIN32 filename instead. 573 * 574 * There are three cases we need to distinguish here: 575 * 576 * 1) The name perfectly matches (i.e. including case) a directory entry with a 577 * filename in the WIN32 or POSIX namespaces. In this case 578 * ntfs_lookup_inode_by_name() will return with name set to NULL and we 579 * just use the name as supplied in @a->a_cnp. 580 * 2) The name matches (not including case) a directory entry with a filename 581 * in the WIN32 or POSIX namespaces. 
In this case 582 * ntfs_lookup_inode_by_name() will return with name set to point to an 583 * allocated ntfs_dir_lookup_name structure containing the properly cased 584 * little endian Unicode name. We convert the name to decomposed UTF-8 and 585 * use that name. 586 * 3) The name matches either perfectly or not (i.e. we do not care about case) 587 * a directory entry with a filename in the DOS namespace. In this case 588 * ntfs_lookup_inode_by_name() will return with name set to point to an 589 * allocated ntfs_dir_lookup_name structure which just tells us that the 590 * name is in the DOS namespace. We read the inode and find the filename in 591 * the WIN32 namespace corresponding to the matched DOS name. We then 592 * convert the name to decomposed UTF-8 and use that name to update the 593 * vnode identity with. 594 */ 595static int ntfs_vnop_lookup(struct vnop_lookup_args *a) 596{ 597 MFT_REF mref; 598 ino64_t mft_no; 599 unsigned long op; 600 struct componentname *name_cn, *cn; 601 ntfs_inode *ni, *dir_ni = NTFS_I(a->a_dvp); 602 vnode_t vn; 603 ntfs_volume *vol; 604 ntfschar *ntfs_name; 605 ntfs_dir_lookup_name *name = NULL; 606 u8 *utf8_name = NULL; 607 size_t ntfs_name_size, utf8_size; 608 signed ntfs_name_len; 609 int err; 610 /* 611 * This is rather gross but several other file systems do it so perhaps 612 * the large stack (16kiB I believe) in the OS X kernel is big enough. 613 * If we do not want to do the static allocation then simply set 614 * ntfs_name to NULL and utf8_to_ntfs() will allocate the memory for 615 * us. (We then have to free it, see utf8_to_ntfs() description for 616 * details.) 
617 */ 618 ntfschar ntfs_name_buf[NTFS_MAX_NAME_LEN]; 619 struct componentname cn_buf; 620#ifdef DEBUG 621 static const char *ops[4] = { "LOOKUP", "CREATE", "DELETE", "RENAME" }; 622#endif 623 624 if (!dir_ni) { 625 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 626 return EINVAL; 627 } 628 vol = dir_ni->vol; 629 name_cn = cn = a->a_cnp; 630 op = cn->cn_nameiop; 631 ntfs_debug("Looking up %.*s in directory inode 0x%llx for %s, flags " 632 "0x%lx.", (int)cn->cn_namelen, cn->cn_nameptr, 633 (unsigned long long)dir_ni->mft_no, 634 op < 4 ? ops[op] : "UNKNOWN", 635 (unsigned long)cn->cn_flags); 636 /* 637 * Ensure we are being called for a directory in case we are not being 638 * called from the VFS. 639 */ 640 if (!S_ISDIR(dir_ni->mode)) { 641 ntfs_error(vol->mp, "Not a directory."); 642 return ENOTDIR; 643 } 644 lck_rw_lock_shared(&dir_ni->lock); 645 /* Do not allow messing with the inode once it has been deleted. */ 646 if (NInoDeleted(dir_ni)) { 647 /* Remove the inode from the name cache. */ 648 cache_purge(dir_ni->vn); 649 lck_rw_unlock_shared(&dir_ni->lock); 650 ntfs_debug("Parent directory is deleted."); 651 return ENOENT; 652 } 653 /* 654 * First, look for the name in the name cache. cache_lookup() returns 655 * -1 if found and @vn is set to the vnode, ENOENT if found and it is a 656 * negative entry thus @vn is not set to anything, or 0 if the lookup 657 * failed in which case we need to do a file system based lookup. 658 * 659 * Note that if @op is CREATE and there is a negative entry in the name 660 * cache cache_lookup() will discard that name and return 0, i.e. the 661 * lookup failed. In this case we will automatically fall through and 662 * do the right thing during the real lookup. 663 */ 664 err = cache_lookup(dir_ni->vn, &vn, cn); 665 if (err) { 666 if (err == -1) { 667 ni = NTFS_I(vn); 668 lck_rw_lock_shared(&ni->lock); 669 /* 670 * Do not allow messing with the inode once it has been 671 * deleted. 
672 */ 673 if (!NInoDeleted(ni)) { 674 lck_rw_unlock_shared(&ni->lock); 675 lck_rw_unlock_shared(&dir_ni->lock); 676 *a->a_vpp = vn; 677 ntfs_debug("Done (cached)."); 678 return 0; 679 } 680 lck_rw_unlock_shared(&ni->lock); 681 /* Remove the inode from the name cache. */ 682 cache_purge(vn); 683 vnode_put(vn); 684 ntfs_warning(vol->mp, "Cached but deleted vnode " 685 "found, purged from cache and doing " 686 "real lookup."); 687 } else { 688 lck_rw_unlock_shared(&dir_ni->lock); 689 if (err == ENOENT) { 690 ntfs_debug("Done (cached, negative)."); 691 return err; 692 } 693 ntfs_error(vol->mp, "cache_lookup() failed (error " 694 "%d).", err); 695 return err; 696 } 697 } 698 /* We special case "." and ".." as they are emulated on NTFS. */ 699 if (cn->cn_namelen == 1 && cn->cn_nameptr[0] == '.') { 700 /* "." is not cached. */ 701 cn->cn_flags &= ~MAKEENTRY; 702 if (op == RENAME) { 703 lck_rw_unlock_shared(&dir_ni->lock); 704 ntfs_debug("Op is RENAME but name is \".\", returning " 705 "EISDIR."); 706 return EISDIR; 707 } 708 err = vnode_get(dir_ni->vn); 709 lck_rw_unlock_shared(&dir_ni->lock); 710 if (err) { 711 ntfs_error(vol->mp, "Failed to get iocount reference " 712 "on current directory (error %d).", 713 err); 714 return err; 715 } 716 ntfs_debug("Got \".\" directory 0x%llx.", 717 (unsigned long long)dir_ni->mft_no); 718 *a->a_vpp = dir_ni->vn; 719 return 0; 720 } else if (cn->cn_flags & ISDOTDOT) { 721 /* ".." is not cached. */ 722 cn->cn_flags &= ~MAKEENTRY; 723 vn = vnode_getparent(dir_ni->vn); 724 if (vn) { 725 lck_rw_unlock_shared(&dir_ni->lock); 726 ntfs_debug("Got \"..\" directory 0x%llx of directory " 727 "0x%llx.", 728 (unsigned long long)NTFS_I(vn)->mft_no, 729 (unsigned long long)dir_ni->mft_no); 730 *a->a_vpp = vn; 731 return 0; 732 } 733 /* 734 * Look up a filename attribute in the mft record of the 735 * directory @dir_ni and use its parent mft reference to run an 736 * ntfs_inode_get() on it to obtain an inode for "..". 
737 */ 738 err = ntfs_inode_get_name_and_parent_mref(dir_ni, FALSE, &mref, 739 NULL); 740 lck_rw_unlock_shared(&dir_ni->lock); 741 if (err) { 742 ntfs_error(vol->mp, "Failed to obtain parent mft " 743 "reference for directory 0x%llx " 744 "(error %d).", 745 (unsigned long long)dir_ni->mft_no, 746 err); 747 return err; 748 } 749 mft_no = MREF(mref); 750 err = ntfs_inode_get(vol, mft_no, FALSE, LCK_RW_TYPE_SHARED, 751 &ni, NULL, NULL); 752 if (err) { 753 ntfs_error(vol->mp, "Failed to obtain parent inode " 754 "0x%llx for directory 0x%llx (error " 755 "%d).", (unsigned long long)mft_no, 756 (unsigned long long)dir_ni->mft_no, 757 err); 758 return err; 759 } 760 /* Consistency check. */ 761 if (MSEQNO(mref) != ni->seq_no) { 762 lck_rw_unlock_shared(&ni->lock); 763 (void)vnode_put(ni->vn); 764 ntfs_error(vol->mp, "Found stale parent mft reference " 765 "in filename of directory 0x%llx. " 766 "Volume is corrupt. Run chkdsk.", 767 (unsigned long long)dir_ni->mft_no); 768 return EIO; 769 } 770 if (!S_ISDIR(ni->mode)) { 771 lck_rw_unlock_shared(&ni->lock); 772 (void)vnode_put(ni->vn); 773 ntfs_error(vol->mp, "Found non-directory parent for " 774 "filename of directory 0x%llx. " 775 "Volume is corrupt. Run chkdsk.", 776 (unsigned long long)dir_ni->mft_no); 777 return EIO; 778 } 779 ntfs_debug("Got \"..\" directory 0x%llx of directory 0x%llx.", 780 (unsigned long long)mft_no, 781 (unsigned long long)dir_ni->mft_no); 782 *a->a_vpp = ni->vn; 783 lck_rw_unlock_shared(&ni->lock); 784 return 0; 785 } 786 /* Convert the name from utf8 to Unicode. 
*/ 787 ntfs_name = ntfs_name_buf; 788 ntfs_name_size = sizeof(ntfs_name_buf); 789 ntfs_name_len = utf8_to_ntfs(vol, (u8*)cn->cn_nameptr, cn->cn_namelen, 790 &ntfs_name, &ntfs_name_size); 791 if (ntfs_name_len < 0) { 792 lck_rw_unlock_shared(&dir_ni->lock); 793 err = -ntfs_name_len; 794 if (err == ENAMETOOLONG) 795 ntfs_debug("Failed (name is too long)."); 796 else 797 ntfs_error(vol->mp, "Failed to convert name to " 798 "Unicode (error %d).", err); 799 return err; 800 } 801 /* Look up the converted name in the directory index. */ 802 err = ntfs_lookup_inode_by_name(dir_ni, ntfs_name, ntfs_name_len, 803 &mref, &name); 804 if (err) { 805 lck_rw_unlock_shared(&dir_ni->lock); 806 if (err != ENOENT) { 807 ntfs_error(vol->mp, "Failed to find name in directory " 808 "(error %d).", err); 809 return err; 810 } 811not_found: 812 /* 813 * The name does not exist in the directory @dir_ni. 814 * 815 * If creating (or renaming and the name is the destination 816 * name) and we are at the end of a pathname we can consider 817 * allowing the file to be created so return EJUSTRETURN 818 * instead of ENOENT. 819 */ 820 if (cn->cn_flags & ISLASTCN && (op == CREATE || op == RENAME)) { 821 ntfs_debug("Done (not found but for CREATE or RENAME, " 822 "returning EJUSTRETURN)."); 823 return EJUSTRETURN; 824 } 825 /* 826 * Insert a negative entry into the name cache if caching of 827 * this name is desired unless this is a create operation in 828 * which case we do not want to do that. 829 */ 830 if (cn->cn_flags & MAKEENTRY && op != CREATE) 831 cache_enter(dir_ni->vn, NULL, cn); 832 /* 833 * Prevent the caller from trying to add the name to the cache 834 * as well. 835 */ 836 cn->cn_flags &= ~MAKEENTRY; 837 ntfs_debug("Done (not found%s).", cn->cn_flags & MAKEENTRY ? 838 "adding negative name cache entry" : ""); 839 return err; 840 } 841 /* The lookup succeeded. 
*/ 842 mft_no = MREF(mref); 843 ntfs_debug("Name matches inode number 0x%llx.", 844 (unsigned long long)mft_no); 845 /* 846 * Remove all NTFS core system files from the name space so we do not 847 * need to worry about users damaging a volume by writing to them or 848 * deleting/renaming them and so that we can return fsRtParID (1) as 849 * the inode number of the parent of the volume root directory and 850 * fsRtDirID (2) as the inode number of the volume root directory which 851 * are both expected by Carbon and various applications. 852 */ 853 if (mft_no < FILE_first_user) { 854 lck_rw_unlock_shared(&dir_ni->lock); 855 if (name) 856 OSFree(name, sizeof(*name), ntfs_malloc_tag); 857 ntfs_debug("Removing core NTFS system file (mft_no 0x%x) " 858 "from name space.", (unsigned)mft_no); 859 err = ENOENT; 860 goto not_found; 861 } 862 /* 863 * If the name is at the end of a pathname and is about to be deleted 864 * either directly or as a consequence of a rename with the name as the 865 * target, do not cache it. 866 */ 867 if (cn->cn_flags & ISLASTCN && (op == DELETE || op == RENAME)) 868 cn->cn_flags &= ~MAKEENTRY; 869 /* 870 * If a name was returned from the lookup and it is in the POSIX or 871 * WIN32 namespaces we need to convert it into a componentname so we 872 * can use it instead of the existing componentname @cn when getting 873 * the inode. 874 * 875 * If the returned name is in the DOS namespace we have to get the 876 * inode without a name as we need the inode in order to be able to 877 * find the WIN32 name corresponding to the DOS name. Once we have the 878 * name we will update the vnode identity with it. 879 * 880 * If no name was returned, the match was perfect and we just use the 881 * componentname that was passed in by the caller. 
882 */ 883 if (name) { 884 if (name->type == FILENAME_DOS) { 885 name_cn = NULL; 886 /* 887 * We do not need @name any more but do not set it to 888 * NULL because we use that fact to distinguish between 889 * the DOS and WIN32/POSIX cases. 890 */ 891 OSFree(name, sizeof(*name), ntfs_malloc_tag); 892 } else { 893 signed res_size; 894 895 res_size = ntfs_to_utf8(vol, name->name, name->len << 896 NTFSCHAR_SIZE_SHIFT, &utf8_name, 897 &utf8_size); 898 OSFree(name, sizeof(*name), ntfs_malloc_tag); 899 if (res_size < 0) { 900 lck_rw_unlock_shared(&dir_ni->lock); 901 /* Failed to convert name. */ 902 err = -res_size; 903 ntfs_error(vol->mp, "Failed to convert inode " 904 "name to decomposed UTF-8 " 905 "(error %d).", err); 906 return err; 907 } 908 name = NULL; 909 cn_buf = (struct componentname) { 910 .cn_flags = cn->cn_flags, 911 .cn_nameptr = (char*)utf8_name, 912 .cn_namelen = res_size, 913 }; 914 name_cn = &cn_buf; 915 } 916 } 917 /* 918 * @name_cn now contains the correct name of the inode or is NULL. 919 * 920 * If @name_cn is not NULL and its cn_flags indicate that the name is 921 * to be entered into the name cache, ntfs_inode_get() will do this and 922 * clear the MAKEENTRY bit in the cn_flags. 923 * 924 * Note we only drop the directory lock after obtaining the inode 925 * otherwise someone could delete it under our feet. 926 */ 927 err = ntfs_inode_get(vol, mft_no, FALSE, LCK_RW_TYPE_SHARED, &ni, 928 dir_ni->vn, name_cn); 929 lck_rw_unlock_shared(&dir_ni->lock); 930 if (name_cn == &cn_buf) { 931 /* Pick up any modifications to the cn_flags. */ 932 cn->cn_flags = cn_buf.cn_flags; 933 OSFree(utf8_name, utf8_size, ntfs_malloc_tag); 934 } 935 if (!err) { 936 /* Consistency check. */ 937 // FIXME: I cannot remember why we need the "mft_no != 938 // FILE_MFT" test... 
939 if (MSEQNO(mref) != ni->seq_no && mft_no != FILE_MFT) { 940 lck_rw_unlock_shared(&ni->lock); 941 (void)vnode_put(ni->vn); 942 ntfs_debug("Inode was deleted and reused under our " 943 "feet."); 944 err = ENOENT; 945 goto not_found; 946 } 947 /* 948 * We found it. Before we can return it, we have to check if 949 * returning this inode is a valid response to the requested 950 * lookup. To be more specific, if the lookup was for an 951 * intermediate path component and the inode is not a directory 952 * or symbolic link, it is not a valid response because it 953 * cannot be part of an intermediate path component. In that 954 * case return an error. 955 */ 956 if (cn->cn_flags & ISLASTCN || S_ISDIR(ni->mode) || 957 S_ISLNK(ni->mode)) { 958 /* 959 * Perfect WIN32/POSIX match or wrong case WIN32/POSIX 960 * match, i.e. cases 1 and 2, respectively. 961 */ 962 if (!name) { 963 *a->a_vpp = ni->vn; 964 ntfs_debug("Done (case %d).", 965 name_cn == &cn_buf ? 2 : 1); 966 lck_rw_unlock_shared(&ni->lock); 967 return 0; 968 } 969 /* 970 * We are too indented. Handle DOS matches further 971 * below. 972 */ 973 goto handle_dos_name; 974 } 975 lck_rw_unlock_shared(&ni->lock); 976 (void)vnode_put(ni->vn); 977 ntfs_debug("Done (intermediate path component requested but " 978 "found inode is not a directory or symbolic " 979 "link, returning ENOTDIR)."); 980 err = ENOTDIR; 981 } else { 982 if (err == ENOENT) { 983 ntfs_debug("Inode was deleted under our feet."); 984 goto not_found; 985 } 986 ntfs_error(vol->mp, "Failed to get inode 0x%llx (error %d).", 987 (unsigned long long)mft_no, err); 988 } 989 return err; 990 // TODO: Consider moving this lot to a separate function. 991handle_dos_name: 992 { 993 MFT_RECORD *m; 994 ntfs_attr_search_ctx *ctx; 995 FILENAME_ATTR *fn; 996 const char *old_name; 997 signed res_size; 998 999 vn = ni->vn; 1000 /* 1001 * DOS match. -- Case 3. 1002 * 1003 * Find the WIN32 name corresponding to the matched DOS name. 
1004 * 1005 * At present @ni is guaranteed to be a base inode. 1006 */ 1007 err = ntfs_mft_record_map(ni, &m); 1008 if (err) { 1009 ntfs_error(vol->mp, "Failed to map mft record (error %d).", 1010 err); 1011 goto err; 1012 } 1013 ctx = ntfs_attr_search_ctx_get(ni, m); 1014 if (!ctx) { 1015 ntfs_error(vol->mp, "Failed to allocate search context."); 1016 err = ENOMEM; 1017 goto unm_err; 1018 } 1019 do { 1020 ATTR_RECORD *attr; 1021 u32 val_len; 1022 u16 val_ofs; 1023 1024 err = ntfs_attr_lookup(AT_FILENAME, AT_UNNAMED, 0, 0, NULL, 0, 1025 ctx); 1026 if (err) { 1027 if (err == ENOENT) { 1028 ntfs_error(vol->mp, "WIN32 namespace name is " 1029 "missing from inode. Run " 1030 "chkdsk."); 1031 err = EIO; 1032 } else 1033 ntfs_error(vol->mp, "Failed to find WIN32 " 1034 "namespace name in inode " 1035 "(error %d).", err); 1036 goto put_err; 1037 } 1038 /* Consistency checks. */ 1039 attr = ctx->a; 1040 if (attr->non_resident || attr->flags) 1041 goto attr_err; 1042 val_len = le32_to_cpu(attr->value_length); 1043 val_ofs = le16_to_cpu(attr->value_offset); 1044 if (val_ofs + val_len > le32_to_cpu(attr->length)) 1045 goto attr_err; 1046 fn = (FILENAME_ATTR*)((u8*)attr + val_ofs); 1047 if ((u32)(sizeof(FILENAME_ATTR) + (fn->filename_length << 1048 NTFSCHAR_SIZE_SHIFT)) > val_len) 1049 goto attr_err; 1050 } while (fn->filename_type != FILENAME_WIN32); 1051 /* Convert the name to decomposed UTF-8. */ 1052 res_size = ntfs_to_utf8(vol, fn->filename, fn->filename_length << 1053 NTFSCHAR_SIZE_SHIFT, &utf8_name, &utf8_size); 1054 ntfs_attr_search_ctx_put(ctx); 1055 ntfs_mft_record_unmap(ni); 1056 if (res_size < 0) { 1057 /* Failed to convert name. */ 1058 err = -res_size; 1059 ntfs_error(vol->mp, "Failed to convert inode name to " 1060 "decomposed UTF-8 (error %d).", err); 1061 goto err; 1062 } 1063 /* Update the vnode with the new name if it differs from the old one. 
*/ 1064 old_name = vnode_getname(vn); 1065 if (!old_name || (ni->link_count > 1 && ((long)strlen(old_name) != 1066 res_size || bcmp(old_name, utf8_name, res_size)))) { 1067 vnode_update_identity(vn, NULL, (char*)utf8_name, res_size, 0, 1068 VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE); 1069 } 1070 if (old_name) 1071 vnode_putname(old_name); 1072 /* 1073 * Enter the name into the cache (if it is already there this is a 1074 * no-op) and prevent the caller from trying to add the name to the 1075 * cache as well. 1076 */ 1077 cn_buf = (struct componentname) { 1078 .cn_flags = cn->cn_flags, 1079 .cn_nameptr = (char*)utf8_name, 1080 .cn_namelen = res_size, 1081 }; 1082 cache_enter(dir_ni->vn, vn, &cn_buf); 1083 cn->cn_flags &= ~MAKEENTRY; 1084 OSFree(utf8_name, utf8_size, ntfs_malloc_tag); 1085 *a->a_vpp = ni->vn; 1086 lck_rw_unlock_shared(&ni->lock); 1087 ntfs_debug("Done (case 3)."); 1088 return 0; 1089attr_err: 1090 ntfs_error(vol->mp, "Filename attribute is corrupt. Run chkdsk."); 1091 err = EIO; 1092put_err: 1093 ntfs_attr_search_ctx_put(ctx); 1094unm_err: 1095 ntfs_mft_record_unmap(ni); 1096err: 1097 lck_rw_unlock_shared(&ni->lock); 1098 (void)vnode_put(vn); 1099 return err; 1100 } 1101} 1102 1103// TODO: Rename to ntfs_inode_create and move to ntfs_inode.[hc]? 1104/** 1105 * ntfs_create - create an inode on an ntfs volume 1106 * @dir_vn: vnode of directory in which to create the new inode 1107 * @vn: destination pointer for the vnode of the created inode 1108 * @cn: componentname specifying name of the inode to create 1109 * @va: vnode attributes to assign to the new inode 1110 * @lock: if true the ntfs inode of the returned vnode *@vn is locked 1111 * 1112 * Create an inode with name as specified in @cn in the directory specified by 1113 * the vnode @dir_vn. Assign the attributes @va to the created inode. Finally 1114 * return the vnode of the created inode in *@vn. 1115 * 1116 * @va is used to determine which type of inode is to be created, i.e. 
if 1117 * @va->va_type if VDIR create a directory, etc. 1118 * 1119 * If @lock is true the ntfs inode of the returned vnode is locked for writing 1120 * (NTFS_I(@vn)->lock). 1121 * 1122 * Called by the various inode creation ntfs functions (ntfs_vnop_create(), 1123 * ntfs_vnop_mkdir(), ntfs_vnop_symlink(), ntfs_vnop_mknod(), etc) which are 1124 * called by the VFS. 1125 * 1126 * Return 0 on success and errno on error. 1127 * 1128 * Note we always create inode names in the POSIX namespace. 1129 */ 1130static errno_t ntfs_create(vnode_t dir_vn, vnode_t *vn, 1131 struct componentname *cn, struct vnode_attr *va, 1132 const BOOL lock) 1133{ 1134 ntfs_inode *ni, *dir_ni = NTFS_I(dir_vn); 1135 ntfs_volume *vol; 1136 FILENAME_ATTR *fn; 1137 ntfschar *ntfs_name; 1138 MFT_RECORD *m; 1139 ATTR_RECORD *a; 1140 size_t ntfs_name_size; 1141 signed ntfs_name_len; 1142 unsigned fn_alloc, fn_size; 1143 errno_t err, err2; 1144 1145 if (!dir_ni) { 1146 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 1147 return EINVAL; 1148 } 1149 vol = dir_ni->vol; 1150 if (!S_ISDIR(dir_ni->mode)) { 1151 ntfs_debug("Parent inode is not a directory, returning " 1152 "ENOTDIR."); 1153 return ENOTDIR; 1154 } 1155 if (dir_ni->file_attributes & FILE_ATTR_REPARSE_POINT) { 1156 ntfs_error(vol->mp, "Parent inode is a reparse point and not " 1157 "a regular directory, returning ENOTSUP."); 1158 return ENOTDIR; 1159 } 1160 /* 1161 * Create a temporary copy of the filename attribute so we can release 1162 * the mft record before we add the directory entry. This is needed 1163 * because when we hold the mft record for the newly created inode and 1164 * we call ntfs_dir_entry_add() this would cause the mft record for the 1165 * directory to be mapped which would result in a deadlock in the event 1166 * that both mft records are in the same page. 
1167 */ 1168 fn_alloc = sizeof(FILENAME_ATTR) + NTFS_MAX_NAME_LEN * sizeof(ntfschar); 1169 fn = OSMalloc(fn_alloc, ntfs_malloc_tag); 1170 if (!fn) { 1171 ntfs_error(vol->mp, "Failed to allocate memory for temporary " 1172 "filename attribute."); 1173 return ENOMEM; 1174 } 1175 bzero(fn, fn_alloc); 1176 /* Begin setting up the temporary filename attribute. */ 1177 fn->parent_directory = MK_LE_MREF(dir_ni->mft_no, dir_ni->seq_no); 1178 /* FILENAME_POSIX is zero and the attribute is already zeroed. */ 1179 /* fn->filename_type = FILENAME_POSIX; */ 1180 /* Convert the name from utf8 to Unicode. */ 1181 ntfs_name = fn->filename; 1182 ntfs_name_size = NTFS_MAX_NAME_LEN * sizeof(ntfschar); 1183 ntfs_name_len = utf8_to_ntfs(vol, (u8*)cn->cn_nameptr, cn->cn_namelen, 1184 &ntfs_name, &ntfs_name_size); 1185 if (ntfs_name_len < 0) { 1186 err = -ntfs_name_len; 1187 if (err == ENAMETOOLONG) 1188 ntfs_debug("Failed (name is too long)."); 1189 else 1190 ntfs_error(vol->mp, "Failed to convert name to " 1191 "Unicode (error %d).", err); 1192 goto err; 1193 } 1194 /* Set the filename length in the temporary filename attribute. */ 1195 fn->filename_length = ntfs_name_len; 1196 fn_size = sizeof(FILENAME_ATTR) + ntfs_name_len * sizeof(ntfschar); 1197 /* If no vnode type is specified default to VREG, i.e. regular file. */ 1198 if (va->va_type == VNON) 1199 va->va_type = VREG; 1200 /* 1201 * We support regular files, directories, symbolic links, sockets, 1202 * fifos, and block and character device special filesr. 1203 */ 1204 switch (va->va_type) { 1205 case VBLK: 1206 case VCHR: 1207 if (!VATTR_IS_ACTIVE(va, va_rdev)) { 1208 ntfs_error(vol->mp, "va_type is %s but va_rdev is not " 1209 "specified!", va->va_type == VBLK ? 
1210 "VBLK" : "VCHR"); 1211 err = EINVAL; 1212 goto err; 1213 } 1214 case VREG: 1215 case VDIR: 1216 case VLNK: 1217 case VSOCK: 1218 case VFIFO: 1219 break; 1220 default: 1221 ntfs_error(vol->mp, "Tried to create inode of type 0x%x which " 1222 "is not supported at present.", va->va_type); 1223 err = ENOTSUP; 1224 goto err; 1225 } 1226 va->va_mode |= VTTOIF(va->va_type); 1227 /* If no create time is supplied default it to the current time. */ 1228 if (!VATTR_IS_ACTIVE(va, va_create_time)) 1229 nanotime(&va->va_create_time); 1230 /* 1231 * Round the time down to the nearest 100-nano-second interval as 1232 * needed for NTFS. 1233 */ 1234 va->va_create_time.tv_nsec -= va->va_create_time.tv_nsec % 100; 1235 /* Set the times in the temporary filename attribute. */ 1236 fn->last_access_time = fn->last_mft_change_time = 1237 fn->last_data_change_time = fn->creation_time = 1238 utc2ntfs(va->va_create_time); 1239 /* Set the bits for all the supported fields at once. */ 1240 va->va_supported |= 1241 VNODE_ATTR_BIT(va_mode) | 1242 VNODE_ATTR_BIT(va_flags) | 1243 VNODE_ATTR_BIT(va_create_time) | 1244 VNODE_ATTR_BIT(va_type); 1245again: 1246 /* Lock the target directory and check that it has not been deleted. */ 1247 lck_rw_lock_exclusive(&dir_ni->lock); 1248 if (!dir_ni->link_count) { 1249 /* Remove the target directory from the name cache. */ 1250 cache_purge(dir_vn); 1251 err = ENOENT; 1252 goto unl_err; 1253 } 1254 /* Allocate and map a new mft record. */ 1255 err = ntfs_mft_record_alloc(vol, va, cn, dir_ni, &ni, &m, &a); 1256 if (err) { 1257 if (err != ENOSPC) 1258 ntfs_error(vol->mp, "Failed to allocate a new on-disk " 1259 "inode (error %d).", err); 1260 goto unl_err; 1261 } 1262 /* 1263 * If requested by the caller, take the ntfs inode lock on the 1264 * allocated ntfs inode for writing so no-one can start using it before 1265 * it is ready. 
For example if it is a symbolic link we cannot allow 1266 * anyone to look at it until we have set the data size to the symbolic 1267 * link target size otherwise a concurrent ntfs_vnop_readlink() would 1268 * return EINVAL as it would see a target size of zero. 1269 * 1270 * Also, if the inode is a symbolic link we need to take the lock so 1271 * that we can create the AFP_AfpInfo attribute when we have finished 1272 * setting up the inode. 1273 */ 1274 if (lock || S_ISLNK(ni->mode)) 1275 lck_rw_lock_exclusive(&ni->lock); 1276 /* 1277 * @a now points to the location in the allocated mft record at which 1278 * we need to insert the filename attribute so we can insert it without 1279 * having to do a lookup first. 1280 * 1281 * Insert the filename attribute and initialize the value to zero. 1282 * This cannot fail as we are dealing with a newly allocated mft record 1283 * so there must be enough space for a filename attribute even if the 1284 * filename is of the maximum allowed length. 1285 */ 1286 err = ntfs_resident_attr_record_insert_internal(m, a, AT_FILENAME, 1287 NULL, 0, fn_size); 1288 if (err) 1289 panic("%s(): err\n", __FUNCTION__); 1290 /* Finish setting up the filename attribute value. */ 1291 fn->file_attributes = ni->file_attributes; 1292 /* 1293 * Directories need the FILE_ATTR_DUP_FILENAME_INDEX_PRESENT flag set 1294 * in their filename attributes both in their mft records and in the 1295 * index entries pointing to them but not in the standard information 1296 * attribute which is why it is not set in @ni->file_attributes. 1297 */ 1298 if (va->va_type == VDIR) 1299 fn->file_attributes |= FILE_ATTR_DUP_FILENAME_INDEX_PRESENT; 1300 /* 1301 * Update the data_size in the temporary filename attribute from the 1302 * created ntfs inode. This will not be zero for fifos and block and 1303 * character device special files for example. 
1304 */ 1305 fn->data_size = ni->data_size; 1306 /* 1307 * Copy the created filename attribute into place in the attribute 1308 * record. 1309 */ 1310 memcpy((u8*)a + le16_to_cpu(a->value_offset), fn, fn_size); 1311 /* 1312 * Set the link count to one to indicate there is one filename 1313 * attribute inside the mft record. 1314 */ 1315 m->link_count = const_cpu_to_le16(1); 1316 ni->link_count = 1; 1317 /* 1318 * Ensure the mft record is written to disk. 1319 * 1320 * Note we do not set any of the NInoDirty*() flags because we have 1321 * just created the inode thus all the fields are in sync between the 1322 * ntfs_inode @ni and its mft record @m. 1323 */ 1324 NInoSetMrecNeedsDirtying(ni); 1325 /* 1326 * Release the mft record. It is safe to do so even though the 1327 * directory entry has not been added yet because the inode is still 1328 * locked and marked new thus it is not a candidate for syncing yet. 1329 */ 1330 ntfs_mft_record_unmap(ni); 1331 /* 1332 * If the inode is a symbolic link now create the AFP_AfpInfo attribute 1333 * with the Finder Info specifying that this is a symbolic link. 1334 */ 1335 if (S_ISLNK(ni->mode)) { 1336 err = ntfs_inode_afpinfo_write(ni); 1337 /* 1338 * If the caller has not requested that the inode be returned 1339 * locked unlock it now. 1340 */ 1341 if (!lock) 1342 lck_rw_unlock_exclusive(&ni->lock); 1343 if (err) { 1344 ntfs_error(vol->mp, "Failed to create AFP_AfpInfo " 1345 "attribute in allocated inode 0x%llx " 1346 "(error %d).", 1347 (unsigned long long)ni->mft_no, err); 1348 goto rm_err; 1349 } 1350 } 1351 /* Add the created filename attribute to the parent directory index. */ 1352 err = ntfs_dir_entry_add(dir_ni, fn, fn_size, 1353 MK_LE_MREF(ni->mft_no, ni->seq_no)); 1354 if (!err) { 1355 /* Free the temporary filename attribute. */ 1356 OSFree(fn, fn_alloc, ntfs_malloc_tag); 1357 /* 1358 * Invalidate negative cache entries in the directory. 
We need 1359 * to do this because there may be negative cache entries 1360 * which would match the name of the just created inode but in 1361 * a different case. Such negative cache entries would now be 1362 * incorrect thus we need to throw away all negative cache 1363 * entries to ensure there cannot be any incorrectly negative 1364 * entries in the name cache. 1365 */ 1366 cache_purge_negatives(dir_vn); 1367 /* 1368 * Add the inode to the name cache. Note that 1369 * ntfs_vnop_lookup() will have caused the name to not be 1370 * cached because it will have cleared the MAKEENTRY flag. 1371 */ 1372 cache_enter(dir_ni->vn, ni->vn, cn); 1373 /* We are done with the directory so unlock it. */ 1374 lck_rw_unlock_exclusive(&dir_ni->lock); 1375 /* 1376 * We can finally unlock and unmark as new the new ntfs inode 1377 * thus rendering the inode a full member of society. 1378 */ 1379 ntfs_inode_unlock_alloc(ni); 1380 ntfs_debug("Done (new mft_no 0x%llx).", 1381 (unsigned long long)ni->mft_no); 1382 *vn = ni->vn; 1383 return 0; 1384 } 1385 /* 1386 * We failed to add the directory entry thus we have to effectively 1387 * delete the created inode again. To do this we need to map the mft 1388 * record and mark it as no longer in use. 1389 * 1390 * We then also need to set the link count in the ntfs inode to zero to 1391 * reflect that it is deleted and to ensure that the subsequent 1392 * vnode_put() results in ntfs_delete_inode() being called (via 1393 * VNOP_INACTIVE() and ntfs_vnop_inactive() respectively). 1394 * 1395 * But first, unlock the allocated ntfs inode if we locked it above. 1396 * No-one can get to it now as it does not have a directory entry 1397 * pointing to it. 1398 */ 1399rm_err: 1400 if (lock) 1401 lck_rw_unlock_exclusive(&ni->lock); 1402 err2 = ntfs_mft_record_map(ni, &m); 1403 if (err2) { 1404 ntfs_error(vol->mp, "Failed to map mft record in error code " 1405 "path (error %d). 
Run chkdsk to recover the " 1406 "lost mft record.", err2); 1407 NVolSetErrors(vol); 1408 } else { 1409 m->flags &= ~MFT_RECORD_IN_USE; 1410 NInoSetMrecNeedsDirtying(ni); 1411 ntfs_mft_record_unmap(ni); 1412 } 1413 ni->link_count = 0; 1414 lck_rw_unlock_exclusive(&dir_ni->lock); 1415 ntfs_inode_unlock_alloc(ni); 1416 cache_purge(ni->vn); 1417 (void)vnode_put(ni->vn); 1418 if (err == EEXIST) { 1419 /* 1420 * There are two possible reasons why the directory entry 1421 * already exists. Either someone created it under our feet in 1422 * which case we try to look up the existing vnode and retrn 1423 * that instead and failing that we try to create the inode 1424 * again or the name really does exist but we have removed it 1425 * from the name space thus ntfs_vnop_lookup() will always 1426 * return ENOENT/EJUSTRETURN for it. This is the case for the 1427 * core system files for example. This would cause an infinite 1428 * loop thus we need to check for this case by checking that 1429 * the name being created does not match one of the core system 1430 * filenames and if it does we return EEXIST. 1431 */ 1432 if (dir_ni == vol->root_ni) { 1433 /* Catch the "." entry. */ 1434 if (cn->cn_namelen == 1 && cn->cn_nameptr[0] == '.') 1435 goto is_system; 1436 /* 1437 * Catch the core system files which all start with the 1438 * '$' character. 
1439 */ 1440 if (cn->cn_nameptr[0] == '$') { 1441 char *n = (char*)cn->cn_nameptr + 1; 1442 int l = cn->cn_namelen; 1443 1444 if ((l == 4 && !strncmp(n, "MFT", 3)) || 1445 (l == 5 && !strncmp(n, "Boot", 1446 4)) || 1447 (l == 6 && !strncmp(n, "Quota", 1448 5)) || 1449 (l == 7 && ( 1450 !strncmp(n, "Volume", 6) || 1451 !strncmp(n, "Bitmap", 6) || 1452 !strncmp(n, "Secure", 6) || 1453 !strncmp(n, "UpCase", 6) || 1454 !strncmp(n, "Extend", 6))) || 1455 (l == 8 && ( 1456 !strncmp(n, "MFTMirr", 7) || 1457 !strncmp(n, "LogFile", 7) || 1458 !strncmp(n, "AttrDef", 7) || 1459 !strncmp(n, "BadClus", 7)))) 1460 goto is_system; 1461 } 1462 } 1463 ntfs_debug("Inode was created under our feet."); 1464 /* 1465 * If the inode was created under our feet, we are creating a 1466 * regular file, and the caller did not want an exclusive 1467 * create, simply look up the inode and return that. 1468 */ 1469 if (va->va_type == VREG && !(va->va_vaflags & VA_EXCLUSIVE)) { 1470 struct vnop_lookup_args la; 1471 1472 cn->cn_nameiop = LOOKUP; 1473 la = (struct vnop_lookup_args) { 1474 .a_desc = &vnop_lookup_desc, 1475 .a_dvp = dir_vn, 1476 .a_vpp = vn, 1477 .a_cnp = cn, 1478 }; 1479 err = ntfs_vnop_lookup(&la); 1480 cn->cn_nameiop = CREATE; 1481 /* 1482 * If the inode that was created under our feet was 1483 * also deleted under our feet, repeat the whole 1484 * process. 1485 */ 1486 if (err == ENOENT || err == EJUSTRETURN) { 1487 *vn = NULL; 1488 goto again; 1489 } 1490 /* 1491 * Make sure the vnode we looked up is a regular file 1492 * as we would not want to return a directory instead 1493 * of a file for example. 
1494 */ 1495 if (!err && vnode_vtype(*vn) != VREG) { 1496 (void)vnode_put(*vn); 1497 *vn = NULL; 1498 err = EEXIST; 1499 } 1500 } 1501 } else 1502 ntfs_error(vol->mp, "Failed to add directory entry (error " 1503 "%d).", err); 1504err: 1505 OSFree(fn, fn_alloc, ntfs_malloc_tag); 1506 return err; 1507unl_err: 1508 lck_rw_unlock_exclusive(&dir_ni->lock); 1509 goto err; 1510is_system: 1511 ntfs_error(vol->mp, "Cannot create inode with name %.*s in the volume " 1512 "root directory as the name clashes with the name of " 1513 "a core system file. Returning EEXIST.", 1514 (int)cn->cn_namelen, cn->cn_nameptr); 1515 err = EEXIST; 1516 *vn = NULL; 1517 goto err; 1518} 1519 1520/** 1521 * ntfs_vnop_create - create a regular file 1522 * @a: arguments to create function 1523 * 1524 * @a contains: 1525 * vnode_t a_dvp; directory in which to create the file 1526 * vnode_t *a_vpp; destination pointer for the created file 1527 * struct componentname *a_cnp; name of the file to create 1528 * struct vnode_attr *a_vap; attributes to set on the created file 1529 * vfs_context_t a_context; 1530 * 1531 * Create a regular file with name as specified in @a->a_cnp in the directory 1532 * specified by the vnode @a->a_dvp. Assign the attributes @a->a_vap to the 1533 * created file. Finally return the vnode of the created file in *@a->a_vpp. 1534 * 1535 * Return 0 on success and errno on error. 1536 * 1537 * Note we always create filenames in the POSIX namespace. 
1538 */ 1539static int ntfs_vnop_create(struct vnop_create_args *a) 1540{ 1541 errno_t err; 1542#ifdef DEBUG 1543 ntfs_inode *ni = NTFS_I(a->a_dvp); 1544 1545 if (ni) 1546 ntfs_debug("Creating a file named %.*s in directory mft_no " 1547 "0x%llx.", (int)a->a_cnp->cn_namelen, 1548 a->a_cnp->cn_nameptr, 1549 (unsigned long long)ni->mft_no); 1550#endif 1551 err = ntfs_create(a->a_dvp, a->a_vpp, a->a_cnp, a->a_vap, FALSE); 1552 ntfs_debug("Done (error %d).", (int)err); 1553 return err; 1554} 1555 1556/** 1557 * ntfs_vnop_mknod - create a special file node 1558 * @a: arguments to mknod function 1559 * 1560 * @a contains: 1561 * vnode_t a_dvp; directory in which to create the file 1562 * vnode_t *a_vpp; destination pointer for the created file 1563 * struct componentname *a_cnp; name of the file to create 1564 * struct vnode_attr *a_vap; attributes to set on the created file 1565 * vfs_context_t a_context; 1566 * 1567 * Create a special file node with name as specified in @a->a_cnp in the 1568 * directory specified by the vnode @a->a_dvp. Assign the attributes @a->a_vap 1569 * to the created node. Finally return the vnode of the created file in 1570 * *@a->a_vpp. 1571 * 1572 * The type of special file node to create is specified by the caller in 1573 * @a->a_vap->va_type and can be one of: 1574 * VSOCK - create a socket 1575 * VFIFO - create a fifo 1576 * VBLK - create a block special device 1577 * VCHR - create a character special device 1578 * 1579 * Return 0 on success and errno on error. 1580 * 1581 * Note we always create filenames in the POSIX namespace. 
1582 */ 1583static int ntfs_vnop_mknod(struct vnop_mknod_args *a) 1584{ 1585 errno_t err; 1586#ifdef DEBUG 1587 ntfs_inode *ni = NTFS_I(a->a_dvp); 1588 1589 if (ni) 1590 ntfs_debug("Creating a special inode of type 0x%x named %.*s " 1591 "in directory mft_no 0x%llx.", 1592 a->a_vap->va_type, (int)a->a_cnp->cn_namelen, 1593 a->a_cnp->cn_nameptr, 1594 (unsigned long long)ni->mft_no); 1595#endif 1596 err = ntfs_create(a->a_dvp, a->a_vpp, a->a_cnp, a->a_vap, FALSE); 1597 ntfs_debug("Done (error %d).", (int)err); 1598 return err; 1599} 1600 1601/** 1602 * ntfs_vnop_open - open a vnode 1603 * @a: arguments to open function 1604 * 1605 * @a contains: 1606 * vnode_t a_vp; vnode to open 1607 * int a_mode; mode to open the file with 1608 * vfs_context_t a_context; 1609 * 1610 * Open the vnode @a->a_vp with mode @a->a_mode. 1611 * 1612 * Note the VFS does a lot of checking before ntfs_vnop_open() is called 1613 * including permissions and checking for a read-only file system thus we do 1614 * not need to worry about the case where the driver is compiled read-only as 1615 * the volume is then mounted read-only so the vfs catches all write accesses 1616 * very early on and denies them. 1617 * 1618 * Return 0 on success and errno on error. 1619 */ 1620static int ntfs_vnop_open(struct vnop_open_args *a) 1621{ 1622 ntfs_inode *base_ni, *ni = NTFS_I(a->a_vp); 1623 errno_t err = 0; 1624 1625 if (!ni) { 1626 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 1627 return EINVAL; 1628 } 1629 ntfs_debug("Entering for mft_no 0x%llx, mode 0x%x.", 1630 (unsigned long long)ni->mft_no, (unsigned)a->a_mode); 1631 base_ni = ni; 1632 if (NInoAttr(ni)) 1633 base_ni = ni->base_ni; 1634 /* 1635 * All the core system files cannot possibly be opened because they are 1636 * removed from the name space thus it is impossible for a process to 1637 * obtain a vnode to them thus VNOP_OPEN() can never be called for 1638 * them. 
The only exception is the root directory which we of course 1639 * allow access to. 1640 */ 1641 if (ni->mft_no < FILE_first_user && ni != ni->vol->root_ni) 1642 panic("%s(): Called for a system inode. This is not " 1643 "possible.\n", __FUNCTION__); 1644 lck_rw_lock_shared(&ni->lock); 1645 /* Do not allow messing with the inode once it has been deleted. */ 1646 if (NInoDeleted(ni)) { 1647 lck_rw_unlock_shared(&ni->lock); 1648 /* Remove the inode from the name cache. */ 1649 cache_purge(ni->vn); 1650 ntfs_debug("Cannot open deleted mft_no 0x%llx, returning " 1651 "ENOENT.", (unsigned long long)ni->mft_no); 1652 return ENOENT; 1653 } 1654 /* 1655 * Do not allow opening encrpyted files as we do not support reading, 1656 * writing, nor mmap()ing them. 1657 */ 1658 if (NInoEncrypted(ni)) { 1659 lck_rw_unlock_shared(&ni->lock); 1660 ntfs_debug("Cannot open encrypted mft_no 0x%llx, returning " 1661 "EACCES.", (unsigned long long)ni->mft_no); 1662 return EACCES; 1663 } 1664 lck_rw_unlock_shared(&ni->lock); 1665 /* 1666 * We keep track of how many times the base vnode has been opened and 1667 * we count other vnodes towards the base vnode open count to ensure 1668 * we do the right thing in ntfs_unlink(). 1669 */ 1670 OSIncrementAtomic(&base_ni->nr_opens); 1671 ntfs_debug("Done (error %d).", (int)err); 1672 return err; 1673} 1674 1675/** 1676 * ntfs_vnop_close - close a vnode 1677 * @a: arguments to close function 1678 * 1679 * @a contains: 1680 * vnode_t a_vp; vnode to close 1681 * int a_fflag; close flags (FREAD and/or FWRITE for example) 1682 * vfs_context_t a_context; 1683 * 1684 * Close the vnode @a->a_vp with flags @a->a_fflag. 1685 * 1686 * Return 0 on success and errno on error. 
1687 */ 1688static int ntfs_vnop_close(struct vnop_close_args *a) 1689{ 1690 vnode_t vn = a->a_vp; 1691 ntfs_inode *base_ni, *ni = NTFS_I(vn); 1692 1693 if (!ni) { 1694 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 1695 return 0; 1696 } 1697 ntfs_debug("Entering for mft_no 0x%llx, fflag 0x%x.", 1698 (unsigned long long)ni->mft_no, a->a_fflag); 1699 base_ni = ni; 1700 if (NInoAttr(ni)) 1701 base_ni = ni->base_ni; 1702 /* 1703 * We keep track of how many times the base vnode has been opened and 1704 * we count other vnodes towards the base vnode open count to ensure 1705 * we do the right thing in ntfs_unlink(). 1706 */ 1707 OSDecrementAtomic(&base_ni->nr_opens); 1708 /* 1709 * If the vnode is still in use release any expired directory hints. 1710 * 1711 * If the vnode is no longer in use release all directory hints. 1712 * 1713 * Note we check for presence of directory hints outside the locks as 1714 * an optimization. It is not a disaster if we miss any as all will be 1715 * released in ntfs_inode_free() before the inode is thrown away at the 1716 * latest. 
1717 */ 1718 if (ni != base_ni && ni->type == AT_INDEX_ALLOCATION && 1719 ni->nr_dirhints) { 1720 int busy; 1721 1722 busy = vnode_isinuse(vn, ni->nr_refs + 1); 1723 lck_rw_lock_exclusive(&ni->lock); 1724 ntfs_dirhints_put(ni, busy); 1725 lck_rw_unlock_exclusive(&ni->lock); 1726 } 1727 ntfs_debug("Done."); 1728 return 0; 1729} 1730 1731/** 1732 * ntfs_vnop_access - 1733 * 1734 */ 1735static int ntfs_vnop_access(struct vnop_access_args *a) 1736{ 1737 errno_t err; 1738 1739 ntfs_debug("Entering."); 1740 // TODO: 1741 err = ENOTSUP; 1742 ntfs_debug("Done (error %d).", (int)err); 1743 return err; 1744} 1745 1746/** 1747 * ntfs_vnop_getattr - get attributes about a vnode or about the mounted volume 1748 * @a: arguments to getattr function 1749 * 1750 * @a contains: 1751 * vnode_t a_vp; vnode for which to return attributes 1752 * struct vnode_attr *a_vap; attributes to return and destination 1753 * vfs_context_t a_context; 1754 * 1755 * Return the attributes described in @a_vap about the vnode @a_vp. Some 1756 * attributes are intercepted by the VFS in getattrlist() and getvolattrlist() 1757 * so we do not bother with them. 1758 * 1759 * At present we do not support all attributes. We declare what we support to 1760 * the world in our VFS_GETATTR() function (ntfs_vfsops.c::ntfs_getattr()) so 1761 * do not forget to update that when support for further attributes is added 1762 * here. 1763 * 1764 * Return 0 on success and errno on error. 1765 * 1766 * TODO: Implement more attributes. 
1767 */ 1768static int ntfs_vnop_getattr(struct vnop_getattr_args *a) 1769{ 1770 MFT_REF parent_mref; 1771 ino64_t mft_no; 1772 s64 on_disk_size; 1773 struct vnode_attr *va = a->a_vap; 1774 ntfs_inode *ni, *base_ni; 1775 ntfs_volume *vol; 1776 const char *name; 1777 FILE_ATTR_FLAGS file_attributes; 1778 unsigned flags; 1779 errno_t err; 1780 lck_rw_type_t lock; 1781 BOOL is_root, name_is_done, have_parent; 1782 1783 ni = NTFS_I(a->a_vp); 1784 if (!ni) { 1785 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 1786 return 0; 1787 } 1788 vol = ni->vol; 1789 mft_no = ni->mft_no; 1790 have_parent = name_is_done = is_root = FALSE; 1791 ntfs_debug("Entering for mft_no 0x%llx.", (unsigned long long)mft_no); 1792 base_ni = ni; 1793 if (NInoAttr(ni)) { 1794 base_ni = ni->base_ni; 1795 lck_rw_lock_shared(&base_ni->lock); 1796 } 1797 lck_rw_lock_shared(&ni->lock); 1798 lock = LCK_RW_TYPE_SHARED; 1799 /* Do not allow messing with the inode once it has been deleted. */ 1800 if (NInoDeleted(ni)) { 1801 /* Remove the inode from the name cache. */ 1802 cache_purge(ni->vn); 1803 err = ENOENT; 1804 goto err; 1805 } 1806 /* 1807 * If this is the root directory, leave it to the VFS to get the name 1808 * from the mountpoint (see below). 1809 */ 1810 if (base_ni == vol->root_ni) 1811 name_is_done = is_root = TRUE; 1812 /* For directories always return a link count of 1. */ 1813 va->va_nlink = 1; 1814 if (!S_ISDIR(ni->mode)) 1815 va->va_nlink = ni->link_count; 1816 va->va_rdev = (dev_t)0; 1817 switch (ni->mode & S_IFMT) { 1818 case S_IFBLK: 1819 case S_IFCHR: 1820 /* 1821 * For block and character device special inodes return the 1822 * device. 1823 */ 1824 va->va_rdev = ni->rdev; 1825 case S_IFIFO: 1826 case S_IFSOCK: 1827 /* 1828 * For fifos, sockets, block and character device special files 1829 * return all sizes set to zero. 
1830 */ 1831 va->va_total_alloc = va->va_data_alloc = va->va_total_size = 1832 va->va_data_size = 0; 1833 break; 1834 default: 1835 lck_spin_lock(&ni->size_lock); 1836 /* 1837 * We cheat for both the total size and the total allocated 1838 * size and just return the attribute size rather than looping 1839 * over all ($DATA?) attributes and adding up their sizes. 1840 */ 1841 va->va_total_size = va->va_data_size = ni->data_size; 1842 /* 1843 * Resident attributes reside inside the on-disk inode and thus 1844 * have no on-disk allocation because the on-disk inode itself 1845 * is already accounted for in the allocated size of the $MFT 1846 * system file which contains the table of on-disk inodes. 1847 * Perhaps more importantly, if we delete a resident file no 1848 * space would be freed up on the volume, thus we definitely 1849 * need to return zero for the allocated size of such resident 1850 * files. 1851 */ 1852 on_disk_size = 0; 1853 if (NInoNonResident(ni)) { 1854 if (ni->type == AT_DATA && (NInoCompressed(ni) || 1855 NInoSparse(ni))) 1856 on_disk_size = ni->compressed_size; 1857 else 1858 on_disk_size = ni->allocated_size; 1859 } 1860 va->va_total_alloc = va->va_data_alloc = on_disk_size; 1861 lck_spin_unlock(&ni->size_lock); 1862 } 1863 va->va_iosize = ubc_upl_maxbufsize(); 1864 va->va_uid = ni->uid; 1865 va->va_gid = ni->gid; 1866 va->va_mode = ni->mode; 1867 file_attributes = base_ni->file_attributes; 1868 /* 1869 * Do not allow the volume root directory to be read-only or hidden and 1870 * do not allow directories in general to be read-only as Windows uses 1871 * the read-only bit on directories for completely different purposes 1872 * like customized/specialized folder views which are lost when you 1873 * clear the read-only bit. 
1874 */ 1875 if (S_ISDIR(base_ni->mode)) { 1876 file_attributes &= ~FILE_ATTR_READONLY; 1877 if (is_root) 1878 file_attributes &= ~FILE_ATTR_HIDDEN; 1879 } 1880 flags = 0; 1881/* 1882 * if (NInoCompressed(ni)) 1883 * flags |= SF_COMPRESSED; 1884 */ 1885 if (file_attributes & FILE_ATTR_READONLY) 1886 flags |= UF_IMMUTABLE; 1887 if (file_attributes & FILE_ATTR_HIDDEN) 1888 flags |= UF_HIDDEN; 1889 /* 1890 * Windows does not set the "needs archiving" bit on directories 1891 * except for encrypted directories where it does set the bit. 1892 */ 1893 if ((!S_ISDIR(base_ni->mode) || 1894 file_attributes & FILE_ATTR_ENCRYPTED) && 1895 !(file_attributes & FILE_ATTR_ARCHIVE)) 1896 flags |= SF_ARCHIVED; 1897 va->va_flags = flags; 1898 va->va_create_time = base_ni->creation_time; 1899 va->va_access_time = base_ni->last_access_time; 1900 va->va_modify_time = base_ni->last_data_change_time; 1901 va->va_change_time = base_ni->last_mft_change_time; 1902 /* 1903 * NTFS does not distinguish between the inode and its hard links. 1904 * 1905 * We have to remap the root directory inode to inode number 2, i.e. 1906 * fsRtDirID, for compatibility with Carbon. 1907 */ 1908 if (!is_root) 1909 va->va_fileid = mft_no; 1910 else 1911 va->va_fileid = 2; 1912 va->va_fsid = vol->dev; 1913 /* FIXME: What is the difference between the below two? 
*/ 1914 va->va_filerev = base_ni->seq_no; 1915 va->va_gen = base_ni->seq_no; 1916 va->va_encoding = 0x7e; /* = kTextEncodingMacUnicode */ 1917 va->va_supported |= 1918 VNODE_ATTR_BIT(va_rdev) | 1919 VNODE_ATTR_BIT(va_nlink) | 1920 VNODE_ATTR_BIT(va_total_size) | 1921 VNODE_ATTR_BIT(va_total_alloc) | 1922 VNODE_ATTR_BIT(va_data_size) | 1923 VNODE_ATTR_BIT(va_data_alloc) | 1924 VNODE_ATTR_BIT(va_iosize) | 1925 VNODE_ATTR_BIT(va_uid) | 1926 VNODE_ATTR_BIT(va_gid) | 1927 VNODE_ATTR_BIT(va_mode) | 1928 VNODE_ATTR_BIT(va_flags) | 1929 VNODE_ATTR_BIT(va_create_time) | 1930 VNODE_ATTR_BIT(va_access_time) | 1931 VNODE_ATTR_BIT(va_modify_time) | 1932 VNODE_ATTR_BIT(va_change_time) | 1933 VNODE_ATTR_BIT(va_fileid) | 1934 VNODE_ATTR_BIT(va_fsid) | 1935 VNODE_ATTR_BIT(va_filerev) | 1936 VNODE_ATTR_BIT(va_gen) | 1937 VNODE_ATTR_BIT(va_encoding) | 1938 0; 1939 /* 1940 * Return va_parentid, i.e. the mft record number of the parent of the 1941 * inode, if it was requested. 1942 * 1943 * We have to return 1, i.e. fsRtParID, for the parent inode number of 1944 * the root directory inode for compatibility with Carbon. Simillarly 1945 * we have to return 2, i.e. fsRtDirID, if the parent inode is the root 1946 * directory inode. 1947 * 1948 * For all other inodes we try to get the parent from the vnode and if 1949 * it does not have the vnode cached then if the inode is an attribute 1950 * inode we return the inode number of the base inode (in line with how 1951 * named streams work on Mac OS X) and otherwise we obtain the parent 1952 * mft reference by looking up a filename attribute record in the mft 1953 * record of the inode and obtaining the parent mft record reference 1954 * from there. 1955 * 1956 * There is one pitfall with this approach for files and that is that a 1957 * file may have multiple parents and we are returning a random one but 1958 * that is the best we can do. 
1959 * 1960 * To make this a little better we get the name at the same time as we 1961 * get the parent mft reference so we can at least return a parent id 1962 * and name that match, i.e. the name is present in the parent id. 1963 * 1964 * And to make this even better, when the parent is requested and a 1965 * name is cached in the vnode, we use the name in the vnode to find 1966 * the parent that matches that name if it exists. If it does not 1967 * exist we revert to finding a random parent. 1968 */ 1969 if (VATTR_IS_ACTIVE(va, va_parentid)) { 1970 ino64_t parent_mft_no; 1971 vnode_t parent_vn; 1972 1973 if (is_root && base_ni == ni) 1974 VATTR_RETURN(va, va_parentid, 1); 1975 else if ((parent_vn = vnode_getparent(ni->vn))) { 1976 parent_mft_no = NTFS_I(parent_vn)->mft_no; 1977 (void)vnode_put(parent_vn); 1978 have_parent = TRUE; 1979 if (parent_mft_no == FILE_root) 1980 parent_mft_no = 2; 1981 VATTR_RETURN(va, va_parentid, parent_mft_no); 1982 } else if (ni != base_ni) { 1983 parent_mft_no = base_ni->mft_no; 1984 if (parent_mft_no == FILE_root) 1985 parent_mft_no = 2; 1986 VATTR_RETURN(va, va_parentid, parent_mft_no); 1987 } else /* if (ni == base_ni) */ { 1988 name_is_done = TRUE; 1989 name = NULL; 1990 if (VATTR_IS_ACTIVE(va, va_name)) 1991 name = va->va_name; 1992 err = ntfs_inode_get_name_and_parent_mref(base_ni, 1993 FALSE, &parent_mref, name); 1994 if (err) { 1995 ntfs_error(base_ni->vol->mp, "Failed to obtain " 1996 "parent mft reference for " 1997 "mft_no 0x%llx (error %d).", 1998 (unsigned long long) 1999 base_ni->mft_no, err); 2000 goto err; 2001 } 2002 parent_mft_no = MREF(parent_mref); 2003 if (parent_mft_no == FILE_root) 2004 parent_mft_no = 2; 2005 va->va_parentid = parent_mft_no; 2006 va->va_supported |= VNODE_ATTR_BIT(va_parentid) | 2007 (name ? VNODE_ATTR_BIT(va_name) : 0); 2008 } 2009 } 2010 /* 2011 * Return va_name, i.e. the name of the inode, if it was requested. 
2012 * 2013 * If this is the root directory of the volume, leave it to the VFS to 2014 * find the mounted-on name, which is different from the real volume 2015 * root directory name of "." (this is ensured by the fact that 2016 * @name_is_done was set to TRUE for the root directory earlier). 2017 * 2018 * For all other inodes we try to get the name from the vnode and if it 2019 * does not have the name cached we obtain the name by looking up a 2020 * filename attribute record in the mft record of the inode and using 2021 * that. 2022 * 2023 * Note we do not need to do anything if we dealt with the name as part 2024 * of dealing with va_parentid above. In this case @name_is_done will 2025 * be set to true. 2026 * 2027 * Also we do not need to do anything if we tried to deal with 2028 * va_parentid above and failed as we would only fail again here. This 2029 * means that if @err is not zero we skip the call to 2030 * ntfs_inode_get_name_and_parent_mref(). 2031 * 2032 * TODO: What do we return for attribute inodes? Shall we exclude them 2033 * from VNOP_GETATTR() altogether? For now we simply do not return a 2034 * name for them. 2035 */ 2036 if (!name_is_done && VATTR_IS_ACTIVE(va, va_name) && ni == base_ni) { 2037 name = vnode_getname(base_ni->vn); 2038 if (name) { 2039 (void)strlcpy(va->va_name, name, MAXPATHLEN - 1); 2040 VATTR_SET_SUPPORTED(va, va_name); 2041 (void)vnode_putname(name); 2042 } else { 2043 err = ntfs_inode_get_name_and_parent_mref(base_ni, 2044 have_parent, &parent_mref, va->va_name); 2045 if (err) { 2046 ntfs_error(base_ni->vol->mp, "Failed to obtain " 2047 "parent mft reference for " 2048 "mft_no 0x%llx (error %d).", 2049 (unsigned long long) 2050 base_ni->mft_no, err); 2051 goto err; 2052 } 2053 /* 2054 * We forcibly overwrite the parent id with the 2055 * possibly new parent id here to be consistent with 2056 * the name, i.e. we want the name we return to 2057 * actually exist in the returned parent. 
2058 * 2059 * If we already had the parent id from before then 2060 * ntfs_inode_get_name_and_parent_mref() will have 2061 * found the name matching this parent id thus our 2062 * setting of the parent id here will be a no-op. 2063 */ 2064 va->va_parentid = MREF(parent_mref); 2065 if (va->va_parentid == FILE_root) 2066 va->va_parentid = 2; 2067 va->va_supported |= VNODE_ATTR_BIT(va_parentid) | 2068 VNODE_ATTR_BIT(va_name); 2069 } 2070 } 2071 /* 2072 * Unlock the attribute inode as we do not need it any more and so we 2073 * cannot deadlock with converting the lock on the base inode to 2074 * exclusive and with the call to ntfs_inode_afpinfo_read() below. 2075 */ 2076 if (ni != base_ni) 2077 lck_rw_unlock_shared(&ni->lock); 2078 if (VATTR_IS_ACTIVE(va, va_backup_time)) { 2079 if (!NInoValidBackupTime(base_ni)) { 2080 if (!lck_rw_lock_shared_to_exclusive(&base_ni->lock)) { 2081 lck_rw_lock_exclusive(&base_ni->lock); 2082 if (NInoDeleted(base_ni)) { 2083 cache_purge(base_ni->vn); 2084 lck_rw_unlock_exclusive(&base_ni->lock); 2085 return ENOENT; 2086 } 2087 } 2088 lock = LCK_RW_TYPE_EXCLUSIVE; 2089 /* 2090 * Load the AFP_AfpInfo stream and initialize the 2091 * backup time and Finder Info (if they are not already 2092 * valid). 
2093 */ 2094 err = ntfs_inode_afpinfo_read(base_ni); 2095 if (err) { 2096 ntfs_error(base_ni->vol->mp, "Failed to " 2097 "read AFP_AfpInfo attribute " 2098 "from inode 0x%llx (error " 2099 "%d).", (unsigned long long) 2100 base_ni->mft_no, err); 2101 lck_rw_unlock_exclusive(&base_ni->lock); 2102 return err; 2103 } 2104 if (!NInoValidBackupTime(base_ni)) 2105 panic("%s(): !NInoValidBackupTime(base_ni)\n", 2106 __FUNCTION__); 2107 } 2108 VATTR_RETURN(va, va_backup_time, base_ni->backup_time); 2109 } 2110 if (lock == LCK_RW_TYPE_SHARED) 2111 lck_rw_unlock_shared(&base_ni->lock); 2112 else 2113 lck_rw_unlock_exclusive(&base_ni->lock); 2114 ntfs_debug("Done."); 2115 return 0; 2116err: 2117 lck_rw_unlock_shared(&ni->lock); 2118 if (ni != base_ni) 2119 lck_rw_unlock_shared(&base_ni->lock); 2120 return err; 2121} 2122 2123/** 2124 * ntfs_vnop_setattr - set attributes of a vnode or of the mounted volume 2125 * @a: arguments to setattr function 2126 * 2127 * @a contains: 2128 * vnode_t a_vp; vnode of which to set attributes 2129 * struct vnode_attr *a_vap; attributes to set and source 2130 * vfs_context_t a_context; 2131 * 2132 * Set the attributes described by @a_vap in the vnode @a_vp. Some attributes 2133 * are intercepted by the VFS in setattrlist() and setvolattrlist() so we do 2134 * not bother with them. 2135 * 2136 * At present we do not support all attributes. We declare what we support to 2137 * the world in our VFS_GETATTR() function (ntfs_vfsops.c::ntfs_getattr()) so 2138 * do not forget to update that when support for further attributes is added 2139 * here. 2140 * 2141 * Return 0 on success and errno on error. 2142 * 2143 * TODO: Implement more attributes. 
2144 */ 2145static int ntfs_vnop_setattr(struct vnop_setattr_args *a) 2146{ 2147 ntfs_inode *base_ni, *ni = NTFS_I(a->a_vp); 2148 ntfs_volume *vol; 2149 struct vnode_attr *va = a->a_vap; 2150 errno_t err = 0; 2151 BOOL dirty_times = FALSE; 2152 2153 if (!ni) { 2154 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 2155 return EINVAL; 2156 } 2157 vol = ni->vol; 2158 ntfs_debug("Entering for mft_no 0x%llx.", 2159 (unsigned long long)ni->mft_no); 2160 base_ni = ni; 2161 if (NInoAttr(ni)) { 2162 base_ni = ni->base_ni; 2163 lck_rw_lock_exclusive(&base_ni->lock); 2164 } 2165 lck_rw_lock_exclusive(&ni->lock); 2166 /* Do not allow messing with the inode once it has been deleted. */ 2167 if (NInoDeleted(ni)) { 2168 /* Remove the inode from the name cache. */ 2169 cache_purge(ni->vn); 2170 err = ENOENT; 2171 goto unl_err; 2172 } 2173 if (VATTR_IS_ACTIVE(va, va_data_size)) { 2174 ntfs_debug("Changing size for mft_no 0x%llx to 0x%llx.", 2175 (unsigned long long)ni->mft_no, 2176 (unsigned long long)va->va_data_size); 2177#if 1 // TODO: Remove this when sparse support is done... 2178 if (NInoSparse(ni)) { 2179 err = ENOTSUP; 2180 goto unl_err; 2181 } 2182#endif 2183 /* 2184 * Do not allow calling for $MFT/$DATA as it would destroy the 2185 * volume. 2186 * 2187 * Also only allow setting the size of VREG vnodes as that 2188 * covers both regular files and named streams whilst excluding 2189 * symbolic links for example. 2190 */ 2191 if (vnode_vtype(ni->vn) != VREG || 2192 (!ni->mft_no && !NInoAttr(ni))) 2193 err = EPERM; 2194 else 2195 err = ntfs_attr_resize(ni, va->va_data_size, 2196 va->va_vaflags & 0xffff, NULL); 2197 if (err) { 2198 ntfs_error(vol->mp, "Failed to set inode size (error " 2199 "%d).", err); 2200 goto unl_err; 2201 } 2202 VATTR_SET_SUPPORTED(va, va_data_size); 2203 } 2204 /* 2205 * Unlock the attribute inode as we do not need it any more and so we 2206 * cannot deadlock with the call to ntfs_inode_afpinfo_write() below. 
2207 */ 2208 if (ni != base_ni) 2209 lck_rw_unlock_exclusive(&ni->lock); 2210 if (VATTR_IS_ACTIVE(va, va_flags)) { 2211 u32 flags = va->va_flags; 2212 BOOL dirty_flags = FALSE; 2213 2214 /* 2215 * Only allow changing of supported flags. There are two 2216 * exceptions and those are the archived flag and read-only bit 2217 * on directories which are not supported on NTFS but we have 2218 * to ignore them or too many things break such as "cp -pr" 2219 * from a more sensible file system. 2220 */ 2221 if (flags & ~(SF_ARCHIVED | SF_IMMUTABLE | UF_IMMUTABLE | 2222 UF_HIDDEN /* | SF_COMPRESSED */)) { 2223 ntfs_error(vol->mp, "Cannot set unsupported flags " 2224 "0x%x.", 2225 (unsigned)(flags & ~(SF_ARCHIVED | 2226 SF_IMMUTABLE | UF_IMMUTABLE | 2227 UF_HIDDEN))); 2228 err = EINVAL; 2229 goto err; 2230 } 2231 /* 2232 * We do not allow modification for any of the core NTFS 2233 * system files which we want to remain as they are except that 2234 * we silently ignore changes to the root directory. 2235 */ 2236 if (base_ni->mft_no < FILE_first_user && 2237 base_ni != vol->root_ni) { 2238 ntfs_error(vol->mp, "Refusing to change flags on core " 2239 "NTFS system file (mft_no 0x%llx).", 2240 (unsigned long long)base_ni->mft_no); 2241 err = EPERM; 2242 goto err; 2243 } 2244 /* 2245 * We currently do not support changing the compression state 2246 * of a vnode. 2247 * 2248 * Further, only the base inode may be compressed. 
2249 */ 2250/* 2251 * if (((flags & SF_COMPRESSED) && !NInoCompressed(ni)) || 2252 * (!(flags & SF_COMPRESSED) && 2253 * NInoCompressed(ni))) { 2254 * if (ni != base_ni) { 2255 * ntfs_error(vol->mp, "Only regular files and " 2256 * "directories may be " 2257 * "compressed, aborting."); 2258 * err = EINVAL; 2259 * goto err; 2260 * } 2261 * ntfs_warning(vol->mp, "Changing the compression state " 2262 * "is not supported at present, " 2263 * "returning ENOTSUP."); 2264 * err = ENOTSUP; 2265 * goto err; 2266 * } 2267 */ 2268 /* 2269 * The root directory of a volume always has the hidden bit set 2270 * but we pretend that it is not hidden to OS X and we do not 2271 * allow this bit to be modified for the root directory. 2272 */ 2273 if (base_ni != vol->root_ni) { 2274 /* 2275 * If the Finder info is valid need to update it as 2276 * well. Note setting or clearing the hidden flag in 2277 * the Finder info does not cause the Finder info to 2278 * become dirty as the hidden bit is not stored on disk 2279 * in the Finder info. 2280 */ 2281 if (flags & UF_HIDDEN) { 2282 base_ni->file_attributes |= FILE_ATTR_HIDDEN; 2283 if (NInoValidFinderInfo(base_ni)) 2284 base_ni->finder_info.attrs |= 2285 FINDER_ATTR_IS_HIDDEN; 2286 } else { 2287 base_ni->file_attributes &= ~FILE_ATTR_HIDDEN; 2288 if (NInoValidFinderInfo(base_ni)) 2289 base_ni->finder_info.attrs &= 2290 ~FINDER_ATTR_IS_HIDDEN; 2291 } 2292 dirty_flags = TRUE; 2293 } 2294 /* 2295 * Windows does not allow users to set/clear the read-only bit 2296 * on directories. In fact Windows uses the read-only bit on a 2297 * directory to signify that a customized or specialized folder 2298 * view is in effect thus we do not allow setting/clearing the 2299 * read-only bit on directories from OS X. 2300 * 2301 * Windows does not set the "needs archiving" bit on 2302 * directories. 
2303 * 2304 * The only exception are encrypted directories which do have 2305 * the "needs archiving" bit set but we do not want to allow 2306 * this bit to be cleared so ignore them, too. 2307 */ 2308 if (!S_ISDIR(base_ni->mode)) { 2309 if (flags & (SF_IMMUTABLE | UF_IMMUTABLE)) 2310 base_ni->file_attributes |= FILE_ATTR_READONLY; 2311 else 2312 base_ni->file_attributes &= ~FILE_ATTR_READONLY; 2313 if (flags & SF_ARCHIVED) 2314 base_ni->file_attributes &= ~FILE_ATTR_ARCHIVE; 2315 else 2316 base_ni->file_attributes |= FILE_ATTR_ARCHIVE; 2317 dirty_flags = TRUE; 2318 } 2319 if (dirty_flags) 2320 NInoSetDirtyFileAttributes(base_ni); 2321 VATTR_SET_SUPPORTED(va, va_flags); 2322 } 2323 if (VATTR_IS_ACTIVE(va, va_create_time)) { 2324 base_ni->creation_time = va->va_create_time; 2325 VATTR_SET_SUPPORTED(va, va_create_time); 2326 dirty_times = TRUE; 2327 } 2328 if (VATTR_IS_ACTIVE(va, va_modify_time)) { 2329 base_ni->last_data_change_time = va->va_modify_time; 2330 VATTR_SET_SUPPORTED(va, va_modify_time); 2331 dirty_times = TRUE; 2332 /* 2333 * The following comment came from the HFS code: 2334 * 2335 * <quote>The utimes system call can reset the modification 2336 * time but it doesn't know about HFS create times. So we need 2337 * to ensure that the creation time is always at least as old 2338 * as the modification time.</quote> 2339 * 2340 * SMB also follows this behaviour and it also adds the 2341 * following comment: 2342 * 2343 * <quote>The HFS code also checks to make sure it was not the 2344 * root vnode. Don Brady said that the SMB code should not use 2345 * that part of the check.</quote> 2346 * 2347 * I assume the root vnode check is there in HFS as it does not 2348 * support times on the root vnode at all so the check is 2349 * needed for HFS only. 2350 * 2351 * The same applies for NTFS so follow the HFS/SMB behaviour. 2352 * 2353 * One salient point is that we only do the above if the 2354 * creation time is not being explicitly set already. 
2355 */ 2356 if (!VATTR_IS_ACTIVE(va, va_create_time) && 2357 (va->va_modify_time.tv_sec < 2358 base_ni->creation_time.tv_sec || 2359 (va->va_modify_time.tv_sec == 2360 base_ni->creation_time.tv_sec && 2361 va->va_modify_time.tv_nsec < 2362 base_ni->creation_time.tv_nsec))) 2363 base_ni->creation_time = va->va_modify_time; 2364 } 2365 if (VATTR_IS_ACTIVE(va, va_change_time)) { 2366 base_ni->last_mft_change_time = va->va_change_time; 2367 VATTR_SET_SUPPORTED(va, va_change_time); 2368 dirty_times = TRUE; 2369 } 2370 if (VATTR_IS_ACTIVE(va, va_access_time)) { 2371 base_ni->last_access_time = va->va_access_time; 2372 VATTR_SET_SUPPORTED(va, va_access_time); 2373 dirty_times = TRUE; 2374 } 2375 if (dirty_times) 2376 NInoSetDirtyTimes(base_ni); 2377 if (VATTR_IS_ACTIVE(va, va_backup_time)) { 2378 base_ni->backup_time = va->va_backup_time; 2379 NInoSetValidBackupTime(base_ni); 2380 NInoSetDirtyBackupTime(base_ni); 2381 /* 2382 * Now write (if needed creating) the AFP_AfpInfo attribute 2383 * with the specified backup time. 2384 */ 2385 err = ntfs_inode_afpinfo_write(base_ni); 2386 if (err) { 2387 ntfs_error(vol->mp, "Failed to write/create " 2388 "AFP_AfpInfo attribute in inode " 2389 "0x%llx (error %d).", 2390 (unsigned long long)base_ni->mft_no, 2391 err); 2392 goto err; 2393 } 2394 VATTR_SET_SUPPORTED(va, va_backup_time); 2395 } 2396 ntfs_debug("Done."); 2397err: 2398 lck_rw_unlock_exclusive(&base_ni->lock); 2399 return err; 2400unl_err: 2401 if (ni != base_ni) 2402 lck_rw_unlock_exclusive(&ni->lock); 2403 goto err; 2404} 2405 2406/* Limit the internal i/o size so we can represent it in a 32-bit int. 
*/ 2407#define NTFS_MAX_IO_REQUEST_SIZE (1024 * 1024 * 256) 2408 2409/** 2410 * ntfs_vnop_read_compressed - read from a compressed attribute 2411 * @ni: ntfs inode describing the compressed attribute to read 2412 * @uio: destination in which to return the read data 2413 * @data_size: data size of the compressed attribute 2414 * @ioflags: flags further describing the read request (see ntfs_vnop_read()) 2415 * 2416 * This is a helper function for ntfs_vnop_read() (see below). It is called 2417 * when a read request for a compressed attribute is received by 2418 * ntfs_vnop_read(). 2419 * 2420 * This function is somewhat similar to cluster_read() or to be more precise to 2421 * cluster_read_copy() in that it breaks up large i/os into smaller manageable 2422 * chunks, and for each chunk tries to get the data from the vm page cache and 2423 * return it in the destination buffer described by @uio and failing that, it 2424 * creates and maps a upl and causes it to be filled with data by calling 2425 * ntfs_read_compressed() which reads the compressed data via the raw inode and 2426 * decompresses it into our mapped upl and once that is done we now have the 2427 * data in the vm page cache and copy it into the destination buffer described 2428 * by @uio. 2429 * 2430 * Return 0 on success and errno on error. 
 */
static inline int ntfs_vnop_read_compressed(ntfs_inode *ni, uio_t uio,
		const s64 data_size, int ioflags)
{
	s64 size;
	user_ssize_t start_count;
	off_t ofs;
	vnode_t vn = ni->vn;
	ntfs_inode *raw_ni;
	upl_t upl;
	upl_page_info_t *pl;
	kern_return_t kerr;
	int count, err, align_mask, cur_pg, last_pg;
	int max_upl_size = ubc_upl_maxbufsize();

	ofs = uio_offset(uio);
	start_count = uio_resid(uio);
	ntfs_debug("Entering for compressed file inode 0x%llx, offset 0x%llx, "
			"count 0x%llx, ioflags 0x%x.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)ofs,
			(unsigned long long)start_count, ioflags);
	/*
	 * We can only read from regular files and named streams that are
	 * compressed and non-resident.  We should never be called for anything
	 * else.
	 */
	if (ni->type != AT_DATA || !NInoCompressed(ni) ||
			!NInoNonResident(ni) || NInoEncrypted(ni) ||
			NInoRaw(ni))
		panic("%s(): Called for inappropriate inode.\n", __FUNCTION__);
	/*
	 * Get the raw inode.  We take the inode lock shared to protect against
	 * concurrent writers as the compressed data is invalid whilst a write
	 * is in progress.
	 *
	 * Every exit after this point goes through the "err" label, which
	 * drops the shared lock on @raw_ni and does a vnode_put() on its
	 * vnode (presumably both obtained by ntfs_raw_inode_get() -- confirm
	 * against its implementation).
	 */
	err = ntfs_raw_inode_get(ni, LCK_RW_TYPE_SHARED, &raw_ni);
	if (err) {
		ntfs_error(ni->vol->mp, "Failed to get raw inode (error %d).",
				err);
		return err;
	}
	if (!NInoRaw(raw_ni))
		panic("%s(): Requested raw inode but got non-raw one.\n",
				__FUNCTION__);
	/* Sanity check: the ubc size and the raw inode data size must agree. */
	lck_spin_lock(&raw_ni->size_lock);
	size = ubc_getsize(raw_ni->vn);
	if (size != raw_ni->data_size)
		panic("%s(): size != raw_ni->data_size\n", __FUNCTION__);
	lck_spin_unlock(&raw_ni->size_lock);
	/*
	 * If nothing was requested or the request starts at or beyond the end
	 * of the attribute, we do not need to do anything.
	 */
	if (!start_count || ofs >= data_size) {
		err = 0;
		goto err;
	}
	/* Cannot read from a negative offset. */
	if (ofs < 0) {
		err = EINVAL;
		goto err;
	}
	/* Per-vnode no-cache / no-read-ahead settings apply to this i/o. */
	if (vnode_isnocache(vn) || vnode_isnocache(raw_ni->vn))
		ioflags |= IO_NOCACHE;
	if (vnode_isnoreadahead(vn) || vnode_isnoreadahead(raw_ni->vn))
		ioflags |= IO_RAOFF;
	/*
	 * i/o is aligned to the larger of the compression block size and the
	 * page size.
	 */
	align_mask = ni->compression_block_size - 1;
	if (align_mask < PAGE_MASK)
		align_mask = PAGE_MASK;
	/*
	 * Loop until we have finished the whole request or reached the end of
	 * the attribute.
	 *
	 * FIXME: We do not bother with read-ahead on the uncompressed vnode
	 * for now except to the extent that we always decompress full
	 * compression blocks which may be larger than the current i/o request
	 * so the next i/o request will find the whole compression block
	 * decompressed in the vm page cache thus small reads will in effect
	 * experience a certain amount of read-ahead in this way.
	 */
	do {
		u8 *kaddr;
		int delta, next_pg, orig_count;

		/* Bytes left to transfer, limited to the attribute size. */
		size = data_size - ofs;
		if (size > start_count)
			size = start_count;
		count = size;
		/*
		 * Break up the i/o in chunks that fit into a 32-bit int so
		 * we can call cluster_copy_ubc_data(), etc.
		 */
		if (size > NTFS_MAX_IO_REQUEST_SIZE)
			count = NTFS_MAX_IO_REQUEST_SIZE;
		/*
		 * First of all, try to copy the data from the vm page cache.
		 * This will work on the second and all later reads so this is
		 * the hot path.  If the attribute has not been accessed at all
		 * before or its cached pages were dropped due to vm pressure
		 * this will fail to copy any data due to the lack of a valid
		 * page and we will drop into the slow path.
		 */
		if (!(ioflags & IO_NOCACHE)) {
			err = cluster_copy_ubc_data(vn, uio, &count, 0);
			if (err) {
				/*
				 * The copying (uiomove()) failed with an
				 * error, abort.
				 */
				ntfs_error(ni->vol->mp,
						"cluster_copy_ubc_data() "
						"failed (error %d).", err);
				goto err;
			}
			/*
			 * @count is now set to the number of bytes remaining
			 * to be transferred.  If it is zero, it means all the
			 * pages were in the vm page cache so we can skip onto
			 * the next part of the i/o.
			 *
			 * Note the continue jumps to the loop condition, which
			 * refreshes @start_count and @ofs from @uio.
			 */
			if (!count)
				continue;
			ofs = uio_offset(uio);
		}
		/*
		 * Only some or none of the pages were in the vm page cache or
		 * this is not a cached i/o.  First align this i/o request to
		 * compression block boundaries and to PAGE_SIZE boundaries and
		 * truncate it to the maximum upl size then create and map a
		 * page list so we can fill it with the data.
		 *
		 * @delta is how far the aligned start lies before the
		 * requested offset and @orig_count is the number of bytes the
		 * caller still wants from this chunk; both are needed again
		 * when copying out of the mapped upl below.
		 */
		delta = ofs & align_mask;
		ofs -= delta;
		orig_count = count;
		count += delta;
		count = (count + align_mask) & ~(off_t)align_mask;
		if (count > max_upl_size)
			count = max_upl_size;
		/*
		 * Do not exceed the attribute size except for a final partial
		 * page.
		 */
		size = (data_size - ofs + PAGE_MASK) & ~PAGE_MASK_64;
		if (count > size)
			count = size;
		/*
		 * @start_count is reused from here on to hold the size of the
		 * upl in bytes; the commit loop below and the abort_err path
		 * derive the number of pages from it.
		 */
		start_count = count;
		kerr = ubc_create_upl(vn, ofs, count, &upl, &pl, UPL_SET_LITE);
		if (kerr != KERN_SUCCESS)
			panic("%s(): Failed to get page list (error %d).\n",
					__FUNCTION__, (int)kerr);
		kerr = ubc_upl_map(upl, (vm_offset_t*)&kaddr);
		if (kerr != KERN_SUCCESS) {
			ntfs_error(ni->vol->mp, "Failed to map page list "
					"(error %d).", (int)kerr);
			err = EIO;
			goto abort_err;
		}
		/*
		 * We know @ofs starts on both a compression block and a page
		 * boundary.  We read from the compressed raw vnode
		 * decompressing the data into our mapped page list.  Any
		 * already valid pages are automatically skipped.
		 */
		err = ntfs_read_compressed(ni, raw_ni, ofs, count, kaddr, pl,
				ioflags);
		if (err) {
			ntfs_error(ni->vol->mp, "Failed to decompress data "
					"(error %d).", err);
			goto unm_err;
		}
		/*
		 * We now have the entire page list filled with valid pages,
		 * thus we can now copy from the mapped page list into the
		 * destination buffer using uiomove().  We just need to make
		 * sure not to copy past the end of the attribute.
		 *
		 * Undo the alignment adjustment first so that @ofs and @count
		 * describe the caller's range again, then clamp @count to
		 * what was originally requested and to the attribute size.
		 */
		ofs += delta;
		count -= delta;
		if (count > orig_count)
			count = orig_count;
		if (ofs + count > data_size)
			count = data_size - ofs;
		err = uiomove((caddr_t)(kaddr + delta), count, uio);
		if (err) {
			ntfs_error(ni->vol->mp, "uiomove() failed (error %d).",
					err);
			goto unm_err;
		}
		kerr = ubc_upl_unmap(upl);
		if (kerr != KERN_SUCCESS) {
			ntfs_error(ni->vol->mp, "ubc_upl_unmap() failed "
					"(error %d).", (int)kerr);
			err = EIO;
			goto abort_err;
		}
		/*
		 * We are done with the page list, commit and/or abort the
		 * pages.
		 *
		 * The pages are processed in runs of consecutive pages that
		 * share the same valid/dirty state; @cur_pg is the first page
		 * of the current run and @next_pg the first page after it.
		 */
		next_pg = 0;
		last_pg = start_count >> PAGE_SHIFT;
		do {
			int commit_flags;
			BOOL was_valid, was_dirty;

			cur_pg = next_pg;
			/* Determine the state of the current first page. */
			was_valid = upl_valid_page(pl, cur_pg);
			was_dirty = (was_valid && upl_dirty_page(pl, cur_pg));
			/* Find sequential pages of the same state. */
			for (next_pg = cur_pg + 1; next_pg < last_pg;
					next_pg++) {
				if (was_valid != upl_valid_page(pl, next_pg))
					break;
				if (was_valid) {
					if (was_dirty != upl_dirty_page(pl,
							next_pg))
						break;
				}
			}
			/* @count now holds the size of the run in bytes. */
			count = (next_pg - cur_pg) << PAGE_SHIFT;
			/*
			 * For a set of pages that were invalid and hence we
			 * just filled them with data we commit and clean them
			 * unless no caching is requested in which case we dump
			 * them.
			 *
			 * For a set of pages that were already valid and hence
			 * we did not touch we commit them taking care to
			 * preserve any dirty state unless the pages were clean
			 * and no caching is requested in which case we dump
			 * them.
			 */
			if (ioflags & IO_NOCACHE && !was_dirty) {
				ubc_upl_abort_range(upl, cur_pg << PAGE_SHIFT,
						count, UPL_ABORT_DUMP_PAGES |
						UPL_ABORT_FREE_ON_EMPTY);
				continue;
			}
			commit_flags = UPL_COMMIT_FREE_ON_EMPTY |
					UPL_COMMIT_INACTIVATE;
			if (!was_valid)
				commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
			else if (was_dirty)
				commit_flags |= UPL_COMMIT_SET_DIRTY;
			ubc_upl_commit_range(upl, cur_pg << PAGE_SHIFT, count,
					commit_flags);
		} while (next_pg < last_pg);
	/*
	 * Refresh the remaining byte count and the current offset from @uio
	 * and stop once the request is satisfied or the end of the attribute
	 * has been reached.
	 */
	} while ((start_count = uio_resid(uio)) &&
			(ofs = uio_offset(uio)) < data_size);
	ntfs_debug("Done.");
err:
	/* Common exit: release the raw inode lock and vnode reference. */
	lck_rw_unlock_shared(&raw_ni->lock);
	(void)vnode_put(raw_ni->vn);
	return err;
unm_err:
	/* Failure with the upl still mapped: unmap, then abort the pages. */
	kerr = ubc_upl_unmap(upl);
	if (kerr != KERN_SUCCESS)
		ntfs_error(ni->vol->mp, "ubc_upl_unmap() failed (error %d).",
				(int)kerr);
abort_err:
	/*
	 * We handle each page independently for simplicity.  We do not care
	 * for performance given this is an error code path.
	 *
	 * For a page that was not valid, we dump it as it still does not
	 * contain valid data.  For a page that was valid, we release it
	 * without modification as we have not touched it unless no caching is
	 * requested and the page was clean in which case we dump it.
	 *
	 * @start_count still holds the upl size in bytes at this point.
	 */
	last_pg = start_count >> PAGE_SHIFT;
	for (cur_pg = 0; cur_pg < last_pg; cur_pg++) {
		int abort_flags;

		abort_flags = UPL_ABORT_FREE_ON_EMPTY;
		if (!upl_valid_page(pl, cur_pg) || (ioflags & IO_NOCACHE &&
				!upl_dirty_page(pl, cur_pg)))
			abort_flags |= UPL_ABORT_DUMP_PAGES;
		ubc_upl_abort_range(upl, cur_pg << PAGE_SHIFT, PAGE_SIZE,
				abort_flags);
	}
	goto err;
}

// TODO: Rename to ntfs_inode_read and move to ntfs_inode.[hc]?
/**
 * ntfs_read - read a number of bytes from an inode into memory
 * @ni:		ntfs inode whose data to read into memory
 * @uio:	destination in which to return the read data
 * @ioflags:	flags further describing the read request
 * @locked:	if true the ntfs inode lock is already taken for reading
 *
 * Read uio_resid(@uio) bytes from the ntfs inode @ni, starting at byte offset
 * uio_offset(@uio) into the inode into the destination buffer pointed to by
 * @uio.
 *
 * The flags in @ioflags further describe the read request.  The following
 * ioflags are currently defined in OS X kernel (a lot of them are not
 * applicable to VNOP_READ() however):
 *	IO_UNIT		- Do i/o as atomic unit.
 *	IO_APPEND	- Append write to end.
 *	IO_SYNC		- Do i/o synchronously.
 *	IO_NODELOCKED	- Underlying node already locked.
 *	IO_NDELAY	- FNDELAY flag set in file table.
 *	IO_NOZEROFILL	- F_SETSIZE fcntl uses this to prevent zero filling.
 *	IO_TAILZEROFILL	- Zero fills at the tail of write.
 *	IO_HEADZEROFILL	- Zero fills at the head of write.
 *	IO_NOZEROVALID	- Do not zero fill if valid page.
 *	IO_NOZERODIRTY	- Do not zero fill if page is dirty.
 *	IO_CLOSE	- The i/o was issued from close path.
 *	IO_NOCACHE	- Same effect as VNOCACHE_DATA, but only for this i/o.
 *	IO_RAOFF	- Same effect as VRAOFF, but only for this i/o.
 *	IO_DEFWRITE	- Defer write if vfs.defwrite is set.
2745 * IO_PASSIVE - This is background i/o so do not throttle other i/o. 2746 * 2747 * For encrypted attributes we abort for now as we do not support them yet. 2748 * 2749 * For non-resident attributes we use cluster_read_ext() which deals with both 2750 * normal and multi sector transfer protected attributes and 2751 * ntfs_vnop_read_compressed() which deals with compressed attributes. 2752 * 2753 * For resident attributes we read the data from the vm page cache and if it is 2754 * not there we cause the vm page cache to be populated by reading the buffer 2755 * at offset 0 in the attribute. 2756 * 2757 * Return 0 on success and errno on error. 2758 * 2759 * Note it is up to the caller to verify that reading from the inode @ni makes 2760 * sense. We cannot do the verification inside ntfs_read() as it is called 2761 * from various VNOPs which all have different requirements. For example 2762 * VNOP_READLINK(), i.e. ntfs_vnop_readlink(), needs to only allow S_ISLNK() 2763 * inodes whilst VNOP_READ(), i.e. ntfs_vnop_read(), needs to not allow 2764 * S_ISLNK() but needs to allow S_IFREG() instead but only if it is not a 2765 * system file. 2766 */ 2767static errno_t ntfs_read(ntfs_inode *ni, uio_t uio, const int ioflags, 2768 const BOOL locked) 2769{ 2770 s64 size; 2771 user_ssize_t start_count; 2772 off_t ofs; 2773 vnode_t vn = ni->vn; 2774 ntfs_inode *base_ni; 2775 upl_t upl; 2776 upl_page_info_array_t pl; 2777 u8 *kaddr; 2778 int err, count; 2779 2780 ofs = uio_offset(uio); 2781 start_count = uio_resid(uio); 2782 base_ni = ni; 2783 if (NInoAttr(ni)) 2784 base_ni = ni->base_ni; 2785 ntfs_debug("Entering for file inode 0x%llx, offset 0x%llx, count " 2786 "0x%llx, ioflags 0x%x, locked is %s.", 2787 (unsigned long long)ni->mft_no, 2788 (unsigned long long)ofs, 2789 (unsigned long long)start_count, ioflags, 2790 locked ? "true" : "false"); 2791 /* 2792 * Protect against changes in initialized_size and thus against 2793 * truncation also. 
2794 */ 2795 if (!locked) 2796 lck_rw_lock_shared(&ni->lock); 2797 /* Do not allow messing with the inode once it has been deleted. */ 2798 if (NInoDeleted(ni)) { 2799 if (!locked) 2800 lck_rw_unlock_shared(&ni->lock); 2801 /* Remove the inode from the name cache. */ 2802 cache_purge(ni->vn); 2803 return ENOENT; 2804 } 2805 /* 2806 * TODO: This check may no longer be necessary now that we lock against 2807 * changes in initialized size and thus truncation... Revisit this 2808 * issue when the write code has been written and remove the check if 2809 * appropriate simply using ubc_getsize(vn); without the size_lock. 2810 */ 2811 lck_spin_lock(&ni->size_lock); 2812 size = ubc_getsize(vn); 2813 if (size > ni->data_size) 2814 size = ni->data_size; 2815 lck_spin_unlock(&ni->size_lock); 2816 /* 2817 * If nothing was requested or the request starts at or beyond the end 2818 * of the attribute, we do not need to do anything. 2819 */ 2820 if (!start_count || ofs >= size) { 2821 err = 0; 2822 goto err; 2823 } 2824 /* Cannot read from a negative offset. */ 2825 if (ofs < 0) { 2826 err = EINVAL; 2827 goto err; 2828 } 2829 /* TODO: Deny access to encrypted attributes, just like NT4. 
*/ 2830 if (NInoEncrypted(ni)) { 2831 ntfs_warning(ni->vol->mp, "Denying access to encrypted " 2832 "attribute (EACCES)."); 2833 err = EACCES; 2834 goto err; 2835 } 2836 if (NInoNonResident(ni)) { 2837 int (*callback)(buf_t, void *); 2838 2839 if (NInoCompressed(ni) && !NInoRaw(ni)) { 2840 err = ntfs_vnop_read_compressed(ni, uio, size, ioflags); 2841 if (!err) 2842 ntfs_debug("Done (ntfs_vnop_read_compressed()" 2843 ")."); 2844 else 2845 ntfs_error(ni->vol->mp, "Failed (" 2846 "ntfs_vnop_read_compressed(), " 2847 "error %d).", err); 2848 goto err; 2849 } 2850 callback = NULL; 2851 if (NInoMstProtected(ni) || NInoEncrypted(ni)) 2852 callback = ntfs_cluster_iodone; 2853 err = cluster_read_ext(vn, uio, size, ioflags, callback, NULL); 2854 if (!err) 2855 ntfs_debug("Done (cluster_read_ext())."); 2856 else 2857 ntfs_error(ni->vol->mp, "Failed for file inode " 2858 "0x%llx, start offset 0x%llx, start " 2859 "count 0x%llx, now offset 0x%llx, " 2860 "now count 0x%llx, ioflags 0x%x " 2861 "(cluster_read_ext(), error %d).", 2862 (unsigned long long)ni->mft_no, 2863 (unsigned long long)ofs, 2864 (unsigned long long)start_count, 2865 (unsigned long long)uio_offset(uio), 2866 (unsigned long long)uio_resid(uio), 2867 ioflags, err); 2868 goto err; 2869 } /* else if (!NInoNonResident(ni)) */ 2870 /* 2871 * That attribute is resident thus we have to deal with it by 2872 * ourselves. First of all, try to copy the data from the vm page 2873 * cache. This will work on the second and all later reads so this is 2874 * the hot path. If the attribute has not been accessed at all before 2875 * or its cached pages were dropped due to vm pressure this will fail 2876 * to copy any data due to the lack of a valid page and we will drop 2877 * into the slow path. 
2878 */ 2879 size -= ofs; 2880 if (size > start_count) 2881 size = start_count; 2882 if (size > PAGE_SIZE) { 2883 ntfs_warning(ni->vol->mp, "Unexpected count 0x%llx > PAGE_SIZE " 2884 "0x%x, overriding it to PAGE_SIZE.", 2885 (unsigned long long)size, PAGE_SIZE); 2886 size = PAGE_SIZE; 2887 } 2888 count = size; 2889 err = cluster_copy_ubc_data(vn, uio, &count, 0); 2890 if (err) { 2891 /* The copying (uiomove()) failed with an error, abort. */ 2892 ntfs_error(ni->vol->mp, "cluster_copy_ubc_data() failed " 2893 "(error %d).", err); 2894 goto err; 2895 } 2896 /* 2897 * @count is now set to the number of bytes remaining to be 2898 * transferred. If it is zero, it means we are done. Note it is 2899 * possible that there is more data requested, i.e. uio_resid(uio) > 0, 2900 * but that just means the request goes beyond the end of the 2901 * attribute. 2902 */ 2903 if (!count) { 2904 ntfs_debug("Done (resident, cached, returned 0x%llx bytes).", 2905 (unsigned long long)size); 2906 goto err; 2907 } 2908 /* 2909 * We failed to transfer everything. That really means we failed to 2910 * transfer anything at all as we are guaranteed that a resident 2911 * attribute is smaller than a page thus either the page is there and 2912 * valid and we transfer everything or it is not and we transfer 2913 * nothing. 2914 */ 2915 if (count != size) { 2916 ntfs_warning(ni->vol->mp, "Unexpected partial transfer from " 2917 "cached page (size 0x%llx, count 0x%x).", 2918 (unsigned long long)size, count); 2919 ofs = uio_offset(uio); 2920 } 2921 /* 2922 * The page is not in cache or is not valid. We need to bring it into 2923 * cache and make it valid so we can then copy the data out. The 2924 * easiest way to do this is to just map the page which will take care 2925 * of everything for us. We can than uiomove() straight out of the 2926 * page into the @uio and then unmap the page again. 
2927 * 2928 * Note this will take the inode lock again but this is ok as in both 2929 * cases the lock is taken shared. 2930 */ 2931 err = ntfs_page_map(ni, 0, &upl, &pl, &kaddr, FALSE); 2932 if (err) { 2933 ntfs_error(ni->vol->mp, "Failed to map page (error %d).", err); 2934 goto err; 2935 } 2936 err = uiomove((caddr_t)(kaddr + ofs), count, uio); 2937 ntfs_page_unmap(ni, upl, pl, FALSE); 2938 if (!err) 2939 ntfs_debug("Done (resident, not cached, returned 0x%llx " 2940 "bytes).", (unsigned long long)size - 2941 uio_resid(uio)); 2942 else 2943 ntfs_error(ni->vol->mp, "uiomove() failed (error %d).", err); 2944err: 2945 /* 2946 * Update the last_access_time (atime) if something was read and this 2947 * is the base ntfs inode or it is a named stream (this is what HFS+ 2948 * does, too). 2949 * 2950 * Skip the update if atime updates are disabled via the noatime mount 2951 * option or the volume is read only or this is a symbolic link. 2952 * 2953 * Also, skip the core system files except for the root directory. 
2954 */ 2955 if (uio_resid(uio) < start_count && !NVolReadOnly(ni->vol) && 2956 !(vfs_flags(ni->vol->mp) & MNT_NOATIME) && 2957 !S_ISLNK(base_ni->mode) && 2958 (ni == base_ni || ni->type == AT_DATA)) { 2959 BOOL need_update_time; 2960 2961 need_update_time = TRUE; 2962 if (ni->vol->major_ver > 1) { 2963 if (base_ni->mft_no <= FILE_Extend && 2964 base_ni != ni->vol->root_ni) 2965 need_update_time = FALSE; 2966 } else { 2967 if (base_ni->mft_no <= FILE_UpCase && 2968 base_ni != ni->vol->root_ni) 2969 need_update_time = FALSE; 2970 } 2971 if (need_update_time) { 2972 base_ni->last_access_time = ntfs_utc_current_time(); 2973 NInoSetDirtyTimes(base_ni); 2974 } 2975 } 2976 if (!locked) 2977 lck_rw_unlock_shared(&ni->lock); 2978 return err; 2979} 2980 2981/** 2982 * ntfs_vnop_read - read a number of bytes from a file into memory 2983 * @a: arguments to read function 2984 * 2985 * @a contains: 2986 * vnode_t a_vp; vnode of file whose data to read into memory 2987 * uio_t a_uio; destination in which to return the read data 2988 * int a_ioflag; flags further describing the read request 2989 * vfs_context_t a_context; 2990 * 2991 * Read uio_resid(@a->a_uio) bytes from the vnode @a-a_vp, starting at byte 2992 * offset uio_offset(@a->a_uio) into the vnode into the destination buffer 2993 * pointed to by @uio. 2994 * 2995 * The flags in @a->a_ioflag further describe the read request. The following 2996 * ioflags are currently defined in OS X kernel (a lot of them are not 2997 * applicable to VNOP_READ() however): 2998 * IO_UNIT - Do i/o as atomic unit. 2999 * IO_APPEND - Append write to end. 3000 * IO_SYNC - Do i/o synchronously. 3001 * IO_NODELOCKED - Underlying node already locked. 3002 * IO_NDELAY - FNDELAY flag set in file table. 3003 * IO_NOZEROFILL - F_SETSIZE fcntl uses this to prevent zero filling. 3004 * IO_TAILZEROFILL - Zero fills at the tail of write. 3005 * IO_HEADZEROFILL - Zero fills at the head of write. 3006 * IO_NOZEROVALID - Do not zero fill if valid page. 
3007 * IO_NOZERODIRTY - Do not zero fill if page is dirty. 3008 * IO_CLOSE - The i/o was issued from close path. 3009 * IO_NOCACHE - Same effect as VNOCACHE_DATA, but only for this i/o. 3010 * IO_RAOFF - Same effect as VRAOFF, but only for this i/o. 3011 * IO_DEFWRITE - Defer write if vfs.defwrite is set. 3012 * IO_PASSIVE - This is background i/o so do not throttle other i/o. 3013 * 3014 * For encrypted attributes we abort for now as we do not support them yet. 3015 * 3016 * For non-resident attributes we use cluster_read_ext() which deals with both 3017 * normal and multi sector transfer protected attributes and 3018 * ntfs_vnop_read_compressed() which deals with compressed attributes. 3019 * 3020 * For resident attributes we read the data from the vm page cache and if it is 3021 * not there we cause the vm page cache to be populated by reading the buffer 3022 * at offset 0 in the attribute. 3023 * 3024 * Return 0 on success and errno on error. 3025 */ 3026static int ntfs_vnop_read(struct vnop_read_args *a) 3027{ 3028 vnode_t vn = a->a_vp; 3029 ntfs_inode *ni = NTFS_I(vn); 3030 3031 if (!ni) { 3032 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 3033 return EINVAL; 3034 } 3035 /* 3036 * We can only read from regular files and named streams. 3037 * 3038 * Also, do not allow reading from system files or mst protected 3039 * attributes. 3040 */ 3041 if (vnode_issystem(vn) || NInoMstProtected(ni) || 3042 (!S_ISREG(ni->mode) && !(NInoAttr(ni) && 3043 ni->type == AT_DATA))) { 3044 if (S_ISDIR(ni->mode)) 3045 return EISDIR; 3046 return EPERM; 3047 } 3048 return (int)ntfs_read(ni, a->a_uio, a->a_ioflag, FALSE); 3049} 3050 3051// TODO: Rename to ntfs_inode_write and move to ntfs_inode.[hc]? 
3052/** 3053 * ntfs_write - write a number of bytes from a memory buffer into a file 3054 * @ni: ntfs inode to write to 3055 * @uio: source containing the data to write 3056 * @ioflags: flags further describing the write request 3057 * @write_locked: if true the ntfs inode lock is already taken for writing 3058 * 3059 * Write uio_resid(@uio) bytes from the source buffer specified by @uio to the 3060 * ntfs inode @ni, starting at byte offset uio_offset(@uio) into the inode. 3061 * 3062 * The flags in @ioflags further describe the write request. The following 3063 * ioflags are currently defined in OS X kernel (not all of them are applicable 3064 * to VNOP_WRITE() however): 3065 * IO_UNIT - Do i/o as atomic unit. 3066 * IO_APPEND - Append write to end. 3067 * IO_SYNC - Do i/o synchronously. 3068 * IO_NODELOCKED - Underlying node already locked. 3069 * IO_NDELAY - FNDELAY flag set in file table. 3070 * IO_NOZEROFILL - F_SETSIZE fcntl uses this to prevent zero filling. 3071 * IO_TAILZEROFILL - Zero fills at the tail of write. 3072 * IO_HEADZEROFILL - Zero fills at the head of write. 3073 * IO_NOZEROVALID - Do not zero fill if valid page. 3074 * IO_NOZERODIRTY - Do not zero fill if page is dirty. 3075 * IO_CLOSE - The i/o was issued from close path. 3076 * IO_NOCACHE - Same effect as VNOCACHE_DATA, but only for this i/o. 3077 * IO_RAOFF - Same effect as VRAOFF, but only for this i/o. 3078 * IO_DEFWRITE - Defer write if vfs.defwrite is set. 3079 * IO_PASSIVE - This is background i/o so do not throttle other i/o. 3080 * 3081 * For compressed and encrypted attributes we abort for now as we do not 3082 * support them yet. 3083 * 3084 * For non-resident attributes we use cluster_write_ext() which deals with 3085 * normal attributes. 3086 * 3087 * Return 0 on success and errno on error. 3088 * 3089 * Note it is up to the caller to verify that writing to the inode @ni makes 3090 * sense. 
We cannot do the verification inside ntfs_write() as it is called 3091 * from various VNOPs which all have different requirements. For example 3092 * VNOP_SYMLINK(), i.e. ntfs_vnop_symlink(), needs to write to S_ISLNK() inodes 3093 * whilst VNOP_WRITE(), i.e. ntfs_vnop_write(), needs to not allow S_ISLNK() 3094 * but needs to allow S_IFREG() instead but only if it is not a system file. 3095 */ 3096static errno_t ntfs_write(ntfs_inode *ni, uio_t uio, int ioflags, 3097 BOOL write_locked) 3098{ 3099 s64 old_size, size, end, nr_truncated; 3100 user_ssize_t old_count, count; 3101 off_t old_ofs, ofs; 3102 vnode_t vn = ni->vn; 3103 ntfs_inode *base_ni; 3104 upl_t upl; 3105 upl_page_info_array_t pl; 3106 u8 *kaddr; 3107 int cnt; 3108 errno_t err; 3109 BOOL was_locked, need_uptodate; 3110 3111 /* Do not allow writing if mounted read-only. */ 3112 if (NVolReadOnly(ni->vol)) 3113 return EROFS; 3114 nr_truncated = 0; 3115 ofs = old_ofs = uio_offset(uio); 3116 count = old_count = uio_resid(uio); 3117 ntfs_debug("Entering for file inode 0x%llx, offset 0x%llx, count " 3118 "0x%llx, ioflags 0x%x, write_locked is %s.", 3119 (unsigned long long)ni->mft_no, 3120 (unsigned long long)ofs, 3121 (unsigned long long)count, ioflags, 3122 write_locked ? "true" : "false"); 3123 /* If nothing to do return success. */ 3124 if (!count) 3125 return 0; 3126 /* Cannot write to a negative offset. */ 3127 if (ofs < 0) 3128 return EINVAL; 3129 /* TODO: Deny access to encrypted attributes, just like NT4. */ 3130 if (NInoEncrypted(ni)) { 3131 ntfs_warning(ni->vol->mp, "Denying write to encrypted " 3132 "attribute (EACCES)."); 3133 return EACCES; 3134 } 3135 /* TODO: We do not support writing to compressed files. */ 3136 if (NInoCompressed(ni)) { 3137 ntfs_error(ni->vol->mp, "Writing to compressed files is not " 3138 "implemented yet. Sorry."); 3139 return ENOTSUP; 3140 } 3141#if 1 // TODO: Remove this when sparse support is done... 
3142 if (NInoSparse(ni)) 3143 return ENOTSUP; 3144#endif 3145 base_ni = ni; 3146 if (NInoAttr(ni)) 3147 base_ni = ni->base_ni; 3148 /* The first byte after the write. */ 3149 end = ofs + count; 3150 /* 3151 * If we are going to extend the initialized size take the inode lock 3152 * for writing and take it for reading otherwise. 3153 * 3154 * Appending will always cause the initialized size to be extended thus 3155 * always take the lock for writing. 3156 * 3157 * Writing into holes requires us to take the lock for writing thus if 3158 * this is a sparse file take the lock for writing just in case. 3159 */ 3160 was_locked = write_locked; 3161 if (ioflags & IO_APPEND) { 3162 if (!was_locked) { 3163 lck_rw_lock_exclusive(&ni->lock); 3164 write_locked = TRUE; 3165 } 3166 /* 3167 * Do not allow messing with the inode once it has been 3168 * deleted. 3169 */ 3170 if (NInoDeleted(ni)) { 3171 if (!was_locked) 3172 lck_rw_unlock_exclusive(&ni->lock); 3173 /* Remove the inode from the name cache. */ 3174 cache_purge(ni->vn); 3175 return ENOENT; 3176 } 3177 lck_spin_lock(&ni->size_lock); 3178 ofs = ni->data_size; 3179 lck_spin_unlock(&ni->size_lock); 3180 uio_setoffset(uio, ofs); 3181 ntfs_debug("Write to mft_no 0x%llx, IO_APPEND flag is set, " 3182 "setting uio_offset() to file size 0x%llx.", 3183 (unsigned long long)ni->mft_no, 3184 (unsigned long long)ofs); 3185 /* Update the first byte after the write with the new offset. */ 3186 end = ofs + count; 3187 } else { 3188 if (!was_locked) { 3189 if (NInoSparse(ni)) { 3190 lck_rw_lock_exclusive(&ni->lock); 3191 write_locked = TRUE; 3192 } else { 3193 lck_rw_lock_shared(&ni->lock); 3194 write_locked = FALSE; 3195 } 3196 } 3197recheck_deleted: 3198 /* 3199 * Do not allow messing with the inode once it has been 3200 * deleted. 
3201 */ 3202 if (NInoDeleted(ni)) { 3203 if (!was_locked) { 3204 if (write_locked) 3205 lck_rw_unlock_exclusive(&ni->lock); 3206 else 3207 lck_rw_unlock_shared(&ni->lock); 3208 } 3209 /* Remove the inode from the name cache. */ 3210 cache_purge(ni->vn); 3211 return ENOENT; 3212 } 3213 lck_spin_lock(&ni->size_lock); 3214 size = ni->initialized_size; 3215 lck_spin_unlock(&ni->size_lock); 3216 if (!write_locked && end > size) { 3217 /* If we fail to convert the lock, take it. */ 3218 if (!lck_rw_lock_shared_to_exclusive(&ni->lock)) 3219 lck_rw_lock_exclusive(&ni->lock); 3220 write_locked = TRUE; 3221 goto recheck_deleted; 3222 } 3223 ntfs_debug("Mft_no 0x%llx, inode lock taken for %s.", 3224 (unsigned long long)ni->mft_no, 3225 write_locked ? "writing" : "reading"); 3226 } 3227 /* 3228 * We do not want any form of zero filling to happen at the starting 3229 * offset of the write as we sort this out ourselves. 3230 * 3231 * Further, we never want to zero fill at the end of the write as this 3232 * is pointless. We automatically get zero filling at the end of the 3233 * page when a page is read in and when the initialized size is 3234 * extended. 3235 */ 3236 ioflags &= ~(IO_HEADZEROFILL | IO_TAILZEROFILL); 3237 /* 3238 * We do not want to zero any valid/dirty pages as they could already 3239 * have new data written via mmap() for example and we do not want to 3240 * lose that. 3241 */ 3242 ioflags |= IO_NOZEROVALID | IO_NOZERODIRTY; 3243 lck_spin_lock(&ni->size_lock); 3244 old_size = ni->data_size; 3245 size = ni->allocated_size; 3246 lck_spin_unlock(&ni->size_lock); 3247 /* 3248 * If this is a sparse attribute and the write overlaps the existing 3249 * allocated size we need to fill any holes overlapping the write. We 3250 * can skip resident attributes as they cannot have sparse regions. 
3251 * 3252 * As allocated size goes in units of clusters we need to round down 3253 * the start offset to the nearest cluster boundary and we need to 3254 * round up the end offset to the next cluster boundary. 3255 */ 3256 if (NInoSparse(ni) && NInoNonResident(ni) && 3257 (ofs & ~ni->vol->cluster_size_mask) < size) { 3258 s64 aligned_end, new_end; 3259 3260 if (!write_locked) 3261 panic("%s(): !write_locked\n", __FUNCTION__); 3262 aligned_end = (end + ni->vol->cluster_size_mask) & 3263 ~ni->vol->cluster_size_mask; 3264 /* 3265 * Only need to instantiate holes up to the allocated size 3266 * itself. Everything else is an extension and will be dealt 3267 * with by ntfs_attr_extend_allocation() below. 3268 */ 3269 if (aligned_end > size) 3270 aligned_end = size; 3271 err = ntfs_attr_instantiate_holes(ni, 3272 ofs & ~ni->vol->cluster_size_mask, aligned_end, 3273 &new_end, ioflags & IO_UNIT); 3274 if (err) { 3275 ntfs_error(ni->vol->mp, "Cannot perform write to " 3276 "mft_no 0x%llx because instantiation " 3277 "of sparse regions failed (error %d).", 3278 (unsigned long long)ni->mft_no, err); 3279 uio_setoffset(uio, old_ofs); 3280 uio_setresid(uio, old_count); 3281 if (!was_locked) 3282 lck_rw_unlock_exclusive(&ni->lock); 3283 return err; 3284 } 3285 /* If the instantiation was partial, truncate the write. 
*/ 3286 if (new_end < aligned_end) { 3287 s64 new_count; 3288 3289 if (ioflags & IO_UNIT) 3290 panic("%s(): new_end < aligned_end && " 3291 "ioflags & IO_UNIT\n", 3292 __FUNCTION__); 3293 ntfs_debug("Truncating write to mft_no 0x%llx because " 3294 "instantiation of sparse regions was " 3295 "only partially completed.", 3296 (unsigned long long)ni->mft_no); 3297 if (new_end > end) 3298 panic("%s(): new_end > end\n", __FUNCTION__); 3299 end = new_end; 3300 new_count = new_end - ofs; 3301 if (new_count >= count) 3302 panic("%s(): new_count >= count\n", 3303 __FUNCTION__); 3304 nr_truncated += count - new_count; 3305 count = new_count; 3306 uio_setresid(uio, new_count); 3307 } 3308 } 3309 /* 3310 * If the write goes beyond the allocated size, extend the allocation 3311 * to cover the whole of the write, rounded up to the nearest cluster. 3312 */ 3313 if (end > size) { 3314 if (!write_locked) 3315 panic("%s(): !write_locked\n", __FUNCTION__); 3316 /* Extend the allocation without changing the data size. */ 3317 err = ntfs_attr_extend_allocation(ni, end, -1, ofs, NULL, 3318 &size, ioflags & IO_UNIT); 3319 if (!err) { 3320 if (ofs >= size) 3321 panic("%s(): ofs >= size\n", __FUNCTION__); 3322 /* If the extension was partial truncate the write. 
*/ 3323 if (end > size) { 3324 s64 new_count; 3325 3326 if (ioflags & IO_UNIT) 3327 panic("%s(): end > size && " 3328 "ioflags & IO_UNIT\n", 3329 __FUNCTION__); 3330 ntfs_debug("Truncating write to mft_no 0x%llx " 3331 "because the allocation was " 3332 "only partially extended.", 3333 (unsigned long long)ni->mft_no); 3334 end = size; 3335 new_count = size - ofs; 3336 if (new_count >= count) 3337 panic("%s(): new_count >= count\n", 3338 __FUNCTION__); 3339 nr_truncated += count - new_count; 3340 count = new_count; 3341 uio_setresid(uio, new_count); 3342 } 3343 } else /* if (err) */ { 3344 lck_spin_lock(&ni->size_lock); 3345 size = ni->allocated_size; 3346 lck_spin_unlock(&ni->size_lock); 3347 /* Perform a partial write if possible or fail. */ 3348 if (ofs < size && !(ioflags & IO_UNIT)) { 3349 s64 new_count; 3350 3351 ntfs_debug("Truncating write to mft_no 0x%llx " 3352 "because extending the " 3353 "allocation failed (error %d).", 3354 (unsigned long long)ni->mft_no, 3355 err); 3356 end = size; 3357 new_count = size - ofs; 3358 if (new_count >= count) 3359 panic("%s(): new_count >= count\n", 3360 __FUNCTION__); 3361 nr_truncated += count - new_count; 3362 count = new_count; 3363 uio_setresid(uio, new_count); 3364 } else { 3365 ntfs_error(ni->vol->mp, "Cannot perform write " 3366 "to mft_no 0x%llx because " 3367 "extending the allocation " 3368 "failed (error %d).", 3369 (unsigned long long)ni->mft_no, 3370 err); 3371 goto abort; 3372 } 3373 } 3374 } 3375 /* 3376 * If the write starts beyond the initialized size, extend it up to the 3377 * beginning of the write and initialize all non-sparse space between 3378 * the old initialized size and the new one. This automatically also 3379 * increments the data size as well as the ubc size to keep it above or 3380 * equal to the initialized size. 
3381 */ 3382 lck_spin_lock(&ni->size_lock); 3383 size = ni->initialized_size; 3384 lck_spin_unlock(&ni->size_lock); 3385 if (ofs > size) { 3386 if (!write_locked) 3387 panic("%s(): !write_locked 2\n", __FUNCTION__); 3388 err = ntfs_attr_extend_initialized(ni, ofs); 3389 if (err) { 3390 ntfs_error(ni->vol->mp, "Cannot perform write to " 3391 "mft_no 0x%llx because extending the " 3392 "initialized size failed (error %d).", 3393 (unsigned long long)ni->mft_no, err); 3394 goto abort; 3395 } 3396 size = ofs; 3397 } 3398 if (NInoNonResident(ni)) { 3399 int (*callback)(buf_t, void *); 3400 3401 if (NInoCompressed(ni) && !NInoRaw(ni)) { 3402#if 0 3403 err = ntfs_vnop_write_compressed(ni, uio, size, 3404 ioflags); 3405 if (!err) 3406 ntfs_debug("Done (ntfs_vnop_write_compressed()" 3407 ")."); 3408 else 3409 ntfs_error(ni->vol->mp, "Failed (" 3410 "ntfs_vnop_write_compressed(), " 3411 "error %d).", err); 3412#endif 3413 /* 3414 * TODO: At present we should never get here for 3415 * compressed files as this case is aborted at the 3416 * start of the function. 3417 */ 3418 panic("%s(): NInoCompressed(ni) && !NInoRaw(ni)\n", 3419 __FUNCTION__); 3420 } 3421 callback = NULL; 3422 if (NInoEncrypted(ni)) { 3423 callback = ntfs_cluster_iodone; 3424 /* 3425 * TODO: At present we should never get here for 3426 * encrypted files as this case is aborted at the start 3427 * of the function. 3428 */ 3429 panic("%s(): NInoEncrypted(ni)\n", __FUNCTION__); 3430 } 3431 /* Determine the new file size. */ 3432 size = ubc_getsize(vn); 3433 if (end > size) 3434 size = end; 3435 /* 3436 * Note the first size is the original file size and the second 3437 * file size is the new file size when the write is complete. 3438 */ 3439 err = cluster_write_ext(vn, uio, ubc_getsize(vn), size, 0, 0, 3440 ioflags, callback, NULL); 3441 if (err) { 3442 /* 3443 * There was an error. We do not know where. Ensure 3444 * everything is set up as if the write never happened. 
3445 */ 3446 ntfs_error(ni->vol->mp, "Failed (cluster_write_ext(), " 3447 "error %d).", err); 3448 goto abort; 3449 } 3450 goto done; 3451 } 3452 /* 3453 * The attribute is resident thus we have to deal with it by ourselves. 3454 * First of all, try to copy the data to the vm page cache. This will 3455 * work on the second and all later writes so this is the hot path. If 3456 * the attribute has not been accessed at all before or its cached 3457 * pages were dropped due to vm pressure this will fail to copy any 3458 * data due to the lack of a valid page and we will drop into the slow 3459 * path. 3460 */ 3461 if (ofs > PAGE_SIZE) 3462 panic("%s(): ofs > PAGE_SIZE\n", __FUNCTION__); 3463 cnt = (int)count; 3464 if (count > PAGE_SIZE - ofs) { 3465 cnt = PAGE_SIZE - ofs; 3466 ntfs_warning(ni->vol->mp, "Unexpected count (0x%llx) > " 3467 "PAGE_SIZE - ofs (0x%x), overriding it to " 3468 "PAGE_SIZE - ofs.", (unsigned long long)count, 3469 cnt); 3470 } 3471 /* 3472 * Note we pass mark_dirty = 1 (the last parameter) which means the 3473 * pages that are written to will be marked dirty. 3474 */ 3475 err = cluster_copy_ubc_data(vn, uio, &cnt, 1); 3476 if (err) { 3477 /* 3478 * The copying (uiomove()) failed with an error. Ensure 3479 * everything is set up as if the write never happened. 3480 */ 3481 ntfs_error(ni->vol->mp, "cluster_copy_ubc_data() failed " 3482 "(error %d).", err); 3483 goto abort; 3484 } 3485 /* 3486 * @cnt is now set to the number of bytes remaining to be transferred. 3487 * If it is zero, it means we are done. 3488 */ 3489 if (!cnt) 3490 goto done; 3491 /* 3492 * We failed to transfer everything. That really means we failed to 3493 * transfer anything at all as we are guaranteed that a resident 3494 * attribute is smaller than a page thus either the page is there and 3495 * valid and we transfer everything or it is not and we transfer 3496 * nothing. 
3497 */ 3498 if (cnt != count) { 3499 ntfs_warning(ni->vol->mp, "Unexpected partial transfer to " 3500 "cached page (count 0x%llx, cnt 0x%x).", 3501 (unsigned long long)count, cnt); 3502 /* Ensure everything is as it was before. */ 3503 uio_setoffset(uio, old_ofs); 3504 uio_setresid(uio, old_count - nr_truncated); 3505 } 3506 /* 3507 * The page is not in cache or is not valid. We need to bring it into 3508 * cache and make it valid so we can then copy the data in. The 3509 * easiest way to do this is to just map the page which will take care 3510 * of everything for us. We can then uiomove() straight into the page 3511 * from the @uio and then mark the page dirty and unmap it again. 3512 * 3513 * As an optimization, if the write covers the whole existing attribute 3514 * we grab the page without bringing it uptodate if it is not valid 3515 * already thus saving a pagein from disk. 3516 */ 3517 need_uptodate = (ofs || end < size); 3518 err = ntfs_page_map_ext(ni, 0, &upl, &pl, &kaddr, need_uptodate, TRUE); 3519 if (err) { 3520 ntfs_error(ni->vol->mp, "Failed to map page (error %d).", err); 3521 goto abort; 3522 } 3523 err = uiomove((caddr_t)(kaddr + ofs), cnt, uio); 3524 if (err) { 3525 /* 3526 * If we just caused the page to exist and did not bring it 3527 * up-to-date or caching is disabled on the vnode or for this 3528 * i/o, dump the page. Otherwise release it back to the VM. 3529 */ 3530 if (upl_valid_page(pl, 0) || (need_uptodate && 3531 !vnode_isnocache(vn) && 3532 !(ioflags & IO_NOCACHE))) 3533 ntfs_page_unmap(ni, upl, pl, FALSE); 3534 else 3535 ntfs_page_dump(ni, upl, pl); 3536 /* 3537 * The copying (uiomove()) failed with an error. Ensure 3538 * everything is set up as if the write never happened. 
3539 */ 3540 ntfs_error(ni->vol->mp, "uiomove() failed (error %d).", err); 3541 goto abort; 3542 } 3543 /* 3544 * If the page is not uptodate and we did not bring it up-to-date when 3545 * mapping it, zero the remainder of the page now thus bringing it 3546 * up-to-date. 3547 */ 3548 if (!need_uptodate && !upl_valid_page(pl, 0)) { 3549 const off_t cur_ofs = uio_offset(uio); 3550 if (cur_ofs > PAGE_SIZE) 3551 panic("%s(): cur_ofs > PAGE_SIZE\n", __FUNCTION__); 3552 bzero(kaddr + cur_ofs, PAGE_SIZE - cur_ofs); 3553 } 3554 /* 3555 * Unmap the page marking it dirty. 3556 * 3557 * Note we leave the page cached even if no caching is requested for 3558 * simplicity. That way we do not need to touch the mft record at all 3559 * and can instead rely on the next sync to propagate the dirty data 3560 * from the page into the mft record and then to disk. In the sync i/o 3561 * case we will call ntfs_inode_sync() at the end of this function. 3562 */ 3563 ntfs_page_unmap(ni, upl, pl, TRUE); 3564done: 3565 /* 3566 * If the write went past the end of the initialized size update it 3567 * both in the ntfs inode and in the base attribute record. 3568 * 3569 * Also update the data size and the ubc size if the write went past 3570 * the end of the data size. Note this is automatically done by 3571 * ntfs_attr_set_initialized_size() so we do not need to do it here. 
3572 */ 3573 size = uio_offset(uio); 3574 lck_spin_lock(&ni->size_lock); 3575 if (size > ni->initialized_size) { 3576 lck_spin_unlock(&ni->size_lock); 3577 if (!write_locked) 3578 panic("%s(): !write_locked 3\n", __FUNCTION__); 3579 err = ntfs_attr_set_initialized_size(ni, size); 3580 if (err) { 3581 ntfs_error(ni->vol->mp, "Failed to update the " 3582 "initialized size of mft_no 0x%llx " 3583 "(error %d).", 3584 (unsigned long long)ni->mft_no, err); 3585 /* 3586 * If the write was meant to be atomic, the write 3587 * started beyond the end of the initialized size, or 3588 * nothing was written ensure everything is set up as 3589 * if the write never happened. 3590 */ 3591 lck_spin_lock(&ni->size_lock); 3592 size = ni->initialized_size; 3593 lck_spin_unlock(&ni->size_lock); 3594 if (ioflags & IO_UNIT || old_ofs >= size || 3595 uio_resid(uio) >= old_count) 3596 goto abort; 3597 /* 3598 * Something was written before the initialized size 3599 * thus turn the error into a partial, successful write 3600 * up to the initialized size. 3601 */ 3602 uio_setoffset(uio, size); 3603 uio_setresid(uio, size - old_ofs); 3604 err = 0; 3605 } 3606 } else 3607 lck_spin_unlock(&ni->size_lock); 3608 // TODO: If we wrote anything at all we have to clear the S_ISUID and 3609 // S_ISGID bits in the file mode as a precaution against tampering 3610 // (see xnu/bsd/hfs/hfs_readwrite.c::hfs_vnop_write()). 3611 /* 3612 * Update the last_data_change_time (mtime) and last_mft_change_time 3613 * (ctime) on the base ntfs inode @base_ni unless this is an attribute 3614 * inode update in which case only update the ctime as named stream/ 3615 * extended attribute semantics expect on OS X. 
3616 */ 3617 base_ni->last_mft_change_time = ntfs_utc_current_time(); 3618 if (ni == base_ni) 3619 base_ni->last_data_change_time = base_ni->last_mft_change_time; 3620 NInoSetDirtyTimes(base_ni); 3621 /* 3622 * If this is not a directory or it is an encrypted directory, set the 3623 * needs archiving bit except for the core system files. 3624 */ 3625 if (!S_ISDIR(base_ni->mode) || NInoEncrypted(base_ni)) { 3626 BOOL need_set_archive_bit = TRUE; 3627 if (ni->vol->major_ver >= 2) { 3628 if (ni->mft_no <= FILE_Extend) 3629 need_set_archive_bit = FALSE; 3630 } else { 3631 if (ni->mft_no <= FILE_UpCase) 3632 need_set_archive_bit = FALSE; 3633 } 3634 if (need_set_archive_bit) { 3635 base_ni->file_attributes |= FILE_ATTR_ARCHIVE; 3636 NInoSetDirtyFileAttributes(base_ni); 3637 } 3638 } 3639 /* 3640 * If we truncated the write add back the number of truncated bytes to 3641 * the number of bytes remaining. 3642 */ 3643 if (nr_truncated > 0) { 3644 if (ioflags & IO_UNIT) 3645 panic("%s(): ioflags & IO_UNIT\n", __FUNCTION__); 3646 uio_setresid(uio, uio_resid(uio) + nr_truncated); 3647 } 3648 /* 3649 * If the write was partial we need to trim off any extra allocated 3650 * space by truncating the attribute to its old size. We can only have 3651 * extended the allocation if we hold the inode lock for writing so do 3652 * not bother going through this code if we only hold the lock for 3653 * reading. 3654 * 3655 * There is one exception and that is that if the write was meant to be 3656 * atomic a partial write is not acceptable thus we need to abort the 3657 * write completely in this case. 3658 */ 3659 size = uio_resid(uio); 3660 if (write_locked && size > nr_truncated) { 3661 s64 truncate_size; 3662 errno_t err2; 3663 int rflags; 3664 3665 /* 3666 * If the write was meant to be atomic or nothing was written 3667 * reset everything as if the write never happened thus 3668 * releasing any extra space we may have allocated. 
3669 */ 3670 if (ioflags & IO_UNIT || size >= old_count) { 3671 if (size > old_count) 3672 panic("%s(): size > old_count\n", __FUNCTION__); 3673abort: 3674 uio_setoffset(uio, old_ofs); 3675 uio_setresid(uio, old_count); 3676 if (!write_locked) { 3677 if (!err) 3678 panic("%s(): !err\n", __FUNCTION__); 3679 goto skip_truncate; 3680 } 3681 truncate_size = old_size; 3682 } else /* if (uio_resid(uio) < old_count) */ { 3683 /* 3684 * At least something was written. Truncate the 3685 * attribute to the successfully written size thus 3686 * releasing any extra space we allocated but ensure we 3687 * do not truncate to less than the old size. 3688 */ 3689 truncate_size = uio_offset(uio); 3690 if (truncate_size < old_size) 3691 truncate_size = old_size; 3692 } 3693 /* 3694 * Truncate the attribute to @truncate_size. 3695 * 3696 * The truncate must be complete or no need to bother at all so 3697 * set the IO_UNIT flag. Also remove unwanted flags. 3698 */ 3699 rflags = (ioflags | IO_UNIT) & ~(IO_APPEND | IO_SYNC | 3700 IO_NOZEROFILL); 3701 err2 = ntfs_attr_resize(ni, truncate_size, rflags, NULL); 3702 if (err2) { 3703 BOOL is_dirty; 3704 3705 /* 3706 * If no other error has occured failing the truncate 3707 * will at worst mean that we have too much allocated 3708 * space which is not a disaster so carry on in this 3709 * case. 3710 * 3711 * If another error has occured any of a number of 3712 * things can now be wrong and in particular if the 3713 * data size is not equal to @truncate_size this is 3714 * very bad news so mark the volume dirty and warn the 3715 * user about it. 3716 */ 3717 is_dirty = (err); 3718 if (is_dirty) { 3719 lck_spin_lock(&ni->size_lock); 3720 if (truncate_size == ni->data_size) 3721 is_dirty = FALSE; 3722 lck_spin_unlock(&ni->size_lock); 3723 } 3724 ntfs_error(ni->vol->mp, "Truncate failed (error %d).%s", 3725 err2, is_dirty ? " Leaving " 3726 "inconsistent data on disk. Unmount " 3727 "and run chkdsk." 
: ""); 3728 if (is_dirty) 3729 NVolSetErrors(ni->vol); 3730 } 3731 } 3732skip_truncate: 3733 if (!was_locked) { 3734 if (!write_locked) 3735 lck_rw_unlock_shared(&ni->lock); 3736 else 3737 lck_rw_unlock_exclusive(&ni->lock); 3738 /* 3739 * If the write was successful and synchronous i/o was 3740 * requested, sync all changes to the backing store. We 3741 * dropped the inode lock already to be able to call 3742 * ntfs_inode_sync() thus if it fails we cannot do anything 3743 * about it so we just return the error even though the 3744 * operation has otherwise been performed. 3745 * 3746 * Note we cannot do this if the inode was already locked or 3747 * the call to ntfs_inode_sync() would cause a deadlock. 3748 */ 3749 if (!err && ioflags & IO_SYNC) { 3750 /* Mask out undersired @ioflags. */ 3751 ioflags &= ~(IO_UNIT | IO_APPEND | IO_DEFWRITE); 3752 err = ntfs_inode_sync(ni, ioflags, FALSE); 3753 } 3754 } 3755 return err; 3756} 3757 3758/** 3759 * ntfs_vnop_write - write a number of bytes from a memory buffer into a file 3760 * @a: arguments to write function 3761 * 3762 * @a contains: 3763 * vnode_t a_vp; vnode of file to write to 3764 * uio_t a_uio; source containing the data to write 3765 * int a_ioflag; flags further describing the write request 3766 * vfs_context_t a_context; 3767 * 3768 * Write uio_resid(@a->a_uio) bytes from the source buffer specified by 3769 * @a->a_uio to the vnode @a-a_vp, starting at byte offset 3770 * uio_offset(@a->a_uio) into the vnode. 3771 * 3772 * The flags in @a->a_ioflag further describe the write request. The following 3773 * ioflags are currently defined in OS X kernel (not all of them are applicable 3774 * to VNOP_WRITE() however): 3775 * IO_UNIT - Do i/o as atomic unit. 3776 * IO_APPEND - Append write to end. 3777 * IO_SYNC - Do i/o synchronously. 3778 * IO_NODELOCKED - Underlying node already locked. 3779 * IO_NDELAY - FNDELAY flag set in file table. 3780 * IO_NOZEROFILL - F_SETSIZE fcntl uses this to prevent zero filling. 
3781 * IO_TAILZEROFILL - Zero fills at the tail of write. 3782 * IO_HEADZEROFILL - Zero fills at the head of write. 3783 * IO_NOZEROVALID - Do not zero fill if valid page. 3784 * IO_NOZERODIRTY - Do not zero fill if page is dirty. 3785 * IO_CLOSE - The i/o was issued from close path. 3786 * IO_NOCACHE - Same effect as VNOCACHE_DATA, but only for this i/o. 3787 * IO_RAOFF - Same effect as VRAOFF, but only for this i/o. 3788 * IO_DEFWRITE - Defer write if vfs.defwrite is set. 3789 * IO_PASSIVE - This is background i/o so do not throttle other i/o. 3790 * 3791 * For compressed and encrypted attributes we abort for now as we do not 3792 * support them yet. 3793 * 3794 * For non-resident attributes we use cluster_write_ext() which deals with 3795 * normal attributes. 3796 * 3797 * Return 0 on success and errno on error. 3798 */ 3799static int ntfs_vnop_write(struct vnop_write_args *a) 3800{ 3801 vnode_t vn = a->a_vp; 3802 ntfs_inode *ni = NTFS_I(vn); 3803 3804 if (!ni) { 3805 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 3806 return EINVAL; 3807 } 3808 /* 3809 * We can only write to regular files and named streams. 3810 * 3811 * Also, do not allow writing to system files and mst protected 3812 * attributes. 
3813 */ 3814 if (vnode_issystem(vn) || NInoMstProtected(ni) || 3815 (!S_ISREG(ni->mode) && !(NInoAttr(ni) && 3816 ni->type == AT_DATA))) { 3817 if (S_ISDIR(ni->mode)) 3818 return EISDIR; 3819 return EPERM; 3820 } 3821 return (int)ntfs_write(ni, a->a_uio, a->a_ioflag, FALSE); 3822} 3823 3824/** 3825 * ntfs_vnop_ioctl - 3826 * 3827 */ 3828static int ntfs_vnop_ioctl(struct vnop_ioctl_args *a) 3829{ 3830 errno_t err; 3831 3832 ntfs_debug("Entering."); 3833 // TODO: 3834 err = ENOTSUP; 3835 ntfs_debug("Done (error %d).", (int)err); 3836 return err; 3837} 3838 3839/** 3840 * ntfs_vnop_select - 3841 * 3842 */ 3843static int ntfs_vnop_select(struct vnop_select_args *a) 3844{ 3845 errno_t err; 3846 3847 ntfs_debug("Entering."); 3848 // TODO: 3849 err = ENOTSUP; 3850 ntfs_debug("Done (error %d).", (int)err); 3851 return err; 3852} 3853 3854/** 3855 * ntfs_vnop_exchange - 3856 * 3857 */ 3858static int ntfs_vnop_exchange(struct vnop_exchange_args *a) 3859{ 3860 errno_t err; 3861 3862 ntfs_debug("Entering."); 3863 // TODO: 3864 err = ENOTSUP; 3865 ntfs_debug("Done (error %d).", (int)err); 3866 return err; 3867} 3868 3869/** 3870 * ntfs_vnop_mmap - map a file (vnode) into memory 3871 * @a: arguments to mmap function 3872 * 3873 * @a contains: 3874 * vnode_t a_vp; file vnode which to map into memory 3875 * int a_fflags; mapping flags for the vnode 3876 * vfs_context_t a_context; 3877 * 3878 * Map the file vnode @a->a_vp into memory applying the mapping flags 3879 * @a->a_fflags which are a combination of one or more of PROT_READ, 3880 * PROT_WRITE, and PROT_EXEC. 3881 * 3882 * VNOP_MMAP() and hence ntfs_vnop_mmap() gets called from ubc_map() which in 3883 * turn gets called from the mmap() system call when a file is being mapped 3884 * into memory. 3885 * 3886 * The mmap() system call does the necessary permission checking and in fact 3887 * ignores the return value from ubc_map() and relies on things not working 3888 * later on for error handling. 
3889 * 3890 * ubc_map() on the other hand does look at the return value of VNOP_MMAP() but 3891 * it only cares for one error code and that is EPERM. All other errors are 3892 * ignored and not passed to its caller. Thus for any return value not equal 3893 * to EPERM, ubc_map() takes an extra reference on the vnode and sets the flags 3894 * UI_ISMAPPED and UI_WASMAPPED in the ubc info of the vnode and for EPERM it 3895 * does not do anything and just returns EPERM to the caller. 3896 * 3897 * In effect neither class of return value (EPERM or not EPERM) actually has 3898 * any effect at all so we do not bother doing any checking here and defer all 3899 * checks to VNOP_PAGEIN() and hence ntfs_vnop_pagein(). 3900 * 3901 * FIXME: This is a huge problem because it means that anyone can use mmap() on 3902 * a system file and then write rubbish into the mapped memory and then trash 3903 * the metadata in the mapped memory by calling msync() to write the rubbish 3904 * out into the system file on disk! This will need to be fixed in the kernel 3905 * I think, i.e. the mmap() system call must fail if VNOP_MMAP() fails. This 3906 * is because we have no way to tell who is causing a page{in,out} at 3907 * ntfs_vnop_page{in,out}() time and for what reason so we have to always 3908 * permit page{in,out} to be called. 3909 * 3910 * Return 0 on success and EPERM on error. 3911 */ 3912static int ntfs_vnop_mmap(struct vnop_mmap_args *a) 3913{ 3914#ifdef DEBUG 3915 ntfs_inode *ni = NTFS_I(a->a_vp); 3916 3917 if (ni) 3918 ntfs_debug("Mapping mft_no 0x%llx, type 0x%x, name_len 0x%x, " 3919 "mapping flags 0x%x.", 3920 (unsigned long long)ni->mft_no, 3921 le32_to_cpu(ni->type), (unsigned)ni->name_len, 3922 a->a_fflags); 3923#endif 3924 /* Nothing to do. 
*/ 3925 return 0; 3926} 3927 3928/** 3929 * ntfs_vnop_mnomap - unmap a file (vnode) from memory 3930 * @a: arguments to mnomap function 3931 * 3932 * @a contains: 3933 * vnode_t a_vp; file vnode which to unmap from memory 3934 * vfs_context_t a_context; 3935 * 3936 * Remove the memory mapping of the file vnode @a->a_vp that was previously 3937 * established via ntfs_vnop_mmap(). 3938 * 3939 * VNOP_MNOMAP() and hence ntfs_vnop_mnomap() gets called from ubc_unmap() when 3940 * a file is being unmapped from memory via the munmap() system call. 3941 * 3942 * ubc_unmap() only calls VNOP_MNOMAP() if the previous VNOP_MMAP() call did 3943 * not return EPERM. 3944 * 3945 * ubc_unmap() completely ignores the return value from VNOP_MNOMAP(). 3946 * 3947 * Always return 0 as the return value is always ignored. 3948 */ 3949static int ntfs_vnop_mnomap(struct vnop_mnomap_args *a) 3950{ 3951#ifdef DEBUG 3952 ntfs_inode *ni = NTFS_I(a->a_vp); 3953 3954 if (ni) 3955 ntfs_debug("Unmapping mft_no 0x%llx, type 0x%x, name_len " 3956 "0x%x.", (unsigned long long)ni->mft_no, 3957 le32_to_cpu(ni->type), (unsigned)ni->name_len); 3958#endif 3959 /* Nothing to do. */ 3960 return 0; 3961} 3962 3963/** 3964 * ntfs_vnop_fsync - synchronize a vnode's in-core state with that on disk 3965 * @a: arguments to fsync function 3966 * 3967 * @a contains: 3968 * vnode_t a_vp; vnode which to sync 3969 * int a_waitfor; if MNT_WAIT wait for i/o to complete 3970 * vfs_context_t a_context; 3971 * 3972 * Write all dirty cached data belonging/related to the vnode @a->a_vp to disk. 3973 * 3974 * If @a->a_waitfor is MNT_WAIT, wait for all i/o to complete before returning. 3975 * 3976 * Note: When called from reclaim, the vnode has a zero v_iocount and 3977 * v_usecount and vnode_isrecycled() is true. 3978 * 3979 * Return 0 on success and the error code on error. 
3980 */ 3981static int ntfs_vnop_fsync(struct vnop_fsync_args *a) 3982{ 3983 vnode_t vn = a->a_vp; 3984 ntfs_inode *ni = NTFS_I(vn); 3985 int sync, err; 3986 3987 if (!ni) { 3988 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 3989 return 0; 3990 } 3991 /* If we are mounted read-only, we do not need to sync anything. */ 3992 if (NVolReadOnly(ni->vol)) 3993 return 0; 3994 sync = (a->a_waitfor == MNT_WAIT) ? IO_SYNC : 0; 3995 ntfs_debug("Entering for inode 0x%llx, waitfor 0x%x, %ssync i/o.", 3996 (unsigned long long)ni->mft_no, a->a_waitfor, 3997 (sync == IO_SYNC) ? "a" : ""); 3998 /* 3999 * We need to allow ENOENT errors since the unlink system call can call 4000 * VNOP_FSYNC() during vclean(). 4001 */ 4002 err = ntfs_inode_sync(ni, sync, FALSE); 4003 if (err == ENOENT) 4004 err = 0; 4005 ntfs_debug("Done (error %d).", err); 4006 return err; 4007} 4008 4009/** 4010 * ntfs_unlink_internal - unlink and ntfs inode from its parent directory 4011 * @dir_ni: directory ntfs inode from which to unlink the ntfs inode 4012 * @ni: base ntfs inode to unlink 4013 * @name: Unicode name of the inode to unlink 4014 * @name_len: length of the name in Unicode characters 4015 * @name_type: Namespace the name is in (i.e. FILENAME_{DOS,WIN32,POSIX,etc}) 4016 * @is_rename: if true ntfs_unlink_internal() is called for a rename operation 4017 * 4018 * Unlink an inode with the ntfs inode @ni and name @name with length @name_len 4019 * Unicode characters and of namespace @name_type from the directory with ntfs 4020 * inode @dir_ni. 4021 * 4022 * If @is_rename is true the caller was ntfs_vnop_rename() in which case the 4023 * link count of the inode to unlink @ni will be one higher than the link count 4024 * in the mft record. 4025 * 4026 * Return 0 on success and the error code on error. 4027 * 4028 * Note that if the name of the inode to be removed is in the WIN32 or DOS 4029 * namespaces, both the WIN32 and the corresponding DOS names are removed. 
4030 * 4031 * Note that for a hard link this function simply removes the name and its 4032 * directory entry and decrements the hard link count whilst for the last name, 4033 * i.e. the last link to an inode, it only removes the directory entry, i.e. it 4034 * does not remove the name, however it does decrement the hard link count to 4035 * zero. This is so that the inode can be undeleted and its original name 4036 * restored. In any case, we do not actually delete the inode here as it may 4037 * still be open and UNIX semantics require an unlinked inode to be still 4038 * accessible through already opened file descriptors. When the last file 4039 * descriptor is closed, we causes the inode to be deleted when the VFS 4040 * notifies us of the last close by calling VNOP_INACTIVE(), i.e. 4041 * ntfs_vnop_inactive(). 4042 */ 4043static errno_t ntfs_unlink_internal(ntfs_inode *dir_ni, ntfs_inode *ni, 4044 ntfschar *name, signed name_len, FILENAME_TYPE_FLAGS name_type, 4045 const BOOL is_rename) 4046{ 4047 ntfs_volume *vol; 4048 ntfs_inode *objid_o_ni; 4049 ntfschar *ntfs_name; 4050 MFT_RECORD *m; 4051 ntfs_attr_search_ctx *actx; 4052 ATTR_RECORD *a; 4053 ntfs_index_context *ictx; 4054 FILENAME_ATTR *fn, *tfn; 4055 signed ntfs_name_len; 4056 unsigned fn_count, tfn_alloc; 4057 errno_t err; 4058 BOOL seen_dos; 4059 FILENAME_TYPE_FLAGS seek_type, fn_type; 4060 4061 vol = ni->vol; 4062 objid_o_ni = vol->objid_o_ni; 4063 ntfs_debug("Unlinking mft_no 0x%llx from directory mft_no 0x%llx, " 4064 "name type 0x%x.", (unsigned long long)ni->mft_no, 4065 (unsigned long long)dir_ni->mft_no, 4066 (unsigned)name_type); 4067 if (NInoAttr(ni)) 4068 panic("%s(): Target inode is an attribute inode.\n", 4069 __FUNCTION__); 4070 /* Start the unlink by evicting the target from the name cache. */ 4071 cache_purge(ni->vn); 4072 /* 4073 * We now need to look up the target name in the target mft record. 
4074 * 4075 * If @name_type is FILENAME_POSIX then @name and @name_len contain the 4076 * correctly cased name and length in Unicode characters, respectively 4077 * so we simply set @ntfs_name and @ntfs_name_len to @name and 4078 * @name_len, respectively. 4079 * 4080 * If @name_type is anything else, i.e. FILENAME_WIN32, FILENAME_DOS, 4081 * or FILENAME_WIN32_AND_DOS we simply need to look for that type of 4082 * name in the target mft record as there can only be one filename 4083 * attribute of this type thus the name is uniquely identified by type 4084 * so the lookup can be optimized that way. 4085 */ 4086 seek_type = 0; 4087 if (name_type == FILENAME_POSIX) { 4088 ntfs_name = name; 4089 ntfs_name_len = name_len; 4090 } else { 4091 /* 4092 * Set @ntfs_name to NULL so we know to do the look up based on 4093 * the filename namespace @seek_type instead. 4094 */ 4095 ntfs_name = NULL; 4096 ntfs_name_len = 0; 4097 seek_type = name_type; 4098 /* 4099 * If the target name is the WIN32 name we first need to delete 4100 * the DOS name thus re-set @seek_type accordingly (see below 4101 * for details). 4102 */ 4103 if (seek_type == FILENAME_WIN32) 4104 seek_type = FILENAME_DOS; 4105 } 4106 /* 4107 * We know this is the base inode since we bailed out for attribute 4108 * inodes above. 4109 */ 4110 err = ntfs_mft_record_map(ni, &m); 4111 if (err) { 4112 ntfs_error(vol->mp, "Failed to map mft record 0x%llx (error " 4113 "%d).", (unsigned long long)ni->mft_no, err); 4114 goto err; 4115 } 4116 /* 4117 * Sanity check that the inode link count is in step with the mft 4118 * record link count. 
4119 */ 4120 if ((!is_rename && ni->link_count != le16_to_cpu(m->link_count)) || 4121 (is_rename && ni->link_count != 4122 (unsigned)le16_to_cpu(m->link_count) + 1)) 4123 panic("%s(): ni->link_count != le16_to_cpu(m->link_count)\n", 4124 __FUNCTION__); 4125 actx = ntfs_attr_search_ctx_get(ni, m); 4126 if (!actx) { 4127 err = ENOMEM; 4128 goto unm_err; 4129 } 4130 /* 4131 * Find the name in the target mft record. 4132 * 4133 * If it is a name in the WIN32 or DOS namespace (but not both), we 4134 * remove the DOS name from both the directory index it is in and from 4135 * the mft record and we decrement the link count both in the base mft 4136 * record and in the ntfs inode. In the case of a WIN32 name, we find 4137 * the corresponding DOS name first and proceed as described. 4138 * 4139 * If the removal of the DOS name from the directory index is 4140 * successful, we change the namespace of the remaining WIN32 name to 4141 * the POSIX namespace, thus if we fail to remove the remaining name 4142 * after successfully removing the DOS name, we still have a consistent 4143 * file system. This also has the side effect of allowing undelete to 4144 * work properly as otherwise the undelete would restore a WIN32 name 4145 * without a corresponding DOS name which would result in an illegal 4146 * inode. 4147 * 4148 * We thus reduce the problem to a normal single name unlink and we can 4149 * now determine whether this unlink is just a hard link removal or the 4150 * final name removal, i.e. the inode is being deleted. 4151 */ 4152 seen_dos = FALSE; 4153restart_name: 4154 /* 4155 * Before looking for the last name and removing it from its directory 4156 * index entry, i.e. 
before unlinking the inode and targeting it for 4157 * deletion, we need to check if the inode has an object id and if so 4158 * we need to remove it from the object id index on the volume (present 4159 * in $O index of $Extend/$ObjId system file), so that the inode cannot 4160 * be found via its object id any more either. Also, when the deleted 4161 * inode gets reused for different purposes, we do not want the old 4162 * object id to still point at it. 4163 * 4164 * If the volume is pre-NTFS 3.0, i.e. it does not support object ids, 4165 * @vol->objid_o_ni will be NULL. It will also be NULL if the volume 4166 * is NTFS 3.0+ but no object ids are present on the volume, thus we 4167 * can make the check conditional on @objid_o_ni not being NULL. 4168 * 4169 * We do this before deleting the last directory entry so that we can 4170 * abort the unlink if we fail to remove the object id from the index 4171 * to ensure the volume does not become inconsistent. 4172 */ 4173 if (objid_o_ni && ni->link_count <= 1) { 4174 err = ntfs_attr_lookup(AT_OBJECT_ID, AT_UNNAMED, 0, 0, NULL, 0, 4175 actx); 4176 if (err) { 4177 if (err != ENOENT) { 4178 ntfs_error(vol->mp, "Failed to look up object " 4179 "id in mft_no 0x%llx (error " 4180 "%d).", 4181 (unsigned long long)ni->mft_no, 4182 err); 4183 goto put_err; 4184 } 4185 /* 4186 * The object id was not found which is fine. The 4187 * inode simply does not have an object id assigned to 4188 * it so there is nothing for us to do. 4189 */ 4190 ntfs_debug("Target mft_no 0x%llx does not have an " 4191 "object id assigned to it.", 4192 (unsigned long long)ni->mft_no); 4193 } else /* if (!err) */ { 4194 INDEX_ENTRY *ie; 4195 GUID object_id; 4196 4197 /* The inode has an object id assigned to it. 
*/ 4198 ntfs_debug("Deleting object id from target mft_no " 4199 "0x%llx.", 4200 (unsigned long long)ni->mft_no); 4201 a = actx->a; 4202 /* 4203 * We need to make a copy of the object id and release 4204 * the mft record before looking up the object id in 4205 * the $ObjID/$O index otherwise we could deadlock if 4206 * the currently mapped mft record is in the same page 4207 * as one of the mft records of $ObjId. 4208 */ 4209 memcpy(&object_id, &((OBJECT_ID_ATTR*)((u8*)a + 4210 le16_to_cpu(a->value_offset)))-> 4211 object_id, sizeof(object_id)); 4212 ntfs_attr_search_ctx_put(actx); 4213 ntfs_mft_record_unmap(ni); 4214 err = vnode_get(objid_o_ni->vn); 4215 if (err) { 4216 ntfs_error(vol->mp, "Failed to get index " 4217 "vnode for $ObjId/$O."); 4218 goto err; 4219 } 4220 lck_rw_lock_exclusive(&objid_o_ni->lock); 4221 ictx = ntfs_index_ctx_get(objid_o_ni); 4222 if (!ictx) { 4223 ntfs_error(vol->mp, "Failed to get index " 4224 "context."); 4225 err = ENOMEM; 4226 goto iput_err; 4227 } 4228restart_ictx: 4229 /* Get the index entry matching the object id. */ 4230 err = ntfs_index_lookup(&object_id, sizeof(object_id), 4231 &ictx); 4232 if (err) { 4233 if (err == ENOENT) { 4234 ntfs_error(vol->mp, "Failed to delete " 4235 "object id of target " 4236 "inode 0x%llx from " 4237 "object id index " 4238 "because the object " 4239 "id was not found in " 4240 "the object id " 4241 "index. Volume is " 4242 "corrupt. Run " 4243 "chkdsk.", 4244 (unsigned long long) 4245 ni->mft_no); 4246 NVolSetErrors(vol); 4247 err = EIO; 4248 } else 4249 ntfs_error(vol->mp, "Failed to delete " 4250 "object id of target " 4251 "inode 0x%llx from " 4252 "object id index " 4253 "because looking up " 4254 "the object id in the " 4255 "object id index " 4256 "failed (error %d)." , 4257 (unsigned long long) 4258 ni->mft_no, err); 4259 goto iput_err; 4260 } 4261 ie = ictx->entry; 4262 /* We now have the index entry, delete it. 
*/ 4263 err = ntfs_index_entry_delete(ictx); 4264 if (err) { 4265 if (err == -EAGAIN) { 4266 ntfs_debug("Restarting object id " 4267 "delete as tree was " 4268 "rearranged."); 4269 ntfs_index_ctx_reinit(ictx, objid_o_ni); 4270 goto restart_ictx; 4271 } 4272 ntfs_error(vol->mp, "Failed to delete object " 4273 "id of target inode 0x%llx " 4274 "from object id index (error " 4275 "%d).", 4276 (unsigned long long)ni->mft_no, 4277 err); 4278 goto iput_err; 4279 } 4280 ntfs_index_ctx_put(ictx); 4281 lck_rw_unlock_exclusive(&objid_o_ni->lock); 4282 (void)vnode_put(objid_o_ni->vn); 4283 /* 4284 * Now get back the mft record so we can re-look up the 4285 * object id attribute so we can delete it. 4286 * 4287 * This means we do not need to worry about 4288 * inconsistencies to do with the object id in our 4289 * error handling code paths later on. 4290 */ 4291 err = ntfs_mft_record_map(ni, &m); 4292 if (err) { 4293 ntfs_error(vol->mp, "Failed to re-map mft " 4294 "record 0x%llx (error %d). " 4295 "Leaving inconstent " 4296 "metadata. Run chkdsk.", 4297 (unsigned long long)ni->mft_no, 4298 err); 4299 NVolSetErrors(vol); 4300 goto err; 4301 } 4302 actx = ntfs_attr_search_ctx_get(ni, m); 4303 if (!actx) { 4304 ntfs_error(vol->mp, "Failed to re-get " 4305 "attribute search context for " 4306 "mft record 0x%llx (error " 4307 "%d). Leaving inconstent " 4308 "metadata. Run chkdsk.", 4309 (unsigned long long)ni->mft_no, 4310 err); 4311 NVolSetErrors(vol); 4312 err = ENOMEM; 4313 goto unm_err; 4314 } 4315 err = ntfs_attr_lookup(AT_OBJECT_ID, AT_UNNAMED, 0, 0, 4316 NULL, 0, actx); 4317 if (err) { 4318 ntfs_error(vol->mp, "Failed to re-look up " 4319 "object id in mft_no 0x%llx " 4320 "(error %d). Leaving " 4321 "inconsistent metadata. Run " 4322 "chkdsk.", 4323 (unsigned long long)ni->mft_no, 4324 err); 4325 NVolSetErrors(ni->vol); 4326 err = EIO; 4327 goto put_err; 4328 } 4329 /* 4330 * Remove the object id attribute from the mft record 4331 * and mark the mft record dirty. 
4332 */ 4333 err = ntfs_attr_record_delete(ni, actx); 4334 if (err) { 4335 ntfs_error(vol->mp, "Failed to delete object " 4336 "id in mft_no 0x%llx (error " 4337 "%d). Leaving inconsistent " 4338 "metadata. Run chkdsk.", 4339 (unsigned long long)ni->mft_no, 4340 err); 4341 goto put_err; 4342 } 4343 } 4344 /* Reinit the search context for the AT_FILENAME lookup. */ 4345 ntfs_attr_search_ctx_reinit(actx); 4346 } 4347 /* Use label and goto instead of a loop to reduce indentation. */ 4348 fn_count = 0; 4349next_name: 4350 /* Increment the filename attribute counter. */ 4351 fn_count++; 4352 err = ntfs_attr_lookup(AT_FILENAME, AT_UNNAMED, 0, 0, NULL, 0, actx); 4353 if (err) { 4354 if (err == ENOENT) { 4355 /* 4356 * If the name we are looking for is not found there is 4357 * either some corruption or a bug given that a call to 4358 * ntfs_lookup_inode_by_name() just found the name in 4359 * the directory index. 4360 */ 4361 ntfs_error(vol->mp, "The target filename was not " 4362 "found in the mft record 0x%llx. " 4363 "This is not possible. This is " 4364 "either due to corruption or due to a " 4365 "driver bug. Run chkdsk.", 4366 (unsigned long long)ni->mft_no); 4367 NVolSetErrors(vol); 4368 err = EIO; 4369 } else 4370 ntfs_error(vol->mp, "Failed to look up target " 4371 "filename in the mft record 0x%llx " 4372 "(error %d).", 4373 (unsigned long long)ni->mft_no, err); 4374 goto put_err; 4375 } 4376 a = actx->a; 4377 fn = (FILENAME_ATTR*)((u8*)a + le16_to_cpu(a->value_offset)); 4378 fn_type = fn->filename_type; 4379 /* 4380 * If this is a specific DOS or WIN32 or combined name lookup, no need 4381 * to compare the actual name as there can only be one DOS and one 4382 * WIN32 name or only one combined name in an inode. 4383 */ 4384 if (seek_type && seek_type != FILENAME_POSIX) { 4385 /* 4386 * If this filename attribute does not match the target name 4387 * try the next one. 
4388 */ 4389 if (seek_type != fn_type) 4390 goto next_name; 4391 /* We found the filename attribute matching the target name. */ 4392 if (fn_type == FILENAME_WIN32) { 4393 /* 4394 * We were looking for the WIN32 name so we can remove 4395 * it after having removed the DOS name. We now found 4396 * it, so switch it to the POSIX namespace as described 4397 * above and then go ahead and delete it. 4398 */ 4399 ntfs_debug("Switching namespace of filename attribute " 4400 "from WIN32 to POSIX."); 4401 fn_type = fn->filename_type = FILENAME_POSIX; 4402 NInoSetMrecNeedsDirtying(actx->ni); 4403 } 4404 goto found_name; 4405 } 4406 /* If this is the DOS name, note that we have seen it. */ 4407 if (fn_type == FILENAME_DOS) 4408 seen_dos = TRUE; 4409 /* If the names do not match, continue searching. */ 4410 if (fn->filename_length != ntfs_name_len) 4411 goto next_name; 4412 if (MREF_LE(fn->parent_directory) != dir_ni->mft_no) 4413 goto next_name; 4414 if (bcmp(fn->filename, ntfs_name, ntfs_name_len * sizeof(ntfschar))) 4415 goto next_name; 4416 /* Found the matching name. */ 4417 if (fn_type == FILENAME_WIN32) { 4418 /* 4419 * Pure WIN32 name. Repeat the lookup but for the DOS name 4420 * this time so we can remove that first. 4421 */ 4422 seek_type = FILENAME_DOS; 4423 /* 4424 * If @seen_dos is true, then restart the lookup from the 4425 * beginning and if not then continue the lookup where we left 4426 * off. 4427 */ 4428 if (seen_dos) { 4429 ntfs_attr_search_ctx_reinit(actx); 4430 fn_count = 0; 4431 } 4432 goto next_name; 4433 } 4434 if (fn_type == FILENAME_DOS) { 4435 /* 4436 * This cannot happen as ntfs_lookup_inode_by_name() always 4437 * returns @name for pure DOS names and hence we would have 4438 * @seek_type == FILENAME_DOS and thus would have picked this 4439 * filename attribute up above without ever doing a name based 4440 * match. 4441 */ 4442 ntfs_error(vol->mp, "Filename is in DOS namespace. This is " 4443 "not possible. 
This is either due to " 4444 "corruption or due to a driver bug. Run " 4445 "chkdsk."); 4446 NVolSetErrors(vol); 4447 err = EIO; 4448 goto put_err; 4449 } 4450found_name: 4451 /* 4452 * We found the target filename attribute and can now remove it from 4453 * the directory index. But before we can do that we need to make a 4454 * copy of the filename attribute value so we can release the mft 4455 * record before we delete the directory index entry. This is needed 4456 * because when we hold the target mft record and we call 4457 * ntfs_dir_entry_delete() this would cause the mft record for the 4458 * directory to be mapped which could result in a deadlock in the event 4459 * that both mft records are in the same page. 4460 */ 4461 tfn_alloc = le32_to_cpu(a->value_length); 4462 tfn = OSMalloc(tfn_alloc, ntfs_malloc_tag); 4463 if (!tfn) { 4464 /* 4465 * TODO: If @seek_type == FILENAME_WIN32 && 4466 * @fn->filename_type == FILENAME_POSIX we need to update the 4467 * directory entry filename_type to FILENAME_POSIX. See below 4468 * for how this is done for the error case in 4469 * ntfs_dir_entry_delete(). Given a memory allocation just 4470 * failed it is highly unlikely we would succeed in trying to 4471 * look up the directory entry so that we could change the 4472 * filename_type in it so at least for now just set the volume 4473 * has errors flag instead. 4474 */ 4475 ntfs_error(vol->mp, "Failed to allocate memory for temporary " 4476 "filename attribute. Leaving inconsistent " 4477 "metadata. Run chkdsk."); 4478 NVolSetErrors(vol); 4479 err = EIO; 4480 goto put_err; 4481 } 4482 memcpy(tfn, fn, tfn_alloc); 4483 ntfs_attr_search_ctx_put(actx); 4484 ntfs_mft_record_unmap(ni); 4485 /* 4486 * We copied the name and can now remove it from the directory index. 
4487 * If the name is in the POSIX namespace, we may have converted it from 4488 * a pure WIN32 name after removing the corresponding DOS name, in 4489 * which case we need to update the index entry to reflect the 4490 * conversion should we fail to remove it from the directory index. 4491 * ntfs_dir_entry_delete() takes care of this for us. 4492 */ 4493 err = ntfs_dir_entry_delete(dir_ni, ni, tfn, tfn_alloc); 4494 if (err) { 4495 ntfs_error(vol->mp, "Failed to delete directory index entry " 4496 "(error %d).", err); 4497 goto err; 4498 } 4499 /* 4500 * Now get back the mft record. 4501 * 4502 * If getting back the mft record fails there is nothing we can do to 4503 * recover and must bail out completely leaving inconsistent metadata. 4504 * 4505 * TODO: We could try to add the dir entry back again in an attempt to 4506 * recover but as above we likely fail a memory allocation it is highly 4507 * unlikely we would succeed in trying to do the lookup and addition of 4508 * the directory entry. 4509 */ 4510 err = ntfs_mft_record_map(ni, &m); 4511 if (err) { 4512 ntfs_error(vol->mp, "Failed to re-map mft record 0x%llx " 4513 "(error %d). Leaving inconsistent metadata. " 4514 "Run chkdsk.", (unsigned long long)ni->mft_no, 4515 err); 4516 NVolSetErrors(vol); 4517 goto err; 4518 } 4519 actx = ntfs_attr_search_ctx_get(ni, m); 4520 if (!actx) { 4521 ntfs_error(vol->mp, "Failed to re-get attribute search " 4522 "context for mft record 0x%llx (error %d). " 4523 "Leaving inconsitent metadata. Run chkdsk.", 4524 (unsigned long long)ni->mft_no, err); 4525 NVolSetErrors(vol); 4526 err = EIO; 4527 goto unm_err; 4528 } 4529 /* 4530 * If the name is in the DOS namespace or this is not the last name we 4531 * also need to remove the name from the mft record it is in and 4532 * decrement the link count in the base mft record. 4533 */ 4534 if (fn_type == FILENAME_DOS || ni->link_count > 1) { 4535 /* Now need to re-lookup the target filename attribute. 
*/ 4536 while (fn_count > 0) { 4537 fn_count--; 4538 err = ntfs_attr_lookup(AT_FILENAME, AT_UNNAMED, 0, 0, 4539 NULL, 0, actx); 4540 if (!err) 4541 continue; 4542 ntfs_error(vol->mp, "Failed to re-look up target " 4543 "filename in mft_no 0x%llx (error %d).", 4544 (unsigned long long)ni->mft_no, err); 4545 NVolSetErrors(vol); 4546 err = EIO; 4547 goto put_err; 4548 } 4549 a = actx->a; 4550 if (a->type != AT_FILENAME) 4551 panic("%s(): a->type (0x%x) != AT_FILENAME (0x30)\n", 4552 __FUNCTION__, le32_to_cpu(a->type)); 4553 fn = (FILENAME_ATTR*)((u8*)a + le16_to_cpu(a->value_offset)); 4554 if (fn_type != fn->filename_type) 4555 panic("%s(): fn_type != fn->filename_type\n", 4556 __FUNCTION__); 4557 /* Remove the filename from the mft record, too. */ 4558 err = ntfs_attr_record_delete(ni, actx); 4559 if (err) { 4560 ntfs_error(vol->mp, "Failed to delete filename " 4561 "attribute from mft_no 0x%llx (error " 4562 "%d).", (unsigned long long)ni->mft_no, 4563 err); 4564 NVolSetErrors(vol); 4565 err = EIO; 4566 goto put_err; 4567 } 4568 /* 4569 * Update the hard link count in the base mft record. Note we 4570 * subtract one from the inode link count if this is a rename 4571 * as the link count has been elevated by one by the caller. 4572 */ 4573 m->link_count = cpu_to_le16(ni->link_count - 1 - 4574 (is_rename ? 1 : 0)); 4575 } else /* if (fn_type != FILENAME_DOS && ni->link_count <= 1) */ { 4576 /* 4577 * This is the last name, so we need to mark the mft record as 4578 * unused in the mft record flags so no-one can open it by 4579 * accident and so that, in case of a crash between now and the 4580 * deletion of the inode, ntfsck will know that we meant to 4581 * delete the inode rather than that we were in the process of 4582 * allocating or renaming it so it will do the Right Thing(TM) 4583 * and complete the deletion process. 4584 */ 4585 m->flags &= ~MFT_RECORD_IN_USE; 4586 /* Ensure the base mft record gets written out. 
*/ 4587 NInoSetMrecNeedsDirtying(ni); 4588 } 4589 /* 4590 * We have either deleted the filename completely or we only removed 4591 * the directory index entry if this is the last name. 4592 * 4593 * In either case, we need to update the hard link count and the ctime 4594 * in the ntfs inode (the ctime is the last_mft_change_time on NTFS). 4595 */ 4596 ni->link_count--; 4597 ni->last_mft_change_time = dir_ni->last_mft_change_time; 4598 NInoSetDirtyTimes(ni); 4599 /* 4600 * If this is the DOS name, we now need to find the WIN32 name, so it 4601 * can be deleted, too. Otherwise we are done. 4602 */ 4603 if (fn_type == FILENAME_DOS) { 4604 seek_type = FILENAME_WIN32; 4605 /* 4606 * We looked up the DOS name above thus we need to reinitialize 4607 * the search context for the WIN32 name lookup. 4608 */ 4609 ntfs_attr_search_ctx_reinit(actx); 4610 fn_count = 0; 4611 goto restart_name; 4612 } 4613 /* 4614 * If we removed a hard link but the inode is not deleted yet we need 4615 * to remove the parent vnode from the vnode as this association may no 4616 * longer exist. 4617 * 4618 * The same is true for the vnode name as we have just unlinked it. 4619 * 4620 * Note we skip this for the rename case because the subsequent call to 4621 * ntfs_link_internal() is going to update the vnode identity with the 4622 * new name and parent so no need to do wipe them here. 
4623 */ 4624 if (ni->link_count > 0 && !is_rename) 4625 vnode_update_identity(ni->vn, NULL, NULL, 0, 0, 4626 VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME); 4627 ntfs_debug("Done."); 4628put_err: 4629 ntfs_attr_search_ctx_put(actx); 4630unm_err: 4631 ntfs_mft_record_unmap(ni); 4632err: 4633 return err; 4634iput_err: 4635 if (ictx) 4636 ntfs_index_ctx_put(ictx); 4637 lck_rw_unlock_exclusive(&objid_o_ni->lock); 4638 (void)vnode_put(objid_o_ni->vn); 4639 return err; 4640} 4641 4642/** 4643 * ntfs_unlink - unlink and ntfs inode from its parent directory 4644 * @dir_ni: directory ntfs inode from which to unlink the ntfs inode 4645 * @ni: base ntfs inode to unlink 4646 * @cn: name of the inode to unlink 4647 * @flags: flags describing the unlink request 4648 * @is_rmdir: true if called from VNOP_RMDIR() and hence ntfs_vnop_rmdir() 4649 * 4650 * Unlink an inode with the ntfs inode @ni and name as specified in @cn from 4651 * the directory with ntfs inode @dir_ni. 4652 * 4653 * The flags in @flags further describe the unlink request. The following 4654 * flags are currently defined in OS X kernel: 4655 * VNODE_REMOVE_NODELETEBUSY - Do not delete busy files, i.e. use 4656 * Carbon delete semantics). 4657 * 4658 * If @is_rmdir is true the caller is VNOP_RMDIR() and hence ntfs_vnop_rmdir() 4659 * and if @is_rmdir is false the caller is VNOP_REMOVE() and hence 4660 * ntfs_vnop_remove(). Note @flags is always zero if @is_rmdir is true. 4661 * 4662 * Return 0 on success and the error code on error. 4663 * 4664 * Note that if the name of the inode to be removed is in the WIN32 or DOS 4665 * namespaces, both the WIN32 and the corresponding DOS names are removed. 4666 * 4667 * Note that for a hard link this function simply removes the name and its 4668 * directory entry and decrements the hard link count whilst for the last name, 4669 * i.e. the last link to an inode, it only removes the directory entry, i.e. 
it 4670 * does not remove the name, however it does decrement the hard link count to 4671 * zero. This is so that the inode can be undeleted and its original name 4672 * restored. In any case, we do not actually delete the inode here as it may 4673 * still be open and UNIX semantics require an unlinked inode to be still 4674 * accessible through already opened file descriptors. When the last file 4675 * descriptor is closed, we causes the inode to be deleted when the VFS 4676 * notifies us of the last close by calling VNOP_INACTIVE(), i.e. 4677 * ntfs_vnop_inactive(). 4678 */ 4679static errno_t ntfs_unlink(ntfs_inode *dir_ni, ntfs_inode *ni, 4680 struct componentname *cn, const int flags, const BOOL is_rmdir) 4681{ 4682 MFT_REF mref; 4683 ntfs_volume *vol; 4684 ntfs_inode *objid_o_ni; 4685 ntfschar *ntfs_name; 4686 ntfs_dir_lookup_name *name = NULL; 4687 size_t ntfs_name_size; 4688 signed ntfs_name_len; 4689 errno_t err; 4690 FILENAME_TYPE_FLAGS ntfs_name_type; 4691 ntfschar ntfs_name_buf[NTFS_MAX_NAME_LEN]; 4692 4693 vol = ni->vol; 4694 objid_o_ni = vol->objid_o_ni; 4695 ntfs_debug("Unlinking %s%.*s with mft_no 0x%llx from directory " 4696 "mft_no 0x%llx, flags 0x%x.", 4697 is_rmdir ? "directory " : "", (int)cn->cn_namelen, 4698 cn->cn_nameptr, (unsigned long long)ni->mft_no, 4699 (unsigned long long)dir_ni->mft_no, flags); 4700 /* 4701 * Do not allow attribute inodes or raw inodes to be deleted. Note 4702 * raw inodes are always attribute inodes, too. 4703 */ 4704 if (NInoAttr(ni)) { 4705 ntfs_debug("Target %.*s, mft_no 0x%llx is a%s inode, " 4706 "returning EPERM.", (int)cn->cn_namelen, 4707 cn->cn_nameptr, (unsigned long long)ni->mft_no, 4708 NInoAttr(ni) ? "n attribute" : " raw"); 4709 return EPERM; 4710 } 4711 /* The parent inode must be a directory. 
*/ 4712 if (!S_ISDIR(dir_ni->mode)) { 4713 ntfs_debug("Parent mft_no 0x%llx is not a directory, " 4714 "returning ENOTDIR.", 4715 (unsigned long long)dir_ni->mft_no); 4716 return ENOTDIR; 4717 } 4718 /* Check for "." removal. */ 4719 if (ni == dir_ni) { 4720 ntfs_debug("Target %.*s, mft_no 0x%llx is the same as its " 4721 "parent directory, returning EINVAL.", 4722 (int)cn->cn_namelen, cn->cn_nameptr, 4723 (unsigned long long)ni->mft_no); 4724 return EINVAL; 4725 } 4726 /* Lock both the parent directory and the target inode for writing. */ 4727 lck_rw_lock_exclusive(&dir_ni->lock); 4728 lck_rw_lock_exclusive(&ni->lock); 4729 /* Ensure the parent directory has not been deleted. */ 4730 if (!dir_ni->link_count) { 4731 ntfs_debug("Parent directory mft_no 0x%llx has been deleted, " 4732 "returning ENOENT.", 4733 (unsigned long long)dir_ni->mft_no); 4734 /* 4735 * If the directory is somehow still in the name cache remove 4736 * it now. 4737 */ 4738 cache_purge(dir_ni->vn); 4739 err = ENOENT; 4740 goto err; 4741 } 4742 /* Ensure tha target has not been deleted by someone else already. */ 4743 if (!ni->link_count) { 4744 ntfs_debug("Target %.*s, mft_no 0x%llx has been deleted, " 4745 "returning ENOENT.", (int)cn->cn_namelen, 4746 cn->cn_nameptr, (unsigned long long)ni->mft_no); 4747 /* 4748 * If the target is somehow still in the name cache remove it 4749 * now. 4750 */ 4751 cache_purge(ni->vn); 4752 err = ENOENT; 4753 goto err; 4754 } 4755 /* 4756 * If this is a directory removal, i.e. rmdir, need to check that the 4757 * directory is empty. 4758 * 4759 * Note we already checked for "." removal and we do not need to check 4760 * for ".." removal because that would fail the directory is empty 4761 * check as the parent directory would at least have one entry and that 4762 * is the current directory. 
4763 */ 4764 if (is_rmdir) { 4765 err = ntfs_dir_is_empty(ni); 4766 if (err) { 4767 if (err == ENOTEMPTY) 4768 ntfs_debug("Target directory %.*s, mft_no " 4769 "0x%llx is not empty, " 4770 "returning ENOTEMPTY.", 4771 (int)cn->cn_namelen, 4772 cn->cn_nameptr, 4773 (unsigned long long)ni->mft_no); 4774 else 4775 ntfs_error(vol->mp, "Failed to determine if " 4776 "target directory %.*s, " 4777 "mft_no 0x%llx is empty " 4778 "(error %d).", 4779 (int)cn->cn_namelen, 4780 cn->cn_nameptr, 4781 (unsigned long long)ni->mft_no, 4782 err); 4783 goto err; 4784 } 4785 } else { 4786 /* Do not allow directories to be unlinked. */ 4787 if (S_ISDIR(ni->mode)) { 4788 ntfs_debug("Target %.*s, mft_no 0x%llx is a " 4789 "directory, returning EPERM.", 4790 (int)cn->cn_namelen, cn->cn_nameptr, 4791 (unsigned long long)ni->mft_no); 4792 err = EPERM; 4793 goto err; 4794 } 4795 } 4796 /* 4797 * Do not allow any of the system files to be deleted. 4798 * 4799 * For NTFS 3.0+ volumes do not allow any of the extended system files 4800 * to be deleted, either. 4801 * 4802 * Note we specifically blacklist all system files that we make use of 4803 * except for the transaction log $UsnJrnl as that is allowed to be 4804 * deleted and its deletion means that transaction logging is disabled. 4805 * 4806 * Note that if the transaction log is present it will be held busy by 4807 * the NTFS driver thus unlinking the $UsnJrnl will not actually delete 4808 * it until the driver is unmounted. FIXME: Should we leave it like 4809 * this or should we detach the $UsnJrnl vnodes from the volume and 4810 * release them so they can be deleted immediately? 4811 * 4812 * TODO: What about all the new metadata files introduced with Windows 4813 * Vista? We are currently ignoring them and allowing them to be 4814 * deleted... 
4815 */ 4816 if (ni->file_attributes & FILE_ATTR_SYSTEM) { 4817 BOOL is_system = FALSE; 4818 if (vol->major_ver <= 1) { 4819 if (ni->mft_no < FILE_Extend) 4820 is_system = TRUE; 4821 } else { 4822 if (ni->mft_no <= FILE_Extend) 4823 is_system = TRUE; 4824 if (dir_ni == vol->extend_ni) { 4825 if (ni == vol->objid_ni || 4826 ni == vol->quota_ni) 4827 is_system = TRUE; 4828 } 4829 } 4830 if (is_system) { 4831 ntfs_debug("Target %.*s, mft_no 0x%llx is a%s system " 4832 "file, returning EPERM.", 4833 (int)cn->cn_namelen, cn->cn_nameptr, 4834 (unsigned long long)ni->mft_no, 4835 (dir_ni == vol->extend_ni) ? 4836 "n extended" : ""); 4837 err = EPERM; 4838 goto err; 4839 } 4840 } 4841 /* 4842 * Ensure the file is not read-only (the read-only bit is ignored for 4843 * directories. 4844 */ 4845 if (!S_ISDIR(ni->mode) && ni->file_attributes & FILE_ATTR_READONLY) { 4846 ntfs_debug("Target %.*s, mft_no 0x%llx is marked read-only, " 4847 "returning EPERM.", (int)cn->cn_namelen, 4848 cn->cn_nameptr, 4849 (unsigned long long)ni->mft_no); 4850 err = EPERM; 4851 goto err; 4852 } 4853 /* 4854 * If the inode is a reparse point or if the inode is offline we cannot 4855 * remove a name from it yet. TODO: Implement this. 4856 */ 4857 if (ni->file_attributes & (FILE_ATTR_REPARSE_POINT | 4858 FILE_ATTR_OFFLINE)) { 4859 ntfs_error(vol->mp, "Target %.*s, mft_no 0x%llx is %s. " 4860 "Deleting names from such inodes is not " 4861 "supported yet, returning ENOTSUP.", 4862 (int)cn->cn_namelen, cn->cn_nameptr, 4863 (unsigned long long)ni->mft_no, 4864 ni->file_attributes & FILE_ATTR_REPARSE_POINT ? 4865 "a reparse point" : "offline"); 4866 err = ENOTSUP; 4867 goto err; 4868 } 4869 /* 4870 * If Carbon delete semantics are requested, do not allow busy files to 4871 * be unlinked. Note we do not use vnode_isinuse() as that accounts 4872 * for open named streams/extended attributes as well which we do not 4873 * care about. 
We only care for actually opened files thus we keep 4874 * track of them ourselves. 4875 */ 4876 if (flags & VNODE_REMOVE_NODELETEBUSY && ni->nr_opens) { 4877 ntfs_debug("Target %.*s, mft_no 0x%llx is busy (nr_opens " 4878 "0x%x) and Carbon delete semantics were " 4879 "requested, returning EBUSY.", 4880 (int)cn->cn_namelen, cn->cn_nameptr, 4881 (unsigned long long)ni->mft_no, 4882 (unsigned)ni->nr_opens); 4883 err = EBUSY; 4884 goto err; 4885 } 4886 /* 4887 * We need to make sure the target still has the name specified in @cn 4888 * that is being unlinked. It could have been unlinked or renamed 4889 * before we took the locks on the parent directory and the target. 4890 * 4891 * To do this, first convert the name of the target from utf8 to 4892 * Unicode then look up the converted name in the directory index. 4893 */ 4894 ntfs_name = ntfs_name_buf; 4895 ntfs_name_size = sizeof(ntfs_name_buf); 4896 ntfs_name_len = utf8_to_ntfs(vol, (u8*)cn->cn_nameptr, cn->cn_namelen, 4897 &ntfs_name, &ntfs_name_size); 4898 if (ntfs_name_len < 0) { 4899 err = -ntfs_name_len; 4900 if (err == ENAMETOOLONG) 4901 ntfs_debug("Failed (name is too long)."); 4902 else 4903 ntfs_error(vol->mp, "Failed to convert name to " 4904 "Unicode (error %d).", err); 4905 goto err; 4906 } 4907 err = ntfs_lookup_inode_by_name(dir_ni, ntfs_name, ntfs_name_len, 4908 &mref, &name); 4909 if (err) { 4910 if (err != ENOENT) { 4911 ntfs_error(vol->mp, "Failed to find name in directory " 4912 "(error %d).", err); 4913 goto err; 4914 } 4915enoent: 4916 /* 4917 * The name does not exist in the directory @dir_ni. 4918 * 4919 * This means someone renamed or deleted the name from the 4920 * directory before we managed to take the locks. 4921 */ 4922 ntfs_debug("Target %.*s, mft_no 0x%llx has been renamed or " 4923 "deleted already, returning ENOENT.", 4924 (int)cn->cn_namelen, cn->cn_nameptr, 4925 (unsigned long long)ni->mft_no); 4926 /* 4927 * If the target is somehow still in the name cache remove it 4928 * now. 
4929 */ 4930 cache_purge(ni->vn); 4931 err = ENOENT; 4932 goto err; 4933 } 4934 /* 4935 * We found the target name in the directory index but does it still 4936 * point to the same mft record? The sequence number check ensures the 4937 * inode was not deleted and recreated with the same name and the same 4938 * mft record number. 4939 */ 4940 if (mref != MK_MREF(ni->mft_no, ni->seq_no)) 4941 goto enoent; 4942 /* 4943 * We are going to go ahead with unlinking the target. 4944 * 4945 * There are several different types of outcome from the above lookup 4946 * that need to be handled. 4947 * 4948 * If @name is NULL @ntfs_name contains the correctly cased name thus 4949 * we can simply look for that. In this case we set the name type to 0 4950 * as we do not know which namespace the name is in. 4951 * 4952 * If @name is not NULL the correctly cased name is in @name->name thus 4953 * we look for that. In this case we do know which namespace the name 4954 * is in as it is @name->type. 4955 */ 4956 ntfs_name_type = 0; 4957 if (name) { 4958 ntfs_name = name->name; 4959 ntfs_name_len = name->len; 4960 ntfs_name_type = name->type; 4961 } 4962 /* Now we can perform the actual unlink. 
*/ 4963 err = ntfs_unlink_internal(dir_ni, ni, ntfs_name, ntfs_name_len, 4964 ntfs_name_type, FALSE); 4965 if (err) 4966 ntfs_error(vol->mp, "Failed to unlink %.*s with mft_no 0x%llx " 4967 "from directory mft_no 0x%llx (error %d).", 4968 (int)cn->cn_namelen, cn->cn_nameptr, 4969 (unsigned long long)ni->mft_no, 4970 (unsigned long long)dir_ni->mft_no, err); 4971 else 4972 ntfs_debug("Done."); 4973err: 4974 if (name) 4975 OSFree(name, sizeof(*name), ntfs_malloc_tag); 4976 lck_rw_unlock_exclusive(&ni->lock); 4977 lck_rw_unlock_exclusive(&dir_ni->lock); 4978 return err; 4979} 4980 4981/** 4982 * ntfs_vnop_remove - unlink a file 4983 * @a: arguments to remove function 4984 * 4985 * @a contains: 4986 * vnode_t a_dvp; directory from which to unlink the file 4987 * vnode_t a_vp; file to unlink 4988 * struct componentname *a_cnp; name of the file to unlink 4989 * int a_flags; flags describing the unlink request 4990 * vfs_context_t a_context; 4991 * 4992 * Unlink a file with vnode @a->a_vp and name as specified in @a->a_cnp form 4993 * the directory with vnode @a->a_dvp. 4994 * 4995 * The flags in @a->a_flags further describe the unlink request. The following 4996 * flags are currently defined in OS X kernel: 4997 * VNODE_REMOVE_NODELETEBUSY - Do not delete busy files, i.e. use 4998 * Carbon delete semantics). 4999 * 5000 * Return 0 on success and errno on error. 5001 * 5002 * Note that if the name of the inode to be removed is in the WIN32 or DOS 5003 * namespaces, both the WIN32 and the corresponding DOS names are removed. 5004 * 5005 * Note that for a hard link this function simply removes the name and its 5006 * directory entry and decrements the hard link count whilst for the last name, 5007 * i.e. the last link to an inode, it only removes the directory entry, i.e. it 5008 * does not remove the name, however it does decrement the hard link count to 5009 * zero. This is so that the inode can be undeleted and its original name 5010 * restored. 
In any case, we do not actually delete the inode here as it may 5011 * still be open and UNIX semantics require an unlinked inode to be still 5012 * accessible through already opened file descriptors. When the last file 5013 * descriptor is closed, we causes the inode to be deleted when the VFS 5014 * notifies us of the last close by calling VNOP_INACTIVE(), i.e. 5015 * ntfs_vnop_inactive(). 5016 */ 5017static int ntfs_vnop_remove(struct vnop_remove_args *a) 5018{ 5019 ntfs_inode *dir_ni = NTFS_I(a->a_dvp); 5020 ntfs_inode *ni = NTFS_I(a->a_vp); 5021 errno_t err; 5022 5023 if (!dir_ni || !ni) { 5024 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 5025 return EINVAL; 5026 } 5027 ntfs_debug("Entering."); 5028 err = ntfs_unlink(NTFS_I(a->a_dvp), NTFS_I(a->a_vp), a->a_cnp, 5029 a->a_flags, FALSE); 5030 ntfs_debug("Done (error %d).", (int)err); 5031 return err; 5032} 5033 5034/** 5035 * ntfs_link_internal - create a hard link to an inode 5036 * @ni: base ntfs inode to create hard link to 5037 * @dir_ni: directory ntfs inode in which to create the hard link 5038 * @cn: componentname specifying name of the hard link to create 5039 * @is_rename: if true ntfs_link_internal() is called for a rename 5040 * @name: Unicode name of the inode to unlink 5041 * @name_len: length of the name in Unicode characters 5042 * 5043 * Create a hard link to the ntfs inode @ni with name as specified in @cn in 5044 * the directory ntfs inode @dir_ni. 5045 * 5046 * If @is_rename is true the caller was ntfs_vnop_rename() in which case the 5047 * link count of the inode to link to will be one higher than the link count in 5048 * the mft record and @name and @name_len specify the Unicode name and length 5049 * in Unicode characters corresponding to @cn, respectively so we do not have 5050 * to convert @cn to Unicode in this case. 5051 * 5052 * If @is_rename is false then @name and @name_len are undefined. 5053 * 5054 * Return 0 on success and errno on error. 
5055 * 5056 * Note we always create filenames in the POSIX namespace. 5057 */ 5058static errno_t ntfs_link_internal(ntfs_inode *ni, ntfs_inode *dir_ni, 5059 struct componentname *cn, const BOOL is_rename, 5060 const ntfschar *name, const signed name_len) 5061{ 5062 ntfs_volume *vol; 5063 FILENAME_ATTR *fn; 5064 ntfschar *ntfs_name; 5065 MFT_RECORD *m; 5066 ntfs_attr_search_ctx *ctx; 5067 size_t ntfs_name_size; 5068 signed ntfs_name_len; 5069 unsigned fn_alloc, fn_size; 5070 errno_t err, err2; 5071 BOOL is_dir; 5072 5073 vol = ni->vol; 5074 ntfs_debug("Creating a hard link to mft_no 0x%llx, named %.*s in " 5075 "directory mft_no 0x%llx.", 5076 (unsigned long long)ni->mft_no, (int)cn->cn_namelen, 5077 cn->cn_nameptr, (unsigned long long)dir_ni->mft_no); 5078 if (NInoAttr(ni)) 5079 panic("%s(): Inode to link to is an attribute/raw inode.\n", 5080 __FUNCTION__); 5081 is_dir = S_ISDIR(ni->mode); 5082 /* 5083 * Create a temporary filename attribute so we can find the correct 5084 * place to insert it into. We also need a temporary copy so we can 5085 * release the mft record before we add the directory entry. This is 5086 * needed because when we hold the mft record for the inode and we call 5087 * ntfs_dir_entry_add() this would cause the mft record for the 5088 * directory to be mapped which would result in a deadlock in the event 5089 * that both mft records are in the same page. 5090 */ 5091 fn_alloc = sizeof(FILENAME_ATTR) + NTFS_MAX_NAME_LEN * sizeof(ntfschar); 5092 fn = OSMalloc(fn_alloc, ntfs_malloc_tag); 5093 if (!fn) { 5094 ntfs_error(vol->mp, "Failed to allocate memory for temporary " 5095 "filename attribute."); 5096 err = ENOMEM; 5097 goto err; 5098 } 5099 bzero(fn, fn_alloc); 5100 /* Begin setting up the temporary filename attribute. */ 5101 fn->parent_directory = MK_LE_MREF(dir_ni->mft_no, dir_ni->seq_no); 5102 /* FILENAME_POSIX is zero and the attribute is already zeroed. 
*/ 5103 /* fn->filename_type = FILENAME_POSIX; */ 5104 /* 5105 * If this is not a rename then convert the name from utf8 to Unicode. 5106 * If this is a rename on the other hand then we have the name in 5107 * Unicode already so just copy that over. 5108 */ 5109 ntfs_name = fn->filename; 5110 ntfs_name_size = NTFS_MAX_NAME_LEN * sizeof(ntfschar); 5111 if (!is_rename) { 5112 ntfs_name_len = utf8_to_ntfs(vol, (u8*)cn->cn_nameptr, 5113 cn->cn_namelen, &ntfs_name, &ntfs_name_size); 5114 if (ntfs_name_len < 0) { 5115 err = -ntfs_name_len; 5116 if (err == ENAMETOOLONG) 5117 ntfs_debug("Failed (name is too long)."); 5118 else 5119 ntfs_error(vol->mp, "Failed to convert name to " 5120 "Unicode (error %d).", err); 5121 goto err; 5122 } 5123 } else { 5124 memcpy(ntfs_name, name, name_len * sizeof(ntfschar)); 5125 ntfs_name_len = name_len; 5126 } 5127 /* Set the filename length in the temporary filename attribute. */ 5128 fn->filename_length = ntfs_name_len; 5129 fn_size = sizeof(FILENAME_ATTR) + ntfs_name_len * sizeof(ntfschar); 5130 /* 5131 * Copy the times from the standard information attribute which we have 5132 * cached in the ntfs inode. 5133 */ 5134 fn->creation_time = utc2ntfs(ni->creation_time); 5135 fn->last_data_change_time = utc2ntfs(ni->last_data_change_time); 5136 fn->last_mft_change_time = utc2ntfs(ni->last_mft_change_time); 5137 fn->last_access_time = utc2ntfs(ni->last_access_time); 5138 if (!is_dir) { 5139 lck_spin_lock(&ni->size_lock); 5140 fn->allocated_size = cpu_to_sle64(NInoNonResident(ni) && 5141 (NInoSparse(ni) || NInoCompressed(ni)) ? 5142 ni->compressed_size : ni->allocated_size); 5143 fn->data_size = cpu_to_sle64(ni->data_size); 5144 lck_spin_unlock(&ni->size_lock); 5145 } else { 5146 /* 5147 * Directories use 0 for the sizes in the filename attribute 5148 * and the attribute is already zeroed. 
5149 */ 5150 /* fn->data_size = fn->allocated_size = 0; */ 5151 } 5152 /* 5153 * If this is not a directory or it is an encrypted directory, set the 5154 * needs archiving bit except for the core system files. 5155 */ 5156 fn->file_attributes = ni->file_attributes; 5157 if (!is_dir || NInoEncrypted(ni)) { 5158 BOOL need_set_archive_bit = TRUE; 5159 if (vol->major_ver >= 2) { 5160 if (ni->mft_no <= FILE_Extend) 5161 need_set_archive_bit = FALSE; 5162 } else { 5163 if (ni->mft_no <= FILE_UpCase) 5164 need_set_archive_bit = FALSE; 5165 } 5166 if (need_set_archive_bit) { 5167 ni->file_attributes |= FILE_ATTR_ARCHIVE; 5168 fn->file_attributes = ni->file_attributes; 5169 NInoSetDirtyFileAttributes(ni); 5170 } 5171 } 5172 /* 5173 * Directories need the FILE_ATTR_DUP_FILENAME_INDEX_PRESENT flag set 5174 * in their filename attributes both in their mft records and in the 5175 * index entries pointing to them but not in the standard information 5176 * attribute which is why it is not set in @ni->file_attributes. 5177 */ 5178 if (is_dir) 5179 fn->file_attributes |= FILE_ATTR_DUP_FILENAME_INDEX_PRESENT; 5180 /* 5181 * TODO: We need to find out whether it is true that ea_length takes 5182 * precedence over reparse_tag, i.e. we need to check that if both EAs 5183 * are present and this is a reparse point, we need to set the 5184 * ea_length rather than the reparse_tag. So far I have not been able 5185 * to create EAs on a reparse point and vice versa so perhaps the two 5186 * are mutually exclusive in which case we are fine... 5187 * 5188 * The attribute is already zeroed so no need to set anything to zero. 5189 */ 5190#if 0 5191 if (ni->ea_length) { 5192 fn->ea_length = cpu_to_le16(ni->ea_length); 5193 /* fn->reserved = 0; */ 5194 } else if (ni->file_attributes & FILE_ATTR_REPARSE_POINT) { 5195 // TODO: Instead of zero use actual value if/when we enable 5196 // creating hard links to reparse points... 
5197 /* fn->reparse_tag = 0; */ 5198 } else { 5199 /* 5200 * We need to initialize the unused field to zero but as we 5201 * have already zeroed the attribute we do not need to do 5202 * anything now. 5203 */ 5204 /* fn->reparse_tag = 0; */ 5205 } 5206#endif 5207 /* 5208 * Add the created filename attribute to the parent directory index. 5209 * 5210 * We know @ni is the base inode since we bailed out for attribute 5211 * inodes above so we can use it to generate the mft reference. 5212 */ 5213 err = ntfs_dir_entry_add(dir_ni, fn, fn_size, 5214 MK_LE_MREF(ni->mft_no, ni->seq_no)); 5215 if (err) 5216 goto err; 5217 /* 5218 * The ea_length and reparse_tag are only set in the directory index 5219 * entries and not in filename attributes in the mft record so zero 5220 * them here, before adding the filename attribute to the mft record. 5221 */ 5222 fn->reparse_tag = 0; 5223 /* 5224 * Add the created filename attribute to the mft record as well. 5225 * 5226 * Again, we know @ni is the base inode. 5227 */ 5228 err = ntfs_mft_record_map(ni, &m); 5229 if (err) { 5230 ntfs_error(vol->mp, "Failed to map mft record 0x%llx (error " 5231 "%d).", (unsigned long long)ni->mft_no, err); 5232 goto rm_err; 5233 } 5234 ctx = ntfs_attr_search_ctx_get(ni, m); 5235 if (!ctx) { 5236 err = ENOMEM; 5237 goto unm_err; 5238 } 5239 err = ntfs_attr_lookup(AT_FILENAME, AT_UNNAMED, 0, 0, fn, fn_size, ctx); 5240 if (err != ENOENT) { 5241 if (!err) { 5242 ntfs_debug("Failed (filename already present in " 5243 "inode."); 5244 err = EEXIST; 5245 } else 5246 ntfs_error(vol->mp, "Failed to add filename to mft_no " 5247 "0x%llx because looking up the " 5248 "filename in the mft record failed " 5249 "(error %d).", 5250 (unsigned long long)ni->mft_no, err); 5251 goto put_err; 5252 } 5253 /* 5254 * The current implementation of ntfs_attr_lookup() will always return 5255 * pointing into the base mft record when an attribute was not found. 
5256 */ 5257 if (ni != ctx->ni) 5258 panic("%s(): ni != ctx->ni\n", __FUNCTION__); 5259 if (m != ctx->m) 5260 panic("%s(): m != ctx->m\n", __FUNCTION__); 5261 /* 5262 * @ctx->a now points to the location in the mft record at which we 5263 * need to insert the filename attribute, so insert it now. 5264 * 5265 * Note we ignore the case where @ctx->is_error is true because we do 5266 * not need the attribute any more for anything after it has been 5267 * inserted so we do not care that we failed to map its mft record. 5268 */ 5269 err = ntfs_resident_attr_record_insert(ni, ctx, AT_FILENAME, NULL, 0, 5270 fn, fn_size); 5271 if (err) { 5272 ntfs_error(vol->mp, "Failed to add filename to mft_no 0x%llx " 5273 "because inserting the filename attribute " 5274 "failed (error %d).", 5275 (unsigned long long)ni->mft_no, err); 5276 goto put_err; 5277 } 5278 /* 5279 * Update the hard link count in the mft record. Note we subtract one 5280 * from the inode link count if this is a rename as the link count has 5281 * been elevated by one by the caller. 5282 */ 5283 ni->link_count++; 5284 m->link_count = cpu_to_le16(ni->link_count - (is_rename ? 1 : 0)); 5285 /* 5286 * Update the ctime in the inode by copying it from the target 5287 * directory inode where it will have been updated by the above call to 5288 * ntfs_dir_entry_add(). 5289 */ 5290 ni->last_mft_change_time = dir_ni->last_mft_change_time; 5291 NInoSetDirtyTimes(ni); 5292 /* 5293 * Invalidate negative cache entries in the directory. We need to do 5294 * this because there may be negative cache entries which would match 5295 * the name of the just created inode but in a different case. Such 5296 * negative cache entries would now be incorrect thus we need to throw 5297 * away all negative cache entries to ensure there cannot be any 5298 * incorrectly negative entries in the name cache. 5299 */ 5300 cache_purge_negatives(dir_ni->vn); 5301 /* 5302 * We should add the new hard link to the name cache. 
Problem is that 5303 * this is likely not to be a useful thing to do as the original name 5304 * is likely in the name cache already and the OS X name cache only 5305 * allows one name per vnode and cache_enter() simply returns without 5306 * doing anything if a name is already present in the name cache for 5307 * the vnode. Thus we could use vnode_update_identity() instead to 5308 * switch the cached name from the original name to the new hard link. 5309 * 5310 * FIXME: The question is whether this is a useful thing to do. On the 5311 * one hand people creating a hard link are likely to want to then 5312 * access the inode via the new name but on the other hand hard links 5313 * are often used in applications for locking purposes and in this case 5314 * after the hard link is created the application is likely to unlink 5315 * the original name thus it would be beneficial if that remains in the 5316 * cache until this happens which will automatically remove the name 5317 * from the name cache and the next lookup of the new name will insert 5318 * the new one. Thus it is best if we do nothing at all now. If OS X 5319 * ever allows multiple name links per vnode we can uncomment the below 5320 * cache_enter() call. 5321 * 5322 * For the rename case we have just removed the original name, thus it 5323 * makes sense to add the new name now and whilst at it also update the 5324 * vnode identity with the new name and parent as the old ones are no 5325 * longer valid. 5326 */ 5327 if (is_rename) { 5328 vnode_update_identity(ni->vn, dir_ni->vn, cn->cn_nameptr, 5329 cn->cn_namelen, cn->cn_hash, 5330 VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME); 5331 cache_enter(dir_ni->vn, ni->vn, cn); 5332 cn->cn_flags &= ~MAKEENTRY; 5333 } 5334 /* 5335 * Ensure the base mft record is written to disk. 
5336 * 5337 * Note we do not set any of the NInoDirty*() flags because we have 5338 * just created the inode thus all the fields are in sync between the 5339 * ntfs_inode @ni and its mft record @m. 5340 * 5341 * Also note we defer the unmapping of the mft record to here so that 5342 * we do not get racing time updates, etc during concurrent runs of 5343 * link(2) and rename(2) where the source inode for the rename is the 5344 * inode that has a new hardlink created to it at the same time. This 5345 * case can happen because we do not lock the source inode in 5346 * ntfs_vnop_rename(). 5347 */ 5348 NInoSetMrecNeedsDirtying(ni); 5349 /* We are done with the mft record. */ 5350 ntfs_attr_search_ctx_put(ctx); 5351 ntfs_mft_record_unmap(ni); 5352 /* Free the temporary filename attribute. */ 5353 OSFree(fn, fn_alloc, ntfs_malloc_tag); 5354 ntfs_debug("Done."); 5355 return 0; 5356put_err: 5357 ntfs_attr_search_ctx_put(ctx); 5358unm_err: 5359 ntfs_mft_record_unmap(ni); 5360rm_err: 5361#if 0 5362 if (ni->ea_length) { 5363 fn->ea_length = cpu_to_le16(ni->ea_length); 5364 /* fn->reserved = 0; */ 5365 } else if (ni->file_attributes & FILE_ATTR_REPARSE_POINT) { 5366 // TODO: Instead of zero use actual value if/when we enable 5367 // creating hard links to reparse points... 5368 /* fn->reparse_tag = 0; */ 5369 } else { 5370 /* 5371 * We need to initialize the unused field to zero but as we 5372 * have already zeroed the attribute we do not need to do 5373 * anything now. 5374 */ 5375 /* fn->reparse_tag = 0; */ 5376 } 5377#endif 5378 err2 = ntfs_dir_entry_delete(dir_ni, ni, fn, fn_size); 5379 if (err2) { 5380 ntfs_error(vol->mp, "Failed to rollback index entry creation " 5381 "in error handling code path (error %d). " 5382 "Leaving inconsistent metadata. 
Run chkdsk.", 5383 err2); 5384 NVolSetErrors(vol); 5385 } 5386err: 5387 if (fn) 5388 OSFree(fn, fn_alloc, ntfs_malloc_tag); 5389 if (err != EEXIST) 5390 ntfs_error(vol->mp, "Failed (error %d).", err); 5391 else 5392 ntfs_debug("Failed (error EEXIST)."); 5393 return err; 5394} 5395 5396/** 5397 * ntfs_vnop_link - create a hard link to an inode 5398 * @a: arguments to link function 5399 * 5400 * @a contains: 5401 * vnode_t a_vp; vnode to create hard link to 5402 * vnode_t a_tdvp; destination directory for the hard link 5403 * struct componentname *a_cnp; name of the hard link to create 5404 * vfs_context_t a_context; 5405 * 5406 * Create a hard link to the inode specified by the vnode @a->a_vp with name as 5407 * specified in @a->a_cnp in the directory specified by the vnode @a->a_tdvp. 5408 * 5409 * Return 0 on success and errno on error. 5410 * 5411 * Note we always create filenames in the POSIX namespace. 5412 */ 5413static int ntfs_vnop_link(struct vnop_link_args *a) 5414{ 5415 ntfs_inode *ni, *dir_ni; 5416 ntfs_volume *vol; 5417 struct componentname *cn; 5418 errno_t err; 5419 5420 ni = NTFS_I(a->a_vp); 5421 vol = ni->vol; 5422 dir_ni = NTFS_I(a->a_tdvp); 5423 if (!dir_ni || !ni) { 5424 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 5425 return EINVAL; 5426 } 5427 cn = a->a_cnp; 5428 ntfs_debug("Creating a hard link to mft_no 0x%llx, named %.*s in " 5429 "directory mft_no 0x%llx.", 5430 (unsigned long long)ni->mft_no, (int)cn->cn_namelen, 5431 cn->cn_nameptr, (unsigned long long)dir_ni->mft_no); 5432 /* Do not allow attribute/raw inodes to be linked to. */ 5433 if (NInoAttr(ni)) { 5434 ntfs_debug("Mft_no 0x%llx is a%s inode, returning EPERM.", 5435 (unsigned long long)ni->mft_no, 5436 NInoRaw(ni) ? " raw" : "n attribute"); 5437 return EPERM; 5438 } 5439 /* The target inode must be a directory. 
*/ 5440 if (!S_ISDIR(dir_ni->mode)) { 5441 ntfs_debug("Target mft_no 0x%llx is not a directory, " 5442 "returning ENOTDIR.", 5443 (unsigned long long)dir_ni->mft_no); 5444 return ENOTDIR; 5445 } 5446 /* Lock the target directory inode for writing. */ 5447 lck_rw_lock_exclusive(&dir_ni->lock); 5448 /* The inode being linked to must not be a directory. */ 5449 if (S_ISDIR(ni->mode)) { 5450 lck_rw_unlock_exclusive(&dir_ni->lock); 5451 ntfs_debug("Mft_no 0x%llx to link to is a directory, cannot " 5452 "create hard link %.*s to it, returning " 5453 "EPERM.", (unsigned long long)ni->mft_no, 5454 (int)cn->cn_namelen, cn->cn_nameptr); 5455 return EPERM; 5456 } 5457 /* Lock the inode to link to for writing. */ 5458 lck_rw_lock_exclusive(&ni->lock); 5459 /* Ensure the target directory has not been deleted. */ 5460 if (!dir_ni->link_count) { 5461 ntfs_debug("Target directory mft_no 0x%llx has been deleted, " 5462 "returning ENOENT.", 5463 (unsigned long long)dir_ni->mft_no); 5464 /* 5465 * If the directory is somehow still in the name cache remove 5466 * it now. 5467 */ 5468 cache_purge(dir_ni->vn); 5469 err = ENOENT; 5470 goto err; 5471 } 5472 /* 5473 * Ensure the inode has not been deleted. Note we really should be 5474 * checking that the source of the hard link has not been unlinked yet 5475 * but we do not know what the source name was as the caller does not 5476 * provide it to us and we do not know which name we were called for 5477 * from just looking at the source vnode/inode. 5478 */ 5479 if (!ni->link_count) { 5480 ntfs_debug("Inode %.*s, mft_no 0x%llx has been deleted, " 5481 "returning ENOENT.", (int)cn->cn_namelen, 5482 cn->cn_nameptr, (unsigned long long)ni->mft_no); 5483 /* 5484 * If the target is somehow still in the name cache remove it 5485 * now. 5486 */ 5487 cache_purge(ni->vn); 5488 err = ENOENT; 5489 goto err; 5490 } 5491 /* 5492 * The inode being linked to must not be a directory or device special 5493 * file. 
TODO: Extend the checks when we support device special files. 5494 */ 5495 if (S_ISDIR(ni->mode)) { 5496 ntfs_debug("Mft_no 0x%llx to link to is a directory, cannot " 5497 "create hard link %.*s to it, returning " 5498 "EPERM.", (unsigned long long)ni->mft_no, 5499 (int)cn->cn_namelen, cn->cn_nameptr); 5500 err = EPERM; 5501 goto err; 5502 } 5503 /* 5504 * Do not allow any of the system files to be linked to. 5505 * 5506 * For NTFS 3.0+ volumes do not allow any of the extended system files 5507 * to be linked to, either. 5508 * 5509 * Note we specifically blacklist all system files that we make use of. 5510 * 5511 * TODO: What about all the new metadata files introduced with Windows 5512 * Vista? We are currently ignoring them and allowing them to be 5513 * linked to... 5514 */ 5515 if (ni->file_attributes & FILE_ATTR_SYSTEM) { 5516 BOOL is_system = FALSE; 5517 if (vol->major_ver <= 1) { 5518 if (ni->mft_no < FILE_Extend) 5519 is_system = TRUE; 5520 } else { 5521 if (ni->mft_no <= FILE_Extend) 5522 is_system = TRUE; 5523 if (ni == vol->objid_ni || ni == vol->quota_ni || 5524 ni == vol->usnjrnl_ni) 5525 is_system = TRUE; 5526 } 5527 if (is_system) { 5528 ntfs_debug("Mft_no 0x%llx is a%s system file, " 5529 "returning EPERM.", 5530 (unsigned long long)ni->mft_no, 5531 (ni->mft_no > FILE_Extend) ? 5532 "n extended" : ""); 5533 err = EPERM; 5534 goto err; 5535 } 5536 } 5537 /* 5538 * Ensure the inode to link to is not read-only (we already checked 5539 * that @ni is not a directory). 5540 */ 5541 if (ni->file_attributes & FILE_ATTR_READONLY) { 5542 ntfs_debug("Mft_no 0x%llx is marked read-only, returning " 5543 "EPERM.", (unsigned long long)ni->mft_no); 5544 err = EPERM; 5545 goto err; 5546 } 5547 /* 5548 * TODO: Test if Windows is happy with a reparse point having a hard 5549 * link and if so remove this check and copy in the reparse point tag 5550 * into the filename attribute below. 
For mount point reparse points 5551 * the reparse point is a directory so the link attempt would already 5552 * have been aborted. 5553 * 5554 * TODO: Test if Windows is happy with an offline inode having a hard 5555 * link and if so remove this check. 5556 */ 5557 if (ni->file_attributes & (FILE_ATTR_REPARSE_POINT | 5558 FILE_ATTR_OFFLINE)) { 5559 ntfs_debug("Mft_no 0x%llx is %s. Creating hard links to such " 5560 "inodes is not allowed, returning EPERM.", 5561 (unsigned long long)ni->mft_no, 5562 (ni->file_attributes & 5563 FILE_ATTR_REPARSE_POINT) ? 5564 "a reparse point" : "offline"); 5565 err = EPERM; 5566 goto err; 5567 } 5568 /* Check if the maximum link count is already reached. */ 5569 if (ni->link_count >= NTFS_MAX_HARD_LINKS) { 5570 ntfs_debug("Cannot create hard link to mft_no 0x%llx because " 5571 "it already has too many hard links.", 5572 (unsigned long long)ni->mft_no); 5573 err = EMLINK; 5574 goto err; 5575 } 5576 /* Go ahead and create the hard link. */ 5577 err = ntfs_link_internal(ni, dir_ni, cn, FALSE, NULL, 0); 5578 if (err) { 5579 if (err != EEXIST) 5580 ntfs_error(vol->mp, "Failed to create hard link to " 5581 "mft_no 0x%llx, named %.*s, in " 5582 "directory mft_no 0x%llx (error %d).", 5583 (unsigned long long)ni->mft_no, 5584 (int)cn->cn_namelen, cn->cn_nameptr, 5585 (unsigned long long)dir_ni->mft_no, 5586 err); 5587 else 5588 ntfs_debug("Failed to create hard link to mft_no " 5589 "0x%llx, named %.*s, in directory " 5590 "mft_no 0x%llx (error EEXIST).", 5591 (unsigned long long)ni->mft_no, 5592 (int)cn->cn_namelen, cn->cn_nameptr, 5593 (unsigned long long)dir_ni->mft_no); 5594 } else 5595 ntfs_debug("Done."); 5596err: 5597 /* We are done, unlock the inode and the target directory. 
*/ 5598 lck_rw_unlock_exclusive(&ni->lock); 5599 lck_rw_unlock_exclusive(&dir_ni->lock); 5600 return err; 5601} 5602 5603/** 5604 * ntfs_vnop_rename - rename an inode (file/directory/symbolic link/etc) 5605 * @a: arguments to rename function 5606 * 5607 * @a contains: 5608 * vnode_t a_fdvp; directory containing source inode 5609 * vnode_t a_fvp; source inode to be renamed 5610 * struct componentname *a_fcnp; name of the inode to rename 5611 * vnode_t a_tdvp; target directory to move the source to 5612 * vnode_t a_tvp; target inode to be deleted 5613 * struct componentname *a_tcnp; name of the inode to delete 5614 * vfs_context_t a_context; 5615 * 5616 * Rename the inode @a_fvp with name as specified in @a->a_fcnp located in the 5617 * directory @a->a_fdvp to the new name specified in a->a_tcnp placing it in 5618 * the target directory @a->a_tdvp. 5619 * 5620 * If @a->a_tvp is not NULL it means that the rename target already exists 5621 * which means we have to delete the rename target before we can perform the 5622 * rename. In this case @a->a_tvp is the existing target inode and its name is 5623 * the rename target name specified in @a->a_tcnp and it is located in the 5624 * target directory @a->a_tdvp. 5625 * 5626 * Return 0 on success and errno on error. 5627 * 5628 * Note we always create the target name @a->a_tcnp in the POSIX namespace. 5629 * 5630 * Rename is a complicated operation because there are several special cases 5631 * that need consideration: 5632 * 5633 * First of all unchecked renaming can create directory loops which are not 5634 * attached to the file system root, e.g. take the directory tree /a/b/c and 5635 * perform a rename of /a/b to /a/b/c/ which if allowed to proceed would create 5636 * /a and b/c/b where the latter is a loop in that b points back to c which 5637 * points back to b. 
Also this loop no longer is attached to the file system 5638 * directory tree and there is no way to access it any more as there is no link 5639 * from /a to b or c any more. Thus we have to check for this case and return 5640 * EINVAL error instead of doing the rename. Also a concurrent rename could 5641 * reshape the tree after our check so that our case would result in a loop 5642 * after all thus all tree reshaping renames must be done under a rename lock. 5643 * Note the VFS already holds the mnt_renamelock mutex for some renames but it 5644 * does not hold it in all cases we need it to be held so we still need our own 5645 * NTFS rename lock. 5646 * 5647 * Further VNOP_RENAME() must observe the following rules: 5648 * 5649 * - Source and destination must either both be directories, or both not be 5650 * directories. If this is not the case return ENOTDIR if the target is not 5651 * a directory and EISDIR if the target is a directory. 5652 * 5653 * - If the target is a directory, it must be empty. Return ENOTEMPTY if not. 5654 * 5655 * - It is not allowed to rename "/", ".", or "..". Return EINVAL if this is 5656 * attempted. 5657 * 5658 * - If the source inode and the target inode are the same and the mount is 5659 * case sensitive or the parent directories are also the same and the names 5660 * are the same do not do anything at all and return success, i.e. 0. Note 5661 * this is a violation of POSIX but it is needed to allow renaming of files 5662 * from one case to another, i.e. when a mount is not case sensitive but case 5663 * preserving (this is the default for NTFS) and the source and target inodes 5664 * and their parent directories match but the names do not match we want to 5665 * perform the rename rather than just return success. If we still find that 5666 * the target exists as a hard link rather than this being a case changing 5667 * rename we still need to abort and return success to comply with POSIX. 
5668 * 5669 * FIXME: There is a bug in the VFS in that it never calls VNOP_RENAME() at 5670 * all when it is called with source and target strings being the same. This 5671 * is wrong when the string matches the name but does not have the same case, 5672 * i.e. the rename would normally succeed switching the case to the new case. 5673 * The VFS is currently forbidding this to happen. <rdar://problem/5485782> 5674 */ 5675static int ntfs_vnop_rename(struct vnop_rename_args *a) 5676{ 5677 MFT_REF src_mref, dst_mref; 5678 ntfs_inode *src_dir_ni, *src_ni, *dst_dir_ni, *dst_ni; 5679 struct componentname *src_cn, *dst_cn; 5680 ntfs_volume *vol; 5681 ntfschar *ntfs_name_buf, *orig_ntfs_name, *dst_ntfs_name; 5682 ntfschar *src_ntfs_name, *target_ntfs_name; 5683 ntfs_dir_lookup_name *src_name, *dst_name; 5684 size_t orig_ntfs_name_size, dst_ntfs_name_size; 5685 signed orig_ntfs_name_len, dst_ntfs_name_len, src_ntfs_name_len; 5686 signed target_ntfs_name_len; 5687 errno_t err, err2; 5688 FILENAME_TYPE_FLAGS src_ntfs_name_type, target_ntfs_name_type; 5689 BOOL have_unlinked = FALSE; 5690 5691 dst_name = src_name = NULL; 5692 src_dir_ni = NTFS_I(a->a_fdvp); 5693 src_ni = NTFS_I(a->a_fvp); 5694 src_cn = a->a_fcnp; 5695 dst_dir_ni = NTFS_I(a->a_tdvp); 5696 if (!src_dir_ni || !src_ni || !dst_dir_ni) { 5697 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 5698 return EINVAL; 5699 } 5700 vol = src_dir_ni->vol; 5701 dst_cn = a->a_tcnp; 5702 if (a->a_tvp) { 5703 dst_ni = NTFS_I(a->a_tvp); 5704 if (!dst_ni) { 5705 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 5706 return EINVAL; 5707 } 5708 ntfs_debug("Entering for source mft_no 0x%llx, name %.*s, " 5709 "parent directory mft_no 0x%llx and " 5710 "destination mft_no 0x%llx, name %.*s, parent " 5711 "directory mft_no 0x%llx.", 5712 (unsigned long long)src_ni->mft_no, 5713 (int)src_cn->cn_namelen, src_cn->cn_nameptr, 5714 (unsigned long long)src_dir_ni->mft_no, 5715 (unsigned long long)dst_ni->mft_no, 5716 
(int)dst_cn->cn_namelen, dst_cn->cn_nameptr, 5717 (unsigned long long)dst_dir_ni->mft_no); 5718 if (src_ni == dst_ni && NVolCaseSensitive(vol)) { 5719 ntfs_debug("Source and destination inodes are the " 5720 "same and the volume is case " 5721 "sensitive. Returning success " 5722 "without doing anything as required " 5723 "by POSIX."); 5724 return 0; 5725 } 5726 } else { 5727 dst_ni = NULL; 5728 ntfs_debug("Entering for source mft_no 0x%llx, name %.*s, " 5729 "parent directory mft_no 0x%llx and no " 5730 "destination mft_no, destination name %.*s, " 5731 "parent directory mft_no 0x%llx.", 5732 (unsigned long long)src_ni->mft_no, 5733 (int)src_cn->cn_namelen, src_cn->cn_nameptr, 5734 (unsigned long long)src_dir_ni->mft_no, 5735 (int)dst_cn->cn_namelen, dst_cn->cn_nameptr, 5736 (unsigned long long)dst_dir_ni->mft_no); 5737 } 5738 /* 5739 * The source and target parent inodes must be directories which 5740 * implies they are base inodes. 5741 */ 5742 if (!S_ISDIR(src_dir_ni->mode) || !S_ISDIR(dst_dir_ni->mode)) { 5743 ntfs_debug("%s parent inode 0x%llx is not a directory, " 5744 "returning ENOTDIR.", 5745 !S_ISDIR(src_dir_ni->mode) ? 5746 "Source" : "Destination", (unsigned long long) 5747 (!S_ISDIR(src_dir_ni->mode) ? 5748 src_dir_ni->mft_no : dst_dir_ni->mft_no)); 5749 return ENOTDIR; 5750 } 5751 /* 5752 * All inodes must be locked in parent -> child order so we need to 5753 * check whether the source and target parent inodes have a 5754 * parent/child relationship with each other. 5755 * 5756 * If both are the same we have the easiest case and we just lock the 5757 * single directory inode. 5758 * 5759 * If the two are not the same we need to exclude all other tree 5760 * reshaping renames from happening as they could change the 5761 * relationship between the parent directory inodes under our feet. To 5762 * do this we use a per ntfs volume lock so we can then go on to 5763 * determine their parent/child relationship. 
5764 * 5765 * Once we have established if there is a parent/child relationship we 5766 * lock the parent followed by the child and if the two are completely 5767 * unrelated the order of locking does not matter so we just lock the 5768 * destination followed by the source. 5769 * 5770 * Note that we take this opportunity of walking the directory tree up 5771 * to the root starting from @dst_dir_ni to also check whether @src_ni 5772 * is either equal to or a parent of @dst_dir_ni in which case a 5773 * directory loop would be caused by the rename so we have to abort it 5774 * with EINVAL error. 5775 */ 5776 if (src_dir_ni == dst_dir_ni) 5777 lck_rw_lock_exclusive(&src_dir_ni->lock); 5778 else { 5779 BOOL is_parent; 5780 5781 lck_mtx_lock(&vol->rename_lock); 5782 err = ntfs_inode_is_parent(src_dir_ni, dst_dir_ni, &is_parent, 5783 src_ni); 5784 if (err) { 5785 lck_mtx_unlock(&vol->rename_lock); 5786 /* 5787 * @err == EINVAL means @src_ni matches or is a parent 5788 * of @dst_dir_ni. This would create a directory 5789 * loop so abort the rename but do not emit an error 5790 * message as there is no error as such. 5791 */ 5792 if (err != EINVAL) 5793 ntfs_error(vol->mp, "Failed to determine " 5794 "whether source directory " 5795 "mft_no 0x%llx is a parent of " 5796 "destination directory mft_no " 5797 "0x%llx (error %d).", 5798 (unsigned long long) 5799 src_dir_ni->mft_no, 5800 (unsigned long long) 5801 dst_dir_ni->mft_no, err); 5802 return err; 5803 } 5804 /* 5805 * If @src_dir_ni is a parent of @dst_dir_ni, lock @src_dir_ni 5806 * followed by @dst_dir_ni. 5807 * 5808 * Otherwise either @dst_dir_ni is a parent of @src_dir_ni, in 5809 * which case we have to lock @dst_dir_ni followed by 5810 * @src_dir_ni, or they are unrelated in which case lock 5811 * ordering does not matter thus we do not need to distinguish 5812 * those two cases and can simply lock @dst_dir_ni followed by 5813 * @src_dir_ni. 
5814 */ 5815 if (is_parent) { 5816 lck_rw_lock_exclusive(&src_dir_ni->lock); 5817 lck_rw_lock_exclusive(&dst_dir_ni->lock); 5818 } else { 5819 lck_rw_lock_exclusive(&dst_dir_ni->lock); 5820 lck_rw_lock_exclusive(&src_dir_ni->lock); 5821 } 5822 } 5823 /* 5824 * The source cannot be the source directory and the destination cannot 5825 * be the destination directory. Also as we are about to lock the 5826 * target ensure it does not equal the source directory either. We 5827 * have already checked for the source being equal to the target 5828 * directory above so no need to check again. 5829 */ 5830 if (dst_ni && dst_ni == src_dir_ni) { 5831 ntfs_debug("The source parent directory equals the target, " 5832 "returning ENOTEMPTY."); 5833 err = ENOTEMPTY; 5834 /* Set @dst_ni to NULL so we do not try to unlock it. */ 5835 dst_ni = NULL; 5836 goto err; 5837 } 5838 if (src_ni == src_dir_ni || (dst_ni && dst_ni == dst_dir_ni)) { 5839 ntfs_debug("The source and/or the target is/are equal to " 5840 "their parent directories, returning EINVAL."); 5841 err = EINVAL; 5842 /* Set @dst_ni to NULL so we do not try to unlock it. */ 5843 dst_ni = NULL; 5844 goto err; 5845 } 5846 /* 5847 * If the destination inode exists lock it so it can be unlinked 5848 * safely. For example if it is a directory we need to ensure that it 5849 * is empty and that no-one creates an entry in it whilst the delete is 5850 * in progress which requires us to hold an exclusive lock on it. 5851 */ 5852 if (dst_ni) 5853 lck_rw_lock_exclusive(&dst_ni->lock); 5854 /* 5855 * Because we have locked the parent inode of the source inode there is 5856 * no need to lock the source inode itself. 
We are not going to unlink 5857 * it completely, just move it from one location/name to another name 5858 * and/or place in the directory tree and the mft record will be mapped 5859 * and thus locked for exclusive access whenever we modify the inode 5860 * which will serialize any potential concurrent operations on the 5861 * inode. The only concurrent operation to watch out for is when the 5862 * source inode is a directory and someone calls VNOP_REMOVE() or 5863 * VNOP_RMDIR() on any of its child inodes. This can end up in the 5864 * situation where the index root node is locked in 5865 * ntfs_index_entry_delete() and hence the mft record is mapped whilst 5866 * the free space in the mft record is evaluated but then before this 5867 * information is used the mft record is unmapped and then mapped again 5868 * as part of a call to ntfs_index_entry_lock_two() and if our 5869 * VNOP_RENAME() manages to map the mft record whilst it is temporarily 5870 * unmapped during the ntfs_index_entry_lock_two() we can cause the 5871 * free space in the mft record to decrease and thus the 5872 * ntfs_index_entry_delete() may then encounter an out of space 5873 * condition when it thought it had determined the amount of free space 5874 * already and thus assume something has gone wrong and panic(). We 5875 * overcome this problem inside ntfs_index_entry_delete() by rechecking 5876 * the free space after reacquiring the lock and dealing with it as 5877 * appropriate. 5878 * 5879 * First, ensure the parent directories have not been deleted. 5880 */ 5881 if (!src_dir_ni->link_count || !dst_dir_ni->link_count) { 5882 ntfs_debug("One or both of the parent directories mft_no " 5883 "0x%llx and mft_no 0x%llx has/have been " 5884 "deleted, returning ENOENT.", 5885 (unsigned long long)src_dir_ni->mft_no, 5886 (unsigned long long)dst_dir_ni->mft_no); 5887 /* 5888 * If the directory is somehow still in the name cache remove 5889 * it now. 
5890 */ 5891 if (!src_dir_ni->link_count) 5892 cache_purge(src_dir_ni->vn); 5893 if (!dst_dir_ni->link_count) 5894 cache_purge(dst_dir_ni->vn); 5895 err = ENOENT; 5896 goto err; 5897 } 5898 /* Rename is not allowed on attribute/raw inodes. */ 5899 if (NInoAttr(src_ni) || (dst_ni && NInoAttr(dst_ni))) { 5900 ntfs_debug("Source and/or target inode is/are attribute/raw " 5901 "inodes, returning EPERM."); 5902 err = EPERM; 5903 goto err; 5904 } 5905 /* Ensure the source has not been deleted by someone else already. */ 5906 if (!src_ni->link_count) { 5907 ntfs_debug("Source %.*s, mft_no 0x%llx has been deleted, " 5908 "returning ENOENT.", (int)src_cn->cn_namelen, 5909 src_cn->cn_nameptr, 5910 (unsigned long long)src_ni->mft_no); 5911 /* 5912 * If the source is somehow still in the name cache remove it 5913 * now. 5914 */ 5915 cache_purge(src_ni->vn); 5916 err = ENOENT; 5917 goto err; 5918 } 5919 /* 5920 * Ensure the target has not been deleted by someone else already. If 5921 * it has been deleted pretend the caller did not specify a target. 5922 * This is what HFS+ does, too. 5923 */ 5924 if (dst_ni && !dst_ni->link_count) { 5925 ntfs_debug("Target %.*s, mft_no 0x%llx has been deleted, " 5926 "pretending no target was specified.", 5927 (int)dst_cn->cn_namelen, dst_cn->cn_nameptr, 5928 (unsigned long long)dst_ni->mft_no); 5929 /* 5930 * If the target is somehow still in the name cache remove it 5931 * now. 5932 */ 5933 cache_purge(dst_ni->vn); 5934 lck_rw_unlock_exclusive(&dst_ni->lock); 5935 dst_ni = NULL; 5936 } 5937 /* 5938 * If the destination exists need to ensure that it is a directory if 5939 * the source is a directory or that it is not a directory if the 5940 * source is not a directory. 5941 * 5942 * Also, need to ensure the target directory is empty. 5943 * 5944 * If the source and destination are the same none of these checks 5945 * apply so skip them. 
5946 */ 5947 if (dst_ni && src_ni != dst_ni) { 5948 if (S_ISDIR(src_ni->mode)) { 5949 if (!S_ISDIR(dst_ni->mode)) { 5950 ntfs_debug("Source is a directory but " 5951 "destination is not, " 5952 "returning ENOTDIR"); 5953 err = ENOTDIR; 5954 goto err; 5955 } 5956 /* The target is a directory, but is it empty? */ 5957 err = ntfs_dir_is_empty(dst_ni); 5958 if (err) { 5959 if (err == ENOTEMPTY) 5960 ntfs_debug("Target directory %.*s, " 5961 "mft_no 0x%llx is not " 5962 "empty, returning " 5963 "ENOTEMPTY.", 5964 (int)dst_cn->cn_namelen, 5965 dst_cn->cn_nameptr, 5966 (unsigned long long) 5967 dst_ni->mft_no); 5968 else { 5969 ntfs_error(vol->mp, "Failed to " 5970 "determine if target " 5971 "directory %.*s, " 5972 "mft_no 0x%llx is " 5973 "empty (error %d).", 5974 (int)dst_cn->cn_namelen, 5975 dst_cn->cn_nameptr, 5976 (unsigned long long) 5977 dst_ni->mft_no, err); 5978 err = EIO; 5979 } 5980 goto err; 5981 } 5982 } else /* if (!S_ISDIR(src_ni->mode)) */ { 5983 if (S_ISDIR(dst_ni->mode)) { 5984 ntfs_debug("Source is not a directory but " 5985 "destination is, returning " 5986 "EISDIR"); 5987 err = EISDIR; 5988 goto err; 5989 } 5990 } 5991 } 5992 /* Ensure none of the inodes are read-only. */ 5993 if ((!S_ISDIR(src_ni->mode) && 5994 src_ni->file_attributes & FILE_ATTR_READONLY) || 5995 (dst_ni && !S_ISDIR(dst_ni->mode) && 5996 dst_ni->file_attributes & FILE_ATTR_READONLY)) { 5997 ntfs_debug("One of the inodes involved in the rename is " 5998 "read-only, returning EPERM."); 5999 err = EPERM; 6000 goto err; 6001 } 6002 /* 6003 * Do not allow any of the system files to be renamed/deleted. 6004 * 6005 * For NTFS 3.0+ volumes do not allow any of the extended system files 6006 * to be renamed/deleted, either. 6007 * 6008 * Note we specifically blacklist all system files that we make use of. 6009 * 6010 * TODO: What about all the new metadata files introduced with Windows 6011 * Vista? We are currently ignoring them and allowing them to be 6012 * renamed/deleted... 
6013 */ 6014 if (src_ni->file_attributes & FILE_ATTR_SYSTEM || (dst_ni && 6015 dst_ni->file_attributes & FILE_ATTR_SYSTEM)) { 6016 BOOL is_system = FALSE; 6017 if (vol->major_ver <= 1) { 6018 if (src_ni->mft_no < FILE_Extend || (dst_ni && 6019 dst_ni->mft_no < FILE_Extend)) 6020 is_system = TRUE; 6021 } else { 6022 if (src_ni->mft_no <= FILE_Extend || (dst_ni && 6023 dst_ni->mft_no <= FILE_Extend)) 6024 is_system = TRUE; 6025 if (src_dir_ni == vol->extend_ni) { 6026 if (src_ni == vol->objid_ni || 6027 src_ni == vol->quota_ni || 6028 src_ni == vol->usnjrnl_ni) 6029 is_system = TRUE; 6030 } 6031 if (dst_dir_ni == vol->extend_ni) { 6032 if (dst_ni == vol->objid_ni || 6033 dst_ni == vol->quota_ni || 6034 dst_ni == vol->usnjrnl_ni) 6035 is_system = TRUE; 6036 } 6037 } 6038 if (is_system) { 6039 ntfs_debug("Source and/or target inode is a system " 6040 "file, returning EPERM."); 6041 err = EPERM; 6042 goto err; 6043 } 6044 } 6045 /* 6046 * If the source/target inodes are reparse points or if they are 6047 * offline we cannot rename/delete them yet. TODO: Implement this. 6048 */ 6049 if (src_ni->file_attributes & (FILE_ATTR_REPARSE_POINT | 6050 FILE_ATTR_OFFLINE) || (dst_ni && 6051 dst_ni->file_attributes & (FILE_ATTR_REPARSE_POINT | 6052 FILE_ATTR_OFFLINE))) { 6053 ntfs_error(vol->mp, "Source or target inode is a reparse " 6054 "point or offline, renaming such indoes is " 6055 "notsupported yet, returning ENOTSUP."); 6056 err = ENOTSUP; 6057 goto err; 6058 } 6059 /* 6060 * To proceed further we need to convert both the source and target 6061 * names from utf8 to Unicode. This is a good time to do both as the 6062 * conversion also checks for invalid names, too long names, etc. 6063 * 6064 * Note we allocate both source and target names with a single buffer 6065 * so we only have to call once into the allocator. 
6066 */ 6067 ntfs_name_buf = OSMalloc(NTFS_MAX_NAME_LEN * 2, ntfs_malloc_tag); 6068 if (!ntfs_name_buf) { 6069 ntfs_debug("Not enough memory to allocate name buffer."); 6070 err = ENOMEM; 6071 goto err; 6072 } 6073 orig_ntfs_name = ntfs_name_buf; 6074 dst_ntfs_name = (ntfschar*)((u8*)ntfs_name_buf + NTFS_MAX_NAME_LEN); 6075 dst_ntfs_name_size = orig_ntfs_name_size = NTFS_MAX_NAME_LEN; 6076 orig_ntfs_name_len = utf8_to_ntfs(vol, (u8*)src_cn->cn_nameptr, 6077 src_cn->cn_namelen, &orig_ntfs_name, 6078 &orig_ntfs_name_size); 6079 if (orig_ntfs_name_len < 0) { 6080 err = -orig_ntfs_name_len; 6081 if (err == ENAMETOOLONG) 6082 ntfs_debug("Failed (source name is too long)."); 6083 else 6084 ntfs_error(vol->mp, "Failed to convert name to " 6085 "Unicode (error %d).", err); 6086 goto free_err; 6087 } 6088 dst_ntfs_name_len = utf8_to_ntfs(vol, (u8*)dst_cn->cn_nameptr, 6089 dst_cn->cn_namelen, &dst_ntfs_name, 6090 &dst_ntfs_name_size); 6091 if (dst_ntfs_name_len < 0) { 6092 err = -dst_ntfs_name_len; 6093 if (err == ENAMETOOLONG) 6094 ntfs_debug("Failed (target name is too long)."); 6095 else 6096 ntfs_error(vol->mp, "Failed to convert target name to " 6097 "Unicode (error %d).", err); 6098 goto free_err; 6099 } 6100 /* 6101 * We need to make sure the source still has the name specified in 6102 * @src_cn. It could have been unlinked or renamed before we took the 6103 * lock on the parent directory. 6104 * 6105 * To do this, look up the converted source name in the source parent 6106 * directory index. 6107 */ 6108 err = ntfs_lookup_inode_by_name(src_dir_ni, orig_ntfs_name, 6109 orig_ntfs_name_len, &src_mref, &src_name); 6110 if (err) { 6111 if (err != ENOENT) { 6112 ntfs_error(vol->mp, "Failed to find source name in " 6113 "directory (error %d).", err); 6114 goto free_err; 6115 } 6116src_enoent: 6117 /* 6118 * The source name does not exist in the source parent 6119 * directory. 
6120 * 6121 * This means someone renamed or deleted the name from the 6122 * directory before we managed to take the locks. 6123 */ 6124 ntfs_debug("Source has been renamed or deleted already, " 6125 "returning ENOENT."); 6126 /* 6127 * If the source is somehow still in the name cache remove it 6128 * now. 6129 */ 6130 cache_purge(src_ni->vn); 6131 err = ENOENT; 6132 goto free_err; 6133 } 6134 /* 6135 * We found the source name in the directory index but does it still 6136 * point to the same mft record? The sequence number check ensures the 6137 * inode was not deleted and recreated with the same name and the same 6138 * mft record number. 6139 */ 6140 if (src_mref != MK_MREF(src_ni->mft_no, src_ni->seq_no)) 6141 goto src_enoent; 6142 /* 6143 * We now have verified everything to do with the source. Set the 6144 * source name to be the correctly cased name (unless it was correctly 6145 * cased already in which case @src_name will be NULL and 6146 * @orig_ntfs_name contains the correcly cased name). 6147 */ 6148 if (src_name) { 6149 src_ntfs_name = src_name->name; 6150 src_ntfs_name_len = src_name->len; 6151 src_ntfs_name_type = src_name->type; 6152 } else { 6153 src_ntfs_name = orig_ntfs_name; 6154 src_ntfs_name_len = orig_ntfs_name_len; 6155 src_ntfs_name_type = 0; 6156 } 6157 /* 6158 * Now we need to verify the target. In an ideal world, either it has 6159 * to be specified in @dst_ni in which case it also has to exist in the 6160 * destination parent directory @dst_dir_ni, or @dst_ni has to be NULL 6161 * in which case the target name must not exist in the destination 6162 * parent directory. 6163 * 6164 * But because the VFS obtains the target before we take the necessary 6165 * locks it is possible for the above ideal not to be true. There are 6166 * several possible cases: 6167 * 6168 * - Target was specified but deleted. We have detected this case 6169 * above and have set @dst_ni to NULL thus we do not need to worry 6170 * about this case any more. 
6171 * - Target was not specified but another inode was created with the 6172 * same name. In this case we return EEXIST which is what HFS+ does, 6173 * too. 6174 * - Target was specified but renamed. This means we may or may not 6175 * find a directory entry of the same name. If we do not find a 6176 * matching directory entry we know the target has been renamed thus 6177 * we can simply set @dst_ni to NULL and pretend it does not exist. 6178 * If we do find a directory entry that matches in name but does not 6179 * point to the same mft reference we know the target was renamed and 6180 * another inode was created with the same name. In this case we 6181 * return EEXIST which is what HFS+ does, too. 6182 */ 6183 err = ntfs_lookup_inode_by_name(dst_dir_ni, dst_ntfs_name, 6184 dst_ntfs_name_len, &dst_mref, &dst_name); 6185 if (err) { 6186 if (err != ENOENT) { 6187 ntfs_error(vol->mp, "Failed to find target name in " 6188 "directory (error %d).", err); 6189 goto free_err; 6190 } 6191 /* 6192 * The destination name does not exist in the destination 6193 * parent directory which means that the target must have been 6194 * renamed to something else before we took the locks. We 6195 * treat this the same as if had been deleted, i.e. we pretend 6196 * the caller did not specify a target. 6197 */ 6198 if (dst_ni) { 6199 ntfs_debug("Target %.*s, mft_no 0x%llx has been " 6200 "renamed, pretending no target was " 6201 "specified.", (int)dst_cn->cn_namelen, 6202 dst_cn->cn_nameptr, 6203 (unsigned long long)dst_ni->mft_no); 6204 lck_rw_unlock_exclusive(&dst_ni->lock); 6205 dst_ni = NULL; 6206 } 6207 } else /* if (!err) */ { 6208 /* 6209 * The destination name exists in the directory index. 
6210 * 6211 * If the caller did not specify it in @dst_ni or the 6212 * destination inode has been deleted (in which case we set 6213 * @dst_ni to NULL above) or the target was renamed and another 6214 * inode was created with the same name return error EEXIST 6215 * which is what HFS+ does, too. 6216 * 6217 * FIXME: Technically it would probably be more correct to get 6218 * the new target ntfs inode and restart the function but at 6219 * least for now stick with the same behaviour as HFS+. 6220 */ 6221 if (!dst_ni || dst_mref != MK_MREF(dst_ni->mft_no, 6222 dst_ni->seq_no)) { 6223 ntfs_debug("Target name %.*s exists but %s, returning " 6224 "EEXIST.", (int)dst_cn->cn_namelen, 6225 dst_cn->cn_nameptr, !dst_ni ? 6226 "target inode was not specified or it " 6227 "was already deleted" : 6228 "does not match specified target " 6229 "inode (it must have been renamed and " 6230 "a new inode created with the same " 6231 "name)"); 6232 err = EEXIST; 6233 goto free_err; 6234 } 6235 /* 6236 * We still need the destination name thus use a new variable 6237 * to store the correctly cased target name. 6238 */ 6239 if (!dst_name) { 6240 target_ntfs_name = dst_ntfs_name; 6241 target_ntfs_name_len = dst_ntfs_name_len; 6242 target_ntfs_name_type = 0; 6243 } else { 6244 target_ntfs_name = dst_name->name; 6245 target_ntfs_name_len = dst_name->len; 6246 target_ntfs_name_type = dst_name->type; 6247 } 6248 /* 6249 * We have verified everything to do with the target. We now 6250 * need to unlink it unless the source and the target are the 6251 * same, i.e. we are changing the case of an existing filename. 6252 * We need to distinguish two cases. If the volume is mounted 6253 * case sensitive or it is not case sensitive and the source 6254 * and destination names do not match (i.e. they are different 6255 * hard links to the same inode) we do not proceed and return 6256 * success (this is required by POSIX). 
Otherwise the volume 6257 * is not case sensitive and the source and destination names 6258 * match (i.e. they are the same hard link) and we can either 6259 * return success when the source and destination names are 6260 * identical (same case) or we can proceed with the rename when 6261 * the case differs. 6262 * 6263 * Note we have caught the case of the inodes being equal and 6264 * the volume being mounted case sensitive earlier on so we now 6265 * know that the volume is not mounted case sensitive. 6266 */ 6267 if (src_ni == dst_ni) { 6268 /* 6269 * If the two names are not the same hardlink return 6270 * success not doing anything as required by POSIX. 6271 * 6272 * Note we do not need to care about case when 6273 * comparing because we are comparing the correctly 6274 * cased names. 6275 */ 6276 if (src_ntfs_name_len != target_ntfs_name_len || 6277 bcmp(src_ntfs_name, target_ntfs_name, 6278 src_ntfs_name_len * sizeof(ntfschar))) { 6279 ntfs_debug("Source and target inodes are the " 6280 "same but the source and " 6281 "target names are different " 6282 "hard links. Returning " 6283 "success without doing " 6284 "anything as required by " 6285 "POSIX."); 6286 goto done; 6287 } 6288 /* 6289 * The names are the same hard link. If the existing 6290 * name is the same as the destination name (i.e. the 6291 * target name before case correction) there is 6292 * nothing to do and we can return success. 6293 */ 6294 if (src_ntfs_name_len == dst_ntfs_name_len && 6295 !bcmp(src_ntfs_name, dst_ntfs_name, 6296 src_ntfs_name_len * sizeof(ntfschar))) { 6297 ntfs_debug("Source and destination are " 6298 "identical so no need to do " 6299 "anything. Returning " 6300 "success."); 6301 goto done; 6302 } 6303 /* 6304 * The names are the same hard link but they differ in 6305 * case thus there is no target to be removed as it 6306 * will be removed as part of the actual rename when 6307 * the source name is removed. 
6308 */ 6309 } else /* if (dst_ni && src_ni != dst_ni) */ { 6310 /* 6311 * The source and the target are not the same thus now 6312 * unlink the target. We can do this atomically before 6313 * adding the new entry because both the parent 6314 * directory inode and the target inode are locked for 6315 * writing thus no-one can access either until we have 6316 * finished. FIXME: The only pitfal is what happens if 6317 * the rename fails after we have removed the target? 6318 * We just ignore this problem for now and let the 6319 * target disappear. This is what HFS does also so at 6320 * least we are not the only non-POSIX conformant file 6321 * system on OS X... In fact as long as we return EIO 6322 * on error once we have unlinked the target POSIX 6323 * still considers this ok. (This is what HFS does, 6324 * too.) 6325 * 6326 * Note we do not set @is_rename to true here as this 6327 * is just a normal unlink operation. 6328 */ 6329 err = ntfs_unlink_internal(dst_dir_ni, dst_ni, 6330 target_ntfs_name, target_ntfs_name_len, 6331 target_ntfs_name_type, FALSE); 6332 if (err) { 6333 ntfs_error(vol->mp, "Rename failed because " 6334 "the target mft_no 0x%llx " 6335 "could not be removed from " 6336 "directory mft_no 0x%llx " 6337 "(error %d).", 6338 (unsigned long long) 6339 dst_ni->mft_no, 6340 (unsigned long long) 6341 dst_dir_ni->mft_no, err); 6342 goto free_err; 6343 } 6344 /* 6345 * Set @have_unlinked to true so that we know that we 6346 * have to return error EIO from now on if we fail to 6347 * complete the rename. 6348 */ 6349 have_unlinked = TRUE; 6350 } 6351 /* 6352 * Release the lock on the destination inode and set it to NULL 6353 * so we assume it does not exist from now on. 6354 */ 6355 lck_rw_unlock_exclusive(&dst_ni->lock); 6356 dst_ni = NULL; 6357 } 6358 /* 6359 * We dealt with the target if there was one thus now we can begin the 6360 * actual rename. 
6361 * 6362 * To start with we lock the source inode for writing which allows us 6363 * to split the removal of the source name and the addition of the 6364 * destination name into two events. 6365 * 6366 * Note we cheat a little and set @dst_ni to @src_ni so that @src_ni is 6367 * unlocked at the end of the function/on error. 6368 */ 6369 if (dst_ni) 6370 panic("%s(): dst_ni\n", __FUNCTION__); 6371 dst_ni = src_ni; 6372 lck_rw_lock_exclusive(&src_ni->lock); 6373 /* 6374 * As the source inode is now locked for writing we can perform the 6375 * rename in two stages. First we remove the source name and then we 6376 * add the destination name both to the mft record of the inode and to 6377 * the parent directory indexes. We can do this atomically because 6378 * both the parent directory and the source inode are locked for 6379 * writing thus no-one can access either until we are finished. 6380 * 6381 * As removal of the source name can leave the source inode with a zero 6382 * link count we artificially increment the link count here to ensure 6383 * it cannot reach zero. This is required to guarantee that the unlink 6384 * of the source name will remove the filename attribute and to ensure 6385 * that the object id is not deleted. Finally, this also ensures 6386 * no-one can ever see the inode in a deleted state (although this 6387 * should never happen anyway as we have the inode locked for writing). 6388 * 6389 * Note the link count in the ntfs inode is unsigned int type, i.e. at 6390 * least 32-bit, to allow us to overflow 16-bits here if needed. In 6391 * this way we do not need to worry about the link count overflowing 6392 * here which makes the code simpler. 6393 * 6394 * We set @is_rename to true as we have elevated the link count by one. 
6395 */ 6396 src_ni->link_count++; 6397 err = ntfs_unlink_internal(src_dir_ni, src_ni, src_ntfs_name, 6398 src_ntfs_name_len, src_ntfs_name_type, TRUE); 6399 if (err) { 6400 ntfs_error(vol->mp, "Rename failed because the source name, " 6401 "%.*s mft_no 0x%llx could not be removed from " 6402 "directory mft_no 0x%llx (error %d).", 6403 (int)src_cn->cn_namelen, src_cn->cn_nameptr, 6404 (unsigned long long)src_ni->mft_no, 6405 (unsigned long long)src_dir_ni->mft_no, err); 6406 goto dec_err; 6407 } 6408 /* 6409 * The source name is now removed both from the source parent directory 6410 * index and from the mft record of the source inode. 6411 * 6412 * Now add the destination name as a hard link to the mft record of the 6413 * source inode and to the destination parent directory index. 6414 * 6415 * Calling ntfs_link_internal() also sets the "needs to be archived" 6416 * bit on the ntfs inode unless we are renaming an unencrypted 6417 * directory inode so we do not need to worry about setting it 6418 * ourselves. 6419 */ 6420 err = ntfs_link_internal(src_ni, dst_dir_ni, dst_cn, TRUE, 6421 dst_ntfs_name, dst_ntfs_name_len); 6422 if (err) 6423 goto link_err; 6424 /* We are done, decrement the link count back to its correct value. */ 6425 src_ni->link_count--; 6426done: 6427 if (src_name) 6428 OSFree(src_name, sizeof(*src_name), ntfs_malloc_tag); 6429 if (dst_name) 6430 OSFree(dst_name, sizeof(*dst_name), ntfs_malloc_tag); 6431 OSFree(ntfs_name_buf, NTFS_MAX_NAME_LEN * 2, ntfs_malloc_tag); 6432err: 6433 /* If the destination inode existed we locked it so unlock it now. */ 6434 if (dst_ni) 6435 lck_rw_unlock_exclusive(&dst_ni->lock); 6436 /* Drop the source and destination parent directory inode locks. 
*/ 6437 lck_rw_unlock_exclusive(&src_dir_ni->lock); 6438 if (src_dir_ni != dst_dir_ni) { 6439 lck_rw_unlock_exclusive(&dst_dir_ni->lock); 6440 lck_mtx_unlock(&vol->rename_lock); 6441 } 6442 ntfs_debug("Done (error %d).", (int)err); 6443 return err; 6444link_err: 6445 ntfs_error(vol->mp, "Rename failed because the destination name %.*s, " 6446 "mft_ni 0x%llx could not be added to directory mft_no " 6447 "0x%llx (error %d).", (int)dst_cn->cn_namelen, 6448 dst_cn->cn_nameptr, (unsigned long long)src_ni->mft_no, 6449 (unsigned long long)dst_dir_ni->mft_no, err); 6450 /* 6451 * Try to roll back the unlink of the source by creating a new hard 6452 * link with the old name. 6453 */ 6454 err2 = ntfs_link_internal(src_ni, src_dir_ni, src_cn, TRUE, 6455 orig_ntfs_name, orig_ntfs_name_len); 6456 if (err2) { 6457 ntfs_error(vol->mp, "Failed to roll back partially completed " 6458 "rename (error %d). Leaving corrupt " 6459 "metadata and returning EIO. Unmount and run " 6460 "chkdsk.", err2); 6461 NVolSetErrors(vol); 6462 err = EIO; 6463 } else 6464 ntfs_debug("Re-linking of source name succeeded."); 6465dec_err: 6466 src_ni->link_count--; 6467free_err: 6468 if (have_unlinked) { 6469 /* We unlinked an existing target, need to re-link it now. */ 6470 ntfs_debug("Rename failed but the target was already unlinked " 6471 "and relinking it is not implemented (yet), " 6472 "returning EIO. 
(Given you were renaming " 6473 "over it chances are you did not care about " 6474 "the target anyway.)"); 6475 err = EIO; 6476 } 6477 goto done; 6478} 6479 6480/** 6481 * ntfs_vnop_mkdir - create a directory 6482 * @a: arguments to mkdir function 6483 * 6484 * @a contains: 6485 * vnode_t a_dvp; directory in which to create the dir 6486 * vnode_t *a_vpp; destination pointer for the created dir 6487 * struct componentname *a_cnp; name of the directory to create 6488 * struct vnode_attr *a_vap; attributes to set on the created dir 6489 * vfs_context_t a_context; 6490 * 6491 * Create a directory with name as specified in @a->a_cnp in the directory 6492 * specified by the vnode @a->a_dvp. Assign the attributes @a->a_vap to the 6493 * created directory. Finally return the vnode of the created directory in 6494 * *@a->a_vpp. 6495 * 6496 * Return 0 on success and errno on error. 6497 * 6498 * Note we always create directory names in the POSIX namespace. 6499 */ 6500static int ntfs_vnop_mkdir(struct vnop_mkdir_args *a) 6501{ 6502 errno_t err; 6503#ifdef DEBUG 6504 ntfs_inode *ni = NTFS_I(a->a_dvp); 6505 6506 if (ni) 6507 ntfs_debug("Creating a directory named %.*s in directory " 6508 "mft_no 0x%llx.", (int)a->a_cnp->cn_namelen, 6509 a->a_cnp->cn_nameptr, 6510 (unsigned long long)ni->mft_no); 6511#endif 6512 err = ntfs_create(a->a_dvp, a->a_vpp, a->a_cnp, a->a_vap, FALSE); 6513 ntfs_debug("Done (error %d).", (int)err); 6514 return err; 6515} 6516 6517/** 6518 * ntfs_vnop_rmdir - remove an empty directory 6519 * @a: arguments to rmdir function 6520 * 6521 * @a contains: 6522 * vnode_t a_dvp; parent directory remove from 6523 * vnode_t a_vp; directory to remove 6524 * struct componentname *a_cnp; name of the dircetory to remove 6525 * vfs_context_t a_context; 6526 * 6527 * Make sure that the directory with vnode @a->a_vp and name as specified in 6528 * @a->a_cnp is empty and if so remove it from its parent directory with vnode 6529 * @a->a_dvp. 
6530 * 6531 * Return 0 on success and errno on error. 6532 * 6533 * Note that if the name of the directory to be removed is in the WIN32 or DOS 6534 * namespaces, both the WIN32 and the corresponding DOS names are removed. 6535 * 6536 * Note that this function only removes the directory entry, i.e. it does not 6537 * remove the name, however it does decrement the hard link count to zero. 6538 * This is so that the directory can be undeleted and its original name 6539 * restored. In any case, we do not actually delete the inode here as it may 6540 * still be open and UNIX semantics require an unlinked inode to be still 6541 * accessible through already opened file descriptors. When the last file 6542 * descriptor is closed, we causes the inode to be deleted when the VFS 6543 * notifies us of the last close by calling VNOP_INACTIVE(), i.e. 6544 * ntfs_vnop_inactive(). 6545 */ 6546static int ntfs_vnop_rmdir(struct vnop_rmdir_args *a) 6547{ 6548 ntfs_inode *dir_ni = NTFS_I(a->a_dvp); 6549 ntfs_inode *ni = NTFS_I(a->a_vp); 6550 errno_t err; 6551 6552 ntfs_debug("Entering."); 6553 if (!dir_ni || !ni) { 6554 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 6555 return EINVAL; 6556 } 6557 err = ntfs_unlink(dir_ni, ni, a->a_cnp, 0, TRUE); 6558 ntfs_debug("Done (error %d).", (int)err); 6559 return err; 6560} 6561 6562/** 6563 * ntfs_vnop_symlink - create a symbolic link 6564 * @a: arguments to symlink function 6565 * 6566 * @a contains: 6567 * vnode_t a_dvp; directory to create the symlink in 6568 * vnode_t *a_vpp; destination pointer for the new symlink 6569 * struct componentname *a_cnp; name of the symlink to create 6570 * struct vnode_attr *a_vap; attributes to set on the new symlink 6571 * char *a_target; path to point the created symlink at 6572 * vfs_context_t a_context; 6573 * 6574 * Create a symbolic link to the path string @a->a_target with name as 6575 * specified in @a->a_cnp in directory specified by the vnode @a->a_dvp. 
6576 * Assign the attributes @a->a_vap to the created symlink. Finally return the 6577 * vnode of the created symlink in *@a->a_vpp. 6578 * 6579 * We implement symbolic links the same way as SFM, i.e. a symbolic link is a 6580 * regular file as far as NTFS is concerned with an AFP_AfpInfo named stream 6581 * containing the finder info with the type set to 'slnk' and the creator set 6582 * to 'rhap'. This is basically how HFS+ stores symbolic links, too. 6583 * 6584 * Return 0 on success and errno on error. 6585 * 6586 * Note, since IEEE Std 1003.1-2001 does not require any association of file 6587 * times with symbolic links, there is no requirement that file times be 6588 * updated by symlink(). - This is what POSIX says about updating times in 6589 * symlink() thus we do not update any of the times except as an indirect 6590 * result of calling ntfs_write() on the symbolic link inode. 6591 */ 6592static int ntfs_vnop_symlink(struct vnop_symlink_args *a) 6593{ 6594 uio_t uio; 6595 ntfs_inode *dir_ni, *ni, *raw_ni; 6596 int err, err2; 6597 unsigned len; 6598 6599 dir_ni = NTFS_I(a->a_dvp); 6600 if (!dir_ni) { 6601 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 6602 return EINVAL; 6603 } 6604 ntfs_debug("Creating a symbolic link named %.*s in directory mft_no " 6605 "0x%llx and pointing it at path \"%s\".", 6606 (int)a->a_cnp->cn_namelen, a->a_cnp->cn_nameptr, 6607 (unsigned long long)dir_ni->mft_no, a->a_target); 6608 len = strlen(a->a_target); 6609 /* Zero length symbolic links are not allowed. */ 6610 if (!len || len > MAXPATHLEN) { 6611 err = EINVAL; 6612 if (len) 6613 err = ENAMETOOLONG; 6614 ntfs_error(dir_ni->vol->mp, "Invalid symbolic link target " 6615 "length %d, returning %s.", len, 6616 len ? "ENAMETOOLONG" : "EINVAL"); 6617 return err; 6618 } 6619retry: 6620 /* Create the symbolic link inode. 
*/ 6621 err = ntfs_create(dir_ni->vn, a->a_vpp, a->a_cnp, a->a_vap, TRUE); 6622 if (err) { 6623 if (err != EEXIST) 6624 ntfs_error(dir_ni->vol->mp, "Failed to create " 6625 "symbolic link named %.*s in " 6626 "directory mft_no 0x%llx and pointing " 6627 "to path \"%s\" (error %d).", 6628 (int)a->a_cnp->cn_namelen, 6629 a->a_cnp->cn_nameptr, 6630 (unsigned long long)dir_ni->mft_no, 6631 a->a_target, err); 6632 else 6633 ntfs_debug("Failed to create symbolic link named %.*s " 6634 "in directory mft_no 0x%llx and " 6635 "pointing to path \"%s\" (error " 6636 "EEXIST).", (int)a->a_cnp->cn_namelen, 6637 a->a_cnp->cn_nameptr, 6638 (unsigned long long)dir_ni->mft_no, 6639 a->a_target); 6640 return err; 6641 } 6642 /* Note the ntfs inode @ni is locked for writing. */ 6643 ni = NTFS_I(*a->a_vpp); 6644 /* Make sure no-one deleted it under our feet. */ 6645 if (NInoDeleted(ni)) { 6646 /* Remove the inode from the name cache. */ 6647 cache_purge(ni->vn); 6648 /* Release the vnode and try the create again. */ 6649 lck_rw_unlock_exclusive(&ni->lock); 6650 vnode_put(ni->vn); 6651 goto retry; 6652 } 6653 /* 6654 * Create a uio and attach the target path to it so we can use 6655 * ntfs_write() to do the work. 6656 */ 6657 uio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE); 6658 if (!uio) { 6659 err = ENOMEM; 6660 ntfs_error(dir_ni->vol->mp, "Failed to allocate UIO."); 6661 goto err; 6662 } 6663 err = uio_addiov(uio, (uintptr_t)a->a_target, len); 6664 if (err) 6665 panic("%s(): Failed to attach target path buffer to UIO " 6666 "(error %d).", __FUNCTION__, err); 6667 /* 6668 * FIXME: At present the kernel does not allow VLNK vnodes to use the 6669 * UBC (<rdar://problem/5794900>) thus we need to use a shadow VREG 6670 * vnode to do the actual write of the symbolic link data. 
Fortunately 6671 * we already implemented this functionality for compressed files where 6672 * we need to read the compressed data using a shadow vnode so we use 6673 * the same implementation here, thus our shadow vnode is a raw inode. 6674 */ 6675 err = ntfs_raw_inode_get(ni, LCK_RW_TYPE_EXCLUSIVE, &raw_ni); 6676 if (err) { 6677 ntfs_error(ni->vol->mp, "Failed to get raw inode (error %d).", 6678 err); 6679 goto err; 6680 } 6681 if (!NInoRaw(raw_ni)) 6682 panic("%s(): Requested raw inode but got non-raw one.\n", 6683 __FUNCTION__); 6684 /* 6685 * Write the symbolic link target to the created inode. We pass in 6686 * IO_UNIT as we want an atomic i/o operation. 6687 * 6688 * FIXME: ntfs_write() does not always honour the IO_UNIT flag so we 6689 * still have to test for partial writes. 6690 */ 6691 err = ntfs_write(raw_ni, uio, IO_UNIT, TRUE); 6692 /* 6693 * Update the sizes in the base inode. Note there is no need to lock 6694 * @raw_ni->size_lock as the values cannot change at present as we are 6695 * holding the inode lock @raw_ni->lock for write. 6696 */ 6697 lck_spin_lock(&ni->size_lock); 6698 ni->initialized_size = raw_ni->initialized_size; 6699 ni->data_size = raw_ni->data_size; 6700 ni->allocated_size = raw_ni->allocated_size; 6701 ni->compressed_size = raw_ni->compressed_size; 6702 lck_spin_unlock(&ni->size_lock); 6703 if (NInoNonResident(raw_ni)) 6704 NInoSetNonResident(ni); 6705 lck_rw_unlock_exclusive(&raw_ni->lock); 6706 vnode_put(raw_ni->vn); 6707 /* Check for write errors. */ 6708 if (uio_resid(uio) && !err) 6709 err = EIO; 6710 /* We no longer need the uio. */ 6711 uio_free(uio); 6712 if (!err) { 6713 lck_rw_unlock_exclusive(&ni->lock); 6714 ntfs_debug("Done."); 6715 return 0; 6716 } 6717 /* Write failed or was partial, unlink the created symbolic link. 
*/ 6718 ntfs_error(dir_ni->vol->mp, "Failed to write target path to symbolic " 6719 "link inode (error %d).", err); 6720err: 6721 lck_rw_unlock_exclusive(&ni->lock); 6722 err2 = ntfs_unlink(dir_ni, ni, a->a_cnp, 0, FALSE); 6723 if (err2) { 6724 ntfs_error(dir_ni->vol->mp, "Failed to unlink symbolic link " 6725 "inode in error code path (error %d). Run " 6726 "chkdsk.", err2); 6727 NVolSetErrors(dir_ni->vol); 6728 } 6729 vnode_put(ni->vn); 6730 return err; 6731} 6732 6733/** 6734 * ntfs_vnop_readdir - read directory entries into a supplied buffer 6735 * @a: arguments to readdir function 6736 * 6737 * @a contains: 6738 * vnode_t a_vp; directory vnode to read directory entries from 6739 * uio_t a_uio; destination in which to return the entries 6740 * int a_flags; flags describing the entries to return 6741 * int *a_eofflag; return end of file status (can be NULL) 6742 * int *a_numdirent; return number of entries returned (can be NULL) 6743 * vfs_context_t a_context; 6744 * 6745 * See ntfs_dir.c::ntfs_readdir() for a description of the implemented 6746 * features. In addition to those described features VNOP_READDIR() should 6747 * also implement the below features. 
6748 * 6749 * @a->a_flags can have the following bits set: 6750 * VNODE_READDIR_EXTENDED use extended directory entries 6751 * VNODE_READDIR_REQSEEKOFF requires seek offset (cookies) 6752 * VNODE_READDIR_SEEKOFF32 seek offset values should be 32-bit 6753 * 6754 * When VNODE_READDIR_EXTENDED is set, the format of the returned directory 6755 * entry structures changes to the direntry structure which is defined as: 6756 * 6757 * u64 d_ino; inode number of entry 6758 * u64 d_seekoff; seek offset (optional, used by servers) 6759 * u16 d_reclen; length of this record 6760 * u16 d_namlen; length of string in d_name 6761 * u8 d_type; inode type (one of DT_DIR, DT_REG, etc) 6762 * char d_name[MAXPATHLEN]; null terminated filename 6763 * 6764 * If VNODE_READDIR_REQSEEKOFF is set, VNODE_READDIR_EXTENDED must also be set, 6765 * and it means that the seek offset (d_seekoff) in the direntry structure must 6766 * be set. If VNODE_READDIR_REQSEEKOFF is not set, the seek offset can be set 6767 * to zero as the caller will ignore it. 6768 * 6769 * If VNODE_READDIR_SEEKOFF32 is set, both VNODE_READDIR_EXTENDED and 6770 * VNODE_READDIR_REQSEEKOFF must be set and it means that the seek offset must 6771 * be at most 32-bits, i.e. the most significant 32-bits of d_seekoff must be 6772 * zero. 6773 * 6774 * All the VNODE_READDIR_* flags are only ever set by the NFS server and given 6775 * we do not yet support NFS exporting of NTFS volumes we just abort if any of 6776 * them are set. 6777 * 6778 * If the directory is deleted-but-in-use, we do not synthesize entries for "." 6779 * and "..". 6780 * 6781 * Return 0 on success and the error code on error. 
6782 */ 6783static int ntfs_vnop_readdir(struct vnop_readdir_args *a) 6784{ 6785 user_ssize_t start_count; 6786 ntfs_inode *dir_ni = NTFS_I(a->a_vp); 6787 errno_t err; 6788 6789 if (!dir_ni) { 6790 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 6791 return EINVAL; 6792 } 6793 ntfs_debug("Entering for directory inode 0x%llx.", 6794 (unsigned long long)dir_ni->mft_no); 6795 /* 6796 * FIXME: Is this check necessary? Can we ever get here for 6797 * non-directories? All current callers (except the NFS server) ensure 6798 * that @dir_ni is a directory. We do not currently support NFS 6799 * exporting so this should indeed definitely never trigger but leave 6800 * it here as a kind of debug assertion. 6801 */ 6802 if (!S_ISDIR(dir_ni->mode)) { 6803 ntfs_debug("Not a directory, returning ENOTDIR."); 6804 return ENOTDIR; 6805 } 6806 if (a->a_flags) { 6807 ntfs_error(dir_ni->vol->mp, "None of the VNODE_READDIR_* " 6808 "flags are supported yet, sorry."); 6809 return ENOTSUP; 6810 } 6811 lck_rw_lock_shared(&dir_ni->lock); 6812 /* Do not allow messing with the inode once it has been deleted. */ 6813 if (NInoDeleted(dir_ni)) { 6814 /* Remove the inode from the name cache. */ 6815 cache_purge(dir_ni->vn); 6816 lck_rw_unlock_shared(&dir_ni->lock); 6817 ntfs_debug("Directory is deleted."); 6818 return ENOENT; 6819 } 6820 start_count = uio_resid(a->a_uio); 6821 err = ntfs_readdir(dir_ni, a->a_uio, a->a_eofflag, a->a_numdirent); 6822 /* 6823 * Update the last_access_time (atime) if something was read. 6824 * 6825 * Skip the update if atime updates are disabled via the noatime mount 6826 * option or the volume is read only. 
6827 */ 6828 if (uio_resid(a->a_uio) < start_count && !NVolReadOnly(dir_ni->vol) && 6829 !(vfs_flags(dir_ni->vol->mp) & MNT_NOATIME)) { 6830 dir_ni->last_access_time = ntfs_utc_current_time(); 6831 NInoSetDirtyTimes(dir_ni); 6832 } 6833 lck_rw_unlock_shared(&dir_ni->lock); 6834 ntfs_debug("Done (error %d).", (int)err); 6835 return err; 6836} 6837 6838/** 6839 * ntfs_vnop_readdirattr - 6840 * 6841 */ 6842static int ntfs_vnop_readdirattr(struct vnop_readdirattr_args *a) 6843{ 6844 errno_t err; 6845 6846 ntfs_debug("Entering."); 6847 (void)nop_readdirattr(a); 6848 // TODO: 6849 err = ENOTSUP; 6850 ntfs_debug("Done (error %d).", (int)err); 6851 return err; 6852} 6853 6854/** 6855 * ntfs_vnop_readlink - read the contents of a symbolic link 6856 * @a: arguments to readlink function 6857 * 6858 * @a contains: 6859 * vnode_t a_vp; vnode of symbolic link whose data to read 6860 * uio_t *a_uio; destination in which to return the read data 6861 * vfs_context_t a_context; 6862 * 6863 * Read the path stored in the symbolic link vnode @a->a_vp and return it in 6864 * the destination buffer pointed to by @a->a_uio. 6865 * 6866 * uio_resid(@a->a_uio) is the maximum number of bytes to read and 6867 * uio_offset(@a->a_uio) must be zero. 6868 * 6869 * We implement symbolic links the same way as SFM, i.e. a symbolic link is a 6870 * regular file as far as NTFS is concerned with an AFP_AfpInfo named stream 6871 * containing the finder info with the type set to 'slnk' and the creator set 6872 * to 'rhap'. This is basically how HFS+ stores symbolic links, too. 6873 * 6874 * Thus obtaining the symbolic link target is a simple matter of calling 6875 * ntfs_read() on the symbolic link inode. 
6876 * 6877 * TODO: We may wish to add support for other symbolic link types found on NTFS 6878 * volumes such as the methods used by: 6879 * - Windows Services for Unix (SFU) and the userspace ntfsmount driver, 6880 * - SMB/Samba (when run on a file system without native symbolic links) 6881 * - Cygwin 6882 * 6883 * It may also be worth supporting reparse point based symbolic links but those 6884 * are a lot trickier if at all possible as they contain information that 6885 * cannot be resolved without access to the Windows registry and potentially 6886 * without access to the Windows Domain/Active Directory. 6887 * 6888 * Return 0 on success and errno on error. 6889 * 6890 * Note, since IEEE Std 1003.1-2001 does not require any association of file 6891 * times with symbolic links, there is no requirement that file times be 6892 * updated by readlink(). 6893 */ 6894static int ntfs_vnop_readlink(struct vnop_readlink_args *a) 6895{ 6896 s64 size; 6897 user_ssize_t start_count; 6898 ntfs_inode *ni, *raw_ni; 6899 uio_t uio = a->a_uio; 6900 errno_t err; 6901 6902 ni = NTFS_I(a->a_vp); 6903 if (!ni) { 6904 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 6905 return EINVAL; 6906 } 6907 ntfs_debug("Entering for mft_no 0x%llx.", 6908 (unsigned long long)ni->mft_no); 6909 /* 6910 * Protect against changes in initialized_size and thus against 6911 * truncation also and against deletion/rename. 6912 */ 6913 lck_rw_lock_shared(&ni->lock); 6914 /* Do not allow messing with the inode once it has been deleted. */ 6915 if (!ni->link_count || NInoDeleted(ni)) { 6916 /* Remove the inode from the name cache. 
*/ 6917 cache_purge(ni->vn); 6918 err = ENOENT; 6919 goto err; 6920 } 6921 if (!S_ISLNK(ni->mode)) { 6922 ntfs_debug("Not a symbolic link, returning EINVAL."); 6923 err = EINVAL; 6924 goto err; 6925 } 6926 if (uio_offset(uio)) { 6927 ntfs_error(ni->vol->mp, "uio_offset(uio) is not zero, " 6928 "returning EINVAL."); 6929 err = EINVAL; 6930 goto err; 6931 } 6932 /* 6933 * FIXME: At present the kernel does not allow VLNK vnodes to use the 6934 * UBC (<rdar://problem/5794900>) thus we need to use a shadow VREG 6935 * vnode to do the actual read of the symbolic link data. Fortunately 6936 * we already implemented this functionality for compressed files where 6937 * we need to read the compressed data using a shadow vnode so we use 6938 * the same implementation here, thus our shadow vnode is a raw inode. 6939 * 6940 * Doing this has the unfortunate consequence that if the symbolic link 6941 * inode is compressed or encrypted we cannot read it as we are already 6942 * using the raw inode and we can only have one raw inode. 6943 */ 6944 lck_spin_lock(&ni->size_lock); 6945 size = ni->data_size; 6946 lck_spin_unlock(&ni->size_lock); 6947 /* Zero length symbolic links are not allowed. 
*/ 6948 if (!size || size > MAXPATHLEN) { 6949 ntfs_error(ni->vol->mp, "Invalid symbolic link size %lld in " 6950 "mft_no 0x%llx, returning EINVAL.", 6951 (long long)size, 6952 (unsigned long long)ni->mft_no); 6953 err = EINVAL; 6954 goto err; 6955 } 6956 start_count = uio_resid(uio); 6957 err = ntfs_raw_inode_get(ni, LCK_RW_TYPE_SHARED, &raw_ni); 6958 if (err) { 6959 ntfs_error(ni->vol->mp, "Failed to get raw inode (error %d).", 6960 err); 6961 goto err; 6962 } 6963 if (!NInoRaw(raw_ni)) 6964 panic("%s(): Requested raw inode but got non-raw one.\n", 6965 __FUNCTION__); 6966 lck_spin_lock(&raw_ni->size_lock); 6967 if (size > ubc_getsize(raw_ni->vn) || size != raw_ni->data_size) 6968 panic("%s(): size (0x%llx) > ubc_getsize(raw_ni->vn, 0x%llx) " 6969 "|| size != raw_ni->data_size (0x%llx)\n", 6970 __FUNCTION__, (unsigned long long)size, 6971 (unsigned long long)ubc_getsize(raw_ni->vn), 6972 (unsigned long long)raw_ni->data_size); 6973 lck_spin_unlock(&raw_ni->size_lock); 6974 /* Perform the actual read of the symbolic link data into the uio. */ 6975 err = ntfs_read(raw_ni, uio, 0, TRUE); 6976 lck_rw_unlock_shared(&raw_ni->lock); 6977 vnode_put(raw_ni->vn); 6978 /* 6979 * If the read was partial, reset @uio pretending that the read never 6980 * happened unless we used up all the space in the uio and it was 6981 * simply not big enough to hold the entire symbolic link data in which 6982 * case we return a truncated result. 6983 */ 6984 if (err || (uio_resid(uio) && start_count - uio_resid(uio) != size)) { 6985 /* 6986 * FIXME: Should we be trying to continue a partial read in 6987 * case we can complete it with multiple calls to ntfs_read()? 
6988 */ 6989 if (!err) { 6990 ntfs_debug("ntfs_read() returned a partial read, " 6991 "pretending the read never happened."); 6992 err = EIO; 6993 } 6994 uio_setoffset(uio, 0); 6995 uio_setresid(uio, start_count); 6996 if (err) 6997 ntfs_error(ni->vol->mp, "Failed to read symbolic link " 6998 "data (error %d).", err); 6999 } 7000 ntfs_debug("Done (error %d).", (int)err); 7001err: 7002 lck_rw_unlock_shared(&ni->lock); 7003 return err; 7004} 7005 7006/** 7007 * ntfs_mft_record_free_all - free clusters referenced by an mft record 7008 * @base_ni: base ntfs inode to which the (extent) inode @ni and @m belong 7009 * @ni: ntfs inode for which to free all clusters 7010 * @m: mft record for which to free all clusters 7011 * 7012 * For the ntfs inode @ni and its mft record @m, iterate over all attributes in 7013 * the mft record and free all clusters referenced by the attributes. @base_ni 7014 * is the base ntfs inode to which @ni and @m belong. 7015 * 7016 * Also, mark the mft record as not in use, increment its sequence number and 7017 * mark it dirty to ensure it gets written out later. 7018 * 7019 * When any operations fail this function notifies the user about it and marks 7020 * the volume dirty but does not return an error code as the caller can proceed 7021 * regardless without caring if some clusters failed to be freed. A later 7022 * chkdsk will find them and free them and in the mean time they just waste 7023 * some space on the volume. 
7024 */ 7025static void ntfs_mft_record_free_all(ntfs_inode *base_ni, ntfs_inode *ni, 7026 MFT_RECORD *m) 7027{ 7028 ntfs_volume *vol = base_ni->vol; 7029 ATTR_RECORD *a; 7030 errno_t err; 7031 ntfs_runlist rl; 7032 7033 for (a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 7034 a->type != AT_END; 7035 a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { 7036 if ((u8*)a < (u8*)m || (u8*)a > (u8*)m + 7037 le32_to_cpu(m->bytes_in_use) || 7038 le32_to_cpu(m->bytes_in_use) > 7039 le32_to_cpu(m->bytes_allocated) || 7040 !a->length) { 7041 ntfs_warning(vol->mp, "Found corrupt attribute whilst " 7042 "releasing deleted mft_no 0x%llx. " 7043 "Run chkdsk to recover lost space and " 7044 "fix any other inconsistencies.", 7045 (unsigned long long)ni->mft_no); 7046 NVolSetErrors(vol); 7047 break; 7048 } 7049 /* 7050 * For most resident attribute records, there is nothing we 7051 * need to do as they do not reference any clusters outside the 7052 * mft record itself. 7053 */ 7054 if (!a->non_resident) { 7055 STANDARD_INFORMATION *si; 7056 7057 /* 7058 * We only need to deal with the standard information 7059 * attribute. 7060 */ 7061 if (a->type != AT_STANDARD_INFORMATION) 7062 continue; 7063 /* 7064 * We need to update the {a,m,c}times from the ntfs 7065 * inode into the corresponding times in the standard 7066 * information attribute. The inode ctime, i.e. the 7067 * last_mft_change_time in the standard information 7068 * attribute, gives us a de facto deleted time that can 7069 * be used by ntfsck and ntfsundelete for example. 7070 */ 7071 si = (STANDARD_INFORMATION*)((u8*)a + 7072 le16_to_cpu(a->value_offset)); 7073 si->last_data_change_time = utc2ntfs( 7074 base_ni->last_data_change_time); 7075 si->last_mft_change_time = utc2ntfs( 7076 base_ni->last_mft_change_time); 7077 si->last_access_time = utc2ntfs( 7078 base_ni->last_access_time); 7079 /* Whilst here also update the file attributes. 
*/ 7080 si->file_attributes = base_ni->file_attributes; 7081 /* 7082 * We need to take care to handle NTFS 1.x style 7083 * standard information attributes on NTFS 3.0+ volumes 7084 * as they are lazily updated on write after a volume 7085 * has been upgraded from 1.x and after a volume has 7086 * been accessed by an older NTFS driver such as the 7087 * one in Windows NT4. 7088 */ 7089#if 0 7090 if (vol->major_ver <= 3 || 7091 le32_to_cpu(a->value_length) < 7092 sizeof(STANDARD_INFORMATION)) 7093 continue; 7094#endif 7095 /* 7096 * We have an NTFS 3.0+ style, extended standard 7097 * information attribute. 7098 */ 7099 /* 7100 * TODO: When we implement support for $UsnJrnl, we 7101 * will need to journal the delete event and update the 7102 * usn field in the standard information attribute. 7103 * For now this is not needed as we stamp the 7104 * transaction log thus telling applications querying 7105 * the transaction log that it does not contain 7106 * uptodate information. We cannot do this at unlink 7107 * time because there may still be writes and truncates 7108 * happening due to existing open file descriptors and 7109 * the delete event has to come last. 7110 */ 7111 /* 7112 * TODO: When we implement support for quotas, we will 7113 * need to update the quota control entry belonging to 7114 * the user_id specified in the owner_id field in the 7115 * standard information attribute by updating its 7116 * change_time field to the current time and 7117 * decrementing its bytes_used field by the amount 7118 * specified in the quota_charged field in the standard 7119 * information attribute as well as setting the 7120 * exceeded_time to 0 if we go from over the soft quota 7121 * specified in the limit of the quota control entry. 7122 * For now this is not needed as we mark all quotas as 7123 * invalid when we mount a volume read-write. 
We 7124 * cannot do the quota update at unlink time because 7125 * there may still be writes and truncates happening 7126 * due to existing open file descriptors which will 7127 * affect the quota related fields. 7128 */ 7129 continue; 7130 } 7131 /* 7132 * For non-resident attribute records, we need to free all the 7133 * clusters specified in their mapping pairs array. 7134 * 7135 * If this is the base extent, we only need to do this if the 7136 * allocated size is not zero. If this is not the base extent 7137 * then by definition the allocated size cannot be zero and 7138 * more importantly an extent mft rceord does not have the 7139 * allocated_size field set thus it is always zero. 7140 */ 7141 if (!a->lowest_vcn && !a->allocated_size) 7142 continue; 7143 rl.rl = NULL; 7144 rl.alloc = rl.elements = 0; 7145 err = ntfs_mapping_pairs_decompress(vol, a, &rl); 7146 if (!err) { 7147 VCN lowest_vcn; 7148 7149 /* 7150 * We need to supply the correct start and count values 7151 * otherwise freeing the clusters fails when an 7152 * attribute has multiple extent records because the 7153 * runlist contains unmapped elements. 7154 */ 7155 lowest_vcn = sle64_to_cpu(a->lowest_vcn); 7156 err = ntfs_cluster_free_from_rl(vol, rl.rl, lowest_vcn, 7157 sle64_to_cpu(a->highest_vcn) + 1 - 7158 lowest_vcn, NULL); 7159 if (err) { 7160 ntfs_warning(vol->mp, "Failed to free some " 7161 "allocated clusters belonging " 7162 "to mft_no 0x%llx (error " 7163 "%d). Run chkdsk to recover " 7164 "the lost space.", 7165 (unsigned long long)ni->mft_no, 7166 err); 7167 NVolSetErrors(vol); 7168 } 7169 OSFree(rl.rl, rl.alloc, ntfs_malloc_tag); 7170 } else { 7171 ntfs_error(vol->mp, "Cannot free some allocated space " 7172 "belonging to mft_no 0x%llx because " 7173 "the decompression of the mapping " 7174 "pairs array failed (error %d). 
Run " 7175 "chkdsk to recover the lost space.", 7176 (unsigned long long)ni->mft_no, err); 7177 NVolSetErrors(vol); 7178 } 7179 } 7180 /* 7181 * We have processed all attributes in the base mft record thus we can 7182 * mark it as not in use, increment its sequence number, and mark it 7183 * dirty for later writeout. 7184 */ 7185 m->flags &= ~MFT_RECORD_IN_USE; 7186 if (m->sequence_number != const_cpu_to_le16(0xffff)) 7187 m->sequence_number = cpu_to_le16( 7188 le16_to_cpu(m->sequence_number) + 1); 7189 else 7190 m->sequence_number = const_cpu_to_le16(1); 7191 ni->seq_no = le16_to_cpu(m->sequence_number); 7192 NInoSetMrecNeedsDirtying(ni); 7193} 7194 7195/** 7196 * ntfs_vnop_inactive - the last reference to a vnode has been dropped 7197 * @args: arguments to inactive function 7198 * 7199 * @args contains: 7200 * vnode_t a_vp; vnode whose last reference has been dropped 7201 * vfs_context_t a_context; 7202 * 7203 * Last reference to a vnode has been dropped or a forced unmount is in 7204 * progress. 7205 * 7206 * Note: When called from reclaim, the vnode has a zero v_iocount and 7207 * v_usecount and vnode_isrecycled() is true. 7208 * 7209 * Return 0 on success and errno on error. 7210 * 7211 * Note the current OS X VFS ignores the return value from VNOP_INACTIVE() and 7212 * hence ntfs_vnop_inactive(). 7213 */ 7214static int ntfs_vnop_inactive(struct vnop_inactive_args *args) 7215{ 7216 leMFT_REF mref; 7217 vnode_t vn = args->a_vp; 7218 ntfs_inode *base_ni, *mftbmp_ni, *ni = NTFS_I(vn); 7219 ntfs_volume *vol; 7220 MFT_RECORD *m; 7221 leMFT_REF *mrefs; 7222 unsigned nr_mrefs; 7223 errno_t err; 7224 BOOL is_delete; 7225 7226 if (!ni) { 7227 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 7228 return 0; 7229 } 7230 is_delete = !ni->link_count; 7231 vol = ni->vol; 7232 ntfs_debug("Entering for mft_no 0x%llx, type 0x%x, name_len 0x%x%s.", 7233 (unsigned long long)ni->mft_no, 7234 (unsigned)le32_to_cpu(ni->type), (unsigned)ni->name_len, 7235 is_delete ? 
", is delete" : ""); 7236 base_ni = ni; 7237 if (NInoAttr(ni)) 7238 base_ni = ni->base_ni; 7239 /* 7240 * This is the last close thus remove any directory hints. 7241 * 7242 * Note we check for presence of directory hints outside the locks as 7243 * an optimization. It is not a disaster if we miss any as all will be 7244 * released in ntfs_inode_free() before the inode is thrown away at the 7245 * latest. 7246 */ 7247 if (ni != base_ni && ni->type == AT_INDEX_ALLOCATION && 7248 ni->nr_dirhints) { 7249 lck_rw_lock_exclusive(&ni->lock); 7250 ntfs_dirhints_put(ni, 0); 7251 lck_rw_unlock_exclusive(&ni->lock); 7252 } 7253 /* 7254 * If the inode is not being deleted or this is a raw inode sync it and 7255 * we are done. 7256 */ 7257 if (!is_delete || NInoRaw(ni)) { 7258sync: 7259 /* 7260 * Commit dirty data to disk unless mounted read-only. 7261 * 7262 * WARNING: Please see <rdar://problem/7202356> why this causes 7263 * stack exhaustion and kernel panics by creating a loop where 7264 * the VNOP_INACTIVE() calls ntfs_inode_sync() which ends up 7265 * doing ntfs_inode_get() which in turn triggers another 7266 * VNOP_INACTIVE() which in turn calls ntfs_inode_sync() and 7267 * thus ntfs_inode_get() which in turns calls VNOP_INACTIVE() 7268 * and so on until the stack overflows. 7269 */ 7270 err = 0; 7271 if (!NVolReadOnly(vol)) 7272 err = ntfs_inode_sync(ni, IO_SYNC | IO_CLOSE, FALSE); 7273 if (!err) 7274 ntfs_debug("Done."); 7275 else 7276 ntfs_error(vol->mp, "Failed to sync mft_no 0x%llx, " 7277 "type 0x%x, name_len 0x%x (error %d).", 7278 (unsigned long long)ni->mft_no, 7279 (unsigned)le32_to_cpu(ni->type), 7280 (unsigned)ni->name_len, err); 7281 return err; 7282 } 7283 if (ni != base_ni) 7284 lck_rw_lock_exclusive(&base_ni->lock); 7285 lck_rw_lock_exclusive(&ni->lock); 7286 /* Do not allow messing with the inode once it has been deleted. */ 7287 if (NInoDeleted(ni)) { 7288 /* Remove the inode from the name cache. 
*/ 7289 cache_purge(vn); 7290 lck_rw_unlock_exclusive(&ni->lock); 7291 if (ni != base_ni) 7292 lck_rw_unlock_exclusive(&base_ni->lock); 7293 ntfs_debug("Done (was already deleted)."); 7294 return 0; 7295 } 7296 /* 7297 * If someone else re-instantiated the inode whilst we were waiting for 7298 * the inode lock sync the inode instead of deleting it. 7299 */ 7300 if (ni->link_count) { 7301 lck_rw_unlock_exclusive(&ni->lock); 7302 if (ni != base_ni) 7303 lck_rw_unlock_exclusive(&base_ni->lock); 7304 ntfs_debug("Someone re-instantiated the inode."); 7305 goto sync; 7306 } 7307 /* 7308 * The inode has been unlinked, delete it now freeing all allocated 7309 * space on disk as well as all related resources on disk. Note we 7310 * proceed on errors because there is not much we can do about them. 7311 * We have to carry on regardless as the inode is about to be 7312 * terminated in any case. 7313 * 7314 * On a metadata affecting error, we mark the volume dirty and leave it 7315 * to a subsequent chkdsk to clean up after us. This is not a disaster 7316 * since there are no directory entries pointing to the inode @ni any 7317 * more, thus us failing just means that we will keep some on disk 7318 * resources allocated so chkdsk will just find this file and delete 7319 * it. 7320 * 7321 * First, remove the inode from the inode cache so it cannot be found 7322 * any more. 7323 */ 7324 lck_mtx_lock(&ntfs_inode_hash_lock); 7325 /* 7326 * Mark the inode as having been deleted so we do not try to remove it 7327 * from the ntfs inode hash again in ntfs_inode_reclaim(). 7328 */ 7329 NInoSetDeleted(ni); 7330 /* 7331 * Remove the ntfs_inode from the inode hash so it cannot be looked up 7332 * any more. 7333 */ 7334 ntfs_inode_hash_rm_nolock(ni); 7335 lck_mtx_unlock(&ntfs_inode_hash_lock); 7336 /* Remove the inode from the name cache if it is still in it. */ 7337 cache_purge(vn); 7338 /* 7339 * The inode/vnode are no longer reachable at all so drop the inode 7340 * lock. 
Anyone waiting on the lock should test for NInoDeleted() and 7341 * abort once they have taken the lock. 7342 */ 7343 lck_rw_unlock_exclusive(&ni->lock); 7344 /* In case someone is waiting on the inode do a wakeup. */ 7345 ntfs_inode_wakeup(ni); 7346 /* Invalidate all buffers to do with the vnode. */ 7347 err = buf_invalidateblks(vn, 0, 0, 0); 7348 if (err) 7349 ntfs_error(vol->mp, "Failed to invalidate cached buffers " 7350 "(error %d).", err); 7351 /* 7352 * Invalidate all cached pages in the VM. 7353 * 7354 * This will fail for non-regular (VREG) nodes as they do not have UBC 7355 * info attached to them and ubc_msync() returns error in this case. 7356 */ 7357 if (vnode_isreg(vn)) { 7358 err = ubc_msync(vn, 0, ubc_getsize(vn), NULL, UBC_INVALIDATE); 7359 if (err) 7360 ntfs_error(vol->mp, "Failed to invalidate cached " 7361 "pages (error %d).", err); 7362 } 7363 /* 7364 * Cause the vnode to be reused immediately when we return rather than 7365 * sitting around in the vnode cache. 7366 */ 7367 vnode_recycle(vn); 7368 /* 7369 * ntfs_unlink() and ntfs_vnop_rename() bail out for attribute inodes 7370 * so we cannot get here with an attribute inode unless something has 7371 * gone badly wrong. 7372 * 7373 * When a named stream is deleted via VNOP_REMOVENAMEDSTREAM() its 7374 * link_count is set to zero so we get here on the last close. We have 7375 * to perform the actual freeing of allocated space if the attribute is 7376 * non-resident as well as the removal of the attribute record here. 7377 */ 7378 if (ni != base_ni) { 7379 ntfs_attr_search_ctx *ctx; 7380 7381 if (ni->type != AT_DATA || !ni->name_len) 7382 panic("%s(): ni != base_ni && (ni->type != AT_DATA || " 7383 "!ni->name_len)\n", __FUNCTION__); 7384 /* 7385 * For simplicity, if the attribute is non-resident, we 7386 * truncate the attribute to zero size first as that causes 7387 * both the allocated clusters to be freed as well as all 7388 * extent attribute records to be deleted. 
7389 * 7390 * We then only need to remove the base attribute record and we 7391 * are done. 7392 */ 7393 if (NInoNonResident(ni)) { 7394 err = ntfs_attr_resize(ni, 0, 0, NULL); 7395 if (err) { 7396 ntfs_error(vol->mp, "Cannot delete named " 7397 "stream from mft_no 0x%llx " 7398 "because truncating the " 7399 "stream inode to zero size " 7400 "failed (error %d).", 7401 (unsigned long long)ni->mft_no, 7402 err); 7403 goto err; 7404 } 7405 } 7406 /* Remove the named stream. */ 7407 err = ntfs_mft_record_map(base_ni, &m); 7408 if (err) { 7409 ntfs_error(vol->mp, "Failed to delete named stream " 7410 "because mapping the mft record " 7411 "0x%llx failed (error %d).", 7412 (unsigned long long)ni->mft_no, err); 7413 goto err; 7414 } 7415 ctx = ntfs_attr_search_ctx_get(base_ni, m); 7416 if (!ctx) { 7417 ntfs_error(vol->mp, "Failed to delete named stream " 7418 "because allocating an attribute " 7419 "search context failed."); 7420 goto unm_err; 7421 } 7422 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, 7423 NULL, 0, ctx); 7424 if (err) { 7425 ntfs_error(vol->mp, "Failed to delete named stream " 7426 "because looking up the named $DATA " 7427 "attribute in the mft record 0x%llx " 7428 "failed (error %d).", 7429 (unsigned long long)ni->mft_no, err); 7430 goto put_err; 7431 } 7432 err = ntfs_attr_record_delete(base_ni, ctx); 7433 if (err) { 7434 ntfs_error(vol->mp, "Failed to delete named stream " 7435 "because deleting the named $DATA " 7436 "attribute from its mft record 0x%llx " 7437 "failed (error %d).", 7438 (unsigned long long)ctx->ni->mft_no, 7439 err); 7440 goto put_err; 7441 } 7442 ntfs_debug("Done (deleted attribute inode)."); 7443put_err: 7444 ntfs_attr_search_ctx_put(ctx); 7445unm_err: 7446 ntfs_mft_record_unmap(base_ni); 7447err: 7448 lck_rw_unlock_exclusive(&base_ni->lock); 7449 return err; 7450 } 7451 /* 7452 * We only need to be concerned with the allocated space on disk which 7453 * we need to deallocate and any related resources on disk, which 
we 7454 * also need to deallocate and/or mark unused. To do this, we map the 7455 * base mft record and iterate over all its attributes and deal with 7456 * each of them in sequence. 7457 */ 7458 err = ntfs_mft_record_map(ni, &m); 7459 if (err) { 7460 ntfs_warning(vol->mp, "Cannot release deleted mft_no 0x%llx " 7461 "because the mapping of the base mft record " 7462 "failed (error %d). Run chkdsk to recover " 7463 "lost resources.", 7464 (unsigned long long)ni->mft_no, err); 7465 NVolSetErrors(vol); 7466 return 0; 7467 } 7468 /* 7469 * Make sure the mft record was marked as not in use in 7470 * ntfs_unlink_internal(). 7471 */ 7472 if (m->flags & MFT_RECORD_IN_USE) 7473 panic("%s(): m->flags & MFT_RECORD_IN_USE\n", __FUNCTION__); 7474 /* 7475 * We will need the mft reference of the base mft record below but we 7476 * are about to change it thus make a note of the old one now. 7477 */ 7478 mref = MK_LE_MREF(ni->mft_no, ni->seq_no); 7479 /* 7480 * Release all clusters allocated to attribute records located in the 7481 * extent mft record. 7482 */ 7483 ntfs_mft_record_free_all(ni, ni, m); 7484 /* 7485 * We are finished with the base mft record, if there is an attribute 7486 * list attribute, we iterate over its entries and each time we 7487 * encounter an extent mft record that we have not done yet, we map it 7488 * and iterate over all its attributes as we did above for the base mft 7489 * record, followed by marking the extent mft record as not in use, 7490 * incrementing its sequence number, and marking it dirty, again as we 7491 * did above for the base mft record. Finally, we add it to our list 7492 * of mft records to deallocate from the $MFT/$BITMAP attribute. 7493 * 7494 * As an optimization, we reuse the attribute list buffer as our list 7495 * of mft records to deallocate from the $MFT/$BITMAP attribute. 
This 7496 * works because each ATTR_LIST_ENTRY record in the attribute list 7497 * attribute is at least 24 bytes long and we only need to store 8 7498 * bytes for each mft reference in our list of mft records to 7499 * deallocate so we are guaranteed to have enough space in the buffer 7500 * for our needs and we are also guaranteed that we will never 7501 * overwrite part of the attribute list attribute data that we have not 7502 * dealt with yet. 7503 */ 7504 nr_mrefs = 1; 7505 mrefs = &mref; 7506 if (NInoAttrList(ni)) { 7507 ATTR_LIST_ENTRY *entry, *next_entry, *end; 7508 ntfs_inode *eni; 7509 7510 if (!ni->attr_list || ni->attr_list_size < sizeof(leMFT_REF) || 7511 !ni->attr_list_alloc) 7512 panic("%s(): !ni->attr_list || !ni->attr_list_size || " 7513 "!ni->attr_list_alloc\n", __FUNCTION__); 7514 entry = (ATTR_LIST_ENTRY*)ni->attr_list; 7515 mrefs = (leMFT_REF*)entry; 7516 next_entry = (ATTR_LIST_ENTRY*)((u8*)entry + 7517 le16_to_cpu(entry->length)); 7518 end = (ATTR_LIST_ENTRY*)(ni->attr_list + ni->attr_list_size); 7519 /* 7520 * Add the mft reference of the base mft record as the first 7521 * element in our list as we have already dealt with it. 7522 */ 7523 *mrefs = mref; 7524 while (entry < end) { 7525 unsigned i; 7526 7527 mref = entry->mft_reference; 7528 for (i = 0; i < nr_mrefs; i++) { 7529 if (mref == mrefs[i]) 7530 goto do_next; 7531 } 7532 /* 7533 * This mft reference has not been encountered before. 7534 * Add it to the list of mft references and free all 7535 * disk storage associated with all the attribute 7536 * records stored in the mft record with this mft 7537 * reference. 7538 */ 7539 mrefs[nr_mrefs++] = mref; 7540 err = ntfs_extent_mft_record_map(ni, le64_to_cpu(mref), 7541 &eni, &m); 7542 if (!err) { 7543 /* 7544 * Release all clusters allocated to attribute 7545 * records located in the extent mft record and 7546 * mark the mft record as not in use. 7547 * 7548 * We need to ensure the mft record is marked 7549 * as in use. 
It can happen that it is not 7550 * marked in use after a system crash occurs 7551 * whilst a file is being extended. 7552 */ 7553 if (m->flags & MFT_RECORD_IN_USE) 7554 ntfs_mft_record_free_all(ni, eni, m); 7555 else { 7556 ntfs_warning(vol->mp, "Extent mft_no " 7557 "0x%llx, base mft_no " 7558 "0x%llx is marked as " 7559 "not in use. Cannot " 7560 "release allocated " 7561 "clusters. Unmount " 7562 "and run chkdsk to " 7563 "recover the lost " 7564 "clusters.", 7565 (unsigned long long) 7566 MREF_LE(mref), 7567 (unsigned long long) 7568 ni->mft_no); 7569 NVolSetErrors(vol); 7570 } 7571 /* Unmap the mft record again. */ 7572 ntfs_extent_mft_record_unmap(eni); 7573 } else { 7574 ntfs_warning(vol->mp, "Failed to release " 7575 "allocated clusters because " 7576 "mapping extent mft_no 0x%llx, " 7577 "base mft_no 0x%llx failed " 7578 "(error %d). Unmount and run " 7579 "chkdsk to recover the lost " 7580 "clusters.", 7581 (unsigned long long)MREF_LE(mref), 7582 (unsigned long long)ni->mft_no, 7583 err); 7584 NVolSetErrors(vol); 7585 } 7586do_next: 7587 entry = next_entry; 7588 next_entry = (ATTR_LIST_ENTRY*)((u8*)entry + 7589 le16_to_cpu(entry->length)); 7590 } 7591 } 7592 ntfs_mft_record_unmap(ni); 7593 /* 7594 * Mark the base mft record and all extent mft records (if any) as 7595 * unused in the mft bitmap. 7596 * 7597 * Note that this means that ntfs_inode_reclaim() may run when someone 7598 * else has already reused one of the mft records we are freeing now. 7599 * This is ok because all ntfs_inode_reclaim() does is to do some 7600 * memory freeing. And we have already removed the inode from the 7601 * inode cache thus there are no problems from that point of view 7602 * either. 7603 */ 7604 lck_rw_lock_exclusive(&vol->mftbmp_lock); 7605 mftbmp_ni = vol->mftbmp_ni; 7606 err = vnode_get(mftbmp_ni->vn); 7607 if (err) 7608 ntfs_warning(vol->mp, "Failed to get vnode for $MFT/$BITMAP " 7609 "(error %d) thus cannot release mft " 7610 "record(s). 
Run chkdsk to recover the lost " 7611 "mft record(s).", err); 7612 else { 7613 lck_rw_lock_shared(&mftbmp_ni->lock); 7614 while (nr_mrefs > 0) { 7615 nr_mrefs--; 7616 err = ntfs_bitmap_clear_bit(mftbmp_ni, 7617 MREF_LE(mrefs[nr_mrefs])); 7618 if (!err) { 7619 /* 7620 * We cleared a bit in the mft bitmap thus we 7621 * need to reflect this in the cached number of 7622 * free mft records. 7623 */ 7624 vol->nr_free_mft_records++; 7625 if (vol->nr_free_mft_records >= 7626 vol->nr_mft_records) 7627 panic("%s(): vol->nr_free_mft_records " 7628 "> vol->nr_mft_records" 7629 "\n", __FUNCTION__); 7630 } else { 7631 ntfs_error(vol->mp, "Failed to free mft_no " 7632 "0x%llx (error %d). Run " 7633 "chkdsk to recover the lost " 7634 "mft record.", 7635 (unsigned long long) 7636 MREF_LE(mrefs[nr_mrefs]), err); 7637 NVolSetErrors(vol); 7638 } 7639 } 7640 lck_rw_unlock_shared(&mftbmp_ni->lock); 7641 (void)vnode_put(mftbmp_ni->vn); 7642 } 7643 lck_rw_unlock_exclusive(&vol->mftbmp_lock); 7644 ntfs_debug("Done (deleted base inode)."); 7645 return 0; 7646} 7647 7648/** 7649 * ntfs_vnop_reclaim - free ntfs specific parts of a vnode so it can be reused 7650 * @a: arguments to reclaim function 7651 * 7652 * @a contains: 7653 * vnode_t a_vp; vnode to be reclaimed 7654 * vfs_context_t a_context; 7655 * 7656 * Reclaim a vnode so it can be used for other purposes. 7657 * 7658 * Note: This is called from reclaim. The vnode has a zero v_iocount and 7659 * v_usecount and vnode_isrecycled() is true. 7660 * 7661 * Return 0 on success and errno on error. 7662 * 7663 * Note the current OS X VFS panic()s the machine if VNOP_RECLAIM() and hence 7664 * ntfs_vnop_reclaim() returns an error. 7665 */ 7666static int ntfs_vnop_reclaim(struct vnop_reclaim_args *a) 7667{ 7668 vnode_t vn = a->a_vp; 7669 ntfs_inode *ni = NTFS_I(vn); 7670 errno_t err; 7671 7672 /* Do not dereference @ni if it is NULL. 
*/ 7673#ifdef DEBUG 7674 if (ni) 7675 ntfs_debug("Entering for mft_no 0x%llx, type 0x%x, name_len " 7676 "0x%x.", (unsigned long long)ni->mft_no, 7677 le32_to_cpu(ni->type), (unsigned)ni->name_len); 7678 else 7679 ntfs_debug("Entering for already reclaimed vnode!"); 7680#endif 7681 vnode_removefsref(vn); 7682 err = ntfs_inode_reclaim(ni); 7683 ntfs_debug("Done (error %d).", (int)err); 7684 return err; 7685} 7686 7687/** 7688 * ntfs_vnop_pathconf - get configurable pathname variables 7689 * @a: arguments to pathconf function 7690 * 7691 * @a contains: 7692 * vnode_t a_vp; vnode for which to return pathconf information 7693 * int a_name; the pathconf variable to be queried 7694 * register_t *a_retval; destination for result of query 7695 * vfs_context_t a_context; 7696 * 7697 * Return POSIX pathconf information applicable to ntfs file system. Some 7698 * @a_name values are intercepted by the VFS in vn_pathconf (pathconf(2) -> 7699 * vn_pathconf() -> VNOP_PATHCONF() -> ntfs_vnop_pathconf()) so we do not 7700 * bother with them. 7701 * 7702 * Return 0 on success and EINVAL if an unsupported @a_name was queried for. 7703 */ 7704static int ntfs_vnop_pathconf(struct vnop_pathconf_args *a) 7705{ 7706 ntfs_inode *ni = NTFS_I(a->a_vp); 7707 ntfs_volume *vol = NTFS_MP(vnode_mount(a->a_vp)); 7708 errno_t err = 0; 7709 7710 ntfs_debug("Entering for pathconf variable number %d.", a->a_name); 7711 if (ni) { 7712 lck_rw_lock_shared(&ni->lock); 7713 /* 7714 * Do not allow messing with the inode once it has been 7715 * deleted. 7716 */ 7717 if (NInoDeleted(ni)) { 7718 /* Remove the inode from the name cache. */ 7719 cache_purge(ni->vn); 7720 lck_rw_unlock_shared(&ni->lock); 7721 ntfs_debug("Directory is deleted."); 7722 return ENOENT; 7723 } 7724 } 7725 switch (a->a_name) { 7726 case _PC_LINK_MAX: 7727 /* 7728 * The maximum file link count. For ntfs, the link count is 7729 * stored in the mft record in the link_count field which is of 7730 * type le16, thus 16 bits. 
For attribute inodes and 7731 * directories however, no hard links are allowed and thus the 7732 * maximum link count is 1. 7733 */ 7734 if (!ni) { 7735 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 7736 return EINVAL; 7737 } 7738 *a->a_retval = NTFS_MAX_HARD_LINKS; 7739 if (NInoAttr(ni) || S_ISDIR(ni->mode)) 7740 *a->a_retval = 1; 7741 break; 7742 case _PC_NAME_MAX: 7743 /* 7744 * The maximum number of bytes in a filename. For ntfs, this 7745 * is stored in the attribute record in the name_length field 7746 * which is of type u8, thus 8 bits. 7747 */ 7748 *a->a_retval = NTFS_MAX_NAME_LEN; /* 255 */ 7749 break; 7750 case _PC_PATH_MAX: 7751 /* 7752 * The maximum number of bytes in a path name. Ntfs imposes no 7753 * restrictions so use the system limit. 7754 */ 7755 *a->a_retval = PATH_MAX; /* 1024 */ 7756 break; 7757 case _PC_PIPE_BUF: 7758 /* 7759 * The maximum number of bytes which will be written atomically 7760 * to a pipe, again ntfs imposes no restrictions so use the 7761 * system limit. 7762 */ 7763 *a->a_retval = PIPE_BUF; /* 512 */ 7764 break; 7765 case _PC_CHOWN_RESTRICTED: 7766 /* 7767 * Non-zero if appropriate privileges are required for the 7768 * chown(2) system call. For ntfs, this is always the case. 7769 */ 7770 *a->a_retval = 200112; /* unistd.h: _POSIX_CHOWN_RESTRICTED */ 7771 break; 7772 case _PC_NO_TRUNC: 7773 /* 7774 * Non-zero if accessing filenames longer than _POSIX_NAME_MAX 7775 * (which we specified above to be NTFS_MAX_NAME_LEN) generates 7776 * an error. For ntfs, this is always the case. 7777 */ 7778 *a->a_retval = 200112; /* unistd.h: _POSIX_NO_TRUNC */ 7779 break; 7780 case _PC_NAME_CHARS_MAX: 7781 /* 7782 * The maximum number of characters in a filename. This is 7783 * the same as _PC_NAME_MAX, above. 7784 */ 7785 *a->a_retval = NTFS_MAX_NAME_LEN; /* 255 */ 7786 break; 7787 case _PC_CASE_SENSITIVE: 7788 /* 7789 * Return 1 if case sensitive and 0 if not. For ntfs, this 7790 * depends on the mount options. 
7791 */ 7792 if (vol) 7793 *a->a_retval = (NVolCaseSensitive(vol) ? 1 : 0); 7794 else 7795 err = EINVAL; 7796 break; 7797 case _PC_CASE_PRESERVING: 7798 /* 7799 * Return 1 if case preserving and 0 if not. For ntfs, this is 7800 * always 1, i.e. ntfs always preserves case. 7801 */ 7802 *a->a_retval = 1; 7803 break; 7804 case _PC_FILESIZEBITS: 7805 /* 7806 * The number of bits to represent file size. For ntfs, the 7807 * file size is stored in the attribute record in the data_size 7808 * field which is of type sle64, thus 63 bits. 7809 */ 7810 *a->a_retval = 63; 7811 break; 7812 default: 7813 err = EINVAL; 7814 } 7815 if (ni) 7816 lck_rw_unlock_shared(&ni->lock); 7817 ntfs_debug("Done (error %d).", (int)err); 7818 return err; 7819} 7820 7821/** 7822 * ntfs_vnop_allocate - 7823 */ 7824static int ntfs_vnop_allocate(struct vnop_allocate_args *a) 7825{ 7826 errno_t err; 7827 7828 ntfs_debug("Entering."); 7829 // TODO: 7830 (void)nop_allocate(a); 7831 err = ENOTSUP; 7832 ntfs_debug("Done (error %d).", (int)err); 7833 return err; 7834} 7835 7836/** 7837 * ntfs_vnop_pagein - read a range of pages into memory 7838 * @a: arguments to pagein function 7839 * 7840 * @a contains: 7841 * vnode_t a_vp; vnode whose data to read into the page range 7842 * upl_t a_pl; page list describing destination page range 7843 * upl_offset_t a_pl_offset; byte offset into page list at which to start 7844 * off_t a_f_offset; byte offset in the vnode at which to start 7845 * size_t a_size; number of bytes to read from the vnode 7846 * int a_flags; flags further describing the pagein request 7847 * vfs_context_t a_context; 7848 * 7849 * Read @a->a_size bytes from the vnode @a-a_vp, starting at byte offset 7850 * @a->a_f_offset into the vnode, into the range of pages specified by the page 7851 * list @a->a_pl, starting at byte offset @a->a_pl_offset into the page list. 7852 * 7853 * The flags in @a->a_flags further describe the pagein request. 
The following 7854 * pagein flags are currently defined in OS X kernel: 7855 * UPL_IOSYNC - Perform synchronous i/o. 7856 * UPL_NOCOMMIT - Do not commit/abort the page range. 7857 * UPL_NORDAHEAD - Do not perform any speculative read-ahead. 7858 * IO_PASSIVE - This is background i/o so do not throttle other i/o. 7859 * 7860 * For encrypted attributes we abort for now as we do not support them yet. 7861 * 7862 * For non-resident, non-compressed attributes we use cluster_pagein_ext() 7863 * which deals with both normal and multi sector transfer protected attributes. 7864 * 7865 * For resident attributes and non-resident, compressed attributes we read the 7866 * data ourselves by mapping the page list, and in the resident case, mapping 7867 * the mft record, looking up the attribute in it, and copying the requested 7868 * data from the mapped attribute into the page list, then unmapping the mft 7869 * record, whilst for non-resident, compressed attributes, we get the raw inode 7870 * and use it with ntfs_read_compressed() to read and decompress the data into 7871 * our mapped page list. We then unmap the page list and finally, if 7872 * UPL_NOCOMMIT is not specified, we commit (success) or abort (error) the page 7873 * range. 7874 * 7875 * Return 0 on success and errno on error. 7876 * 7877 * Note the pages in the page list are marked busy on entry and the busy bit is 7878 * cleared when we commit the page range. Thus it is perfectly safe for us to 7879 * fill the pages with encrypted or mst protected data and to decrypt or mst 7880 * deprotect in place before committing the page range. 7881 * 7882 * Adapted from cluster_pagein_ext(). 
7883 */ 7884static int ntfs_vnop_pagein(struct vnop_pagein_args *a) 7885{ 7886 ntfs_inode *base_ni, *ni = NTFS_I(a->a_vp); 7887 int err; 7888 7889 if (!ni) { 7890 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 7891 if (!(a->a_flags & UPL_NOCOMMIT) && a->a_pl) 7892 ubc_upl_abort_range(a->a_pl, a->a_pl_offset, a->a_size, 7893 UPL_ABORT_FREE_ON_EMPTY | 7894 UPL_ABORT_ERROR); 7895 return EINVAL; 7896 } 7897 base_ni = ni; 7898 if (NInoAttr(ni)) 7899 base_ni = ni->base_ni; 7900 ntfs_debug("Entering for mft_no 0x%llx, offset 0x%llx, size 0x%llx, " 7901 "pagein flags 0x%x, page list offset 0x%llx.", 7902 (unsigned long long)ni->mft_no, 7903 (unsigned long long)a->a_f_offset, 7904 (unsigned long long)a->a_size, a->a_flags, 7905 (unsigned long long)a->a_pl_offset); 7906 err = ntfs_pagein(ni, a->a_f_offset, a->a_size, a->a_pl, 7907 a->a_pl_offset, a->a_flags); 7908 /* 7909 * Update the last_access_time (atime) if something was read and this 7910 * is the base ntfs inode or it is a named stream (this is what HFS+ 7911 * does, too). 7912 * 7913 * Skip the update if atime updates are disabled via the noatime mount 7914 * option or the volume is read only or this is a symbolic link. 7915 * 7916 * Also, skip the core system files except for the root directory. 7917 */ 7918 if (!err && !NVolReadOnly(ni->vol) && 7919 !(vfs_flags(ni->vol->mp) & MNT_NOATIME) && 7920 !S_ISLNK(base_ni->mode) && 7921 (ni == base_ni || ni->type == AT_DATA)) { 7922 BOOL need_update_time; 7923 7924 need_update_time = TRUE; 7925 if (ni->vol->major_ver > 1) { 7926 if (base_ni->mft_no <= FILE_Extend && 7927 base_ni != ni->vol->root_ni) 7928 need_update_time = FALSE; 7929 } else { 7930 if (base_ni->mft_no <= FILE_UpCase && 7931 base_ni != ni->vol->root_ni) 7932 need_update_time = FALSE; 7933 } 7934 if (need_update_time) { 7935 base_ni->last_access_time = ntfs_utc_current_time(); 7936 NInoSetDirtyTimes(base_ni); 7937 } 7938 } 7939 return err; 7940} 7941 7942// TODO: Move to ntfs_page.[hc]. 
7943static int ntfs_mst_pageout(ntfs_inode *ni, upl_t upl, upl_offset_t upl_ofs, 7944 unsigned size, s64 attr_ofs, s64 attr_size, int flags) 7945{ 7946 ntfs_volume *vol = ni->vol; 7947 u8 *kaddr; 7948 kern_return_t kerr; 7949 unsigned rec_size, rec_shift, nr_recs, i; 7950 int err; 7951 NTFS_RECORD_TYPE magic = 0; 7952 BOOL do_commit; 7953 7954 do_commit = !(flags & UPL_NOCOMMIT); 7955 if (ni->type == AT_INDEX_ALLOCATION) 7956 magic = magic_INDX; 7957 else 7958 panic("%s(): Unknown mst protected inode 0x%llx, type 0x%x, " 7959 "name_len 0x%x.", __FUNCTION__, 7960 (unsigned long long)ni->mft_no, 7961 (unsigned)le32_to_cpu(ni->type), 7962 (unsigned)ni->name_len); 7963 ntfs_debug("Entering for mft_no 0x%llx, page list offset 0x%llx, size " 7964 "0x%x, offset 0x%llx, pageout flags 0x%x, magic is " 7965 "0x%x.", (unsigned long long)ni->mft_no, 7966 (unsigned long long)upl_ofs, size, 7967 (unsigned long long)attr_ofs, flags, 7968 (unsigned)le32_to_cpu(magic)); 7969 if (attr_ofs < 0 || attr_ofs >= attr_size || attr_ofs & PAGE_MASK_64 || 7970 size & PAGE_MASK || upl_ofs & PAGE_MASK) { 7971 err = EINVAL; 7972 goto err; 7973 } 7974 if (!NInoMstProtected(ni)) 7975 panic("%s(): Called for non-mst protected attribute.\n", 7976 __FUNCTION__); 7977 if (!NInoNonResident(ni)) 7978 panic("%s(): Resident mst protected attribute.\n", 7979 __FUNCTION__); 7980 rec_size = ni->block_size; 7981 if (attr_ofs & (rec_size - 1) || size & (rec_size - 1)) 7982 panic("%s(): Write not aligned to NTFS record boundary.\n", 7983 __FUNCTION__); 7984 rec_shift = ni->block_size_shift; 7985 /* Clip the number of records to the size of the attribute. */ 7986 nr_recs = size >> rec_shift; 7987 if (attr_ofs + size > attr_size) { 7988 unsigned to_write; 7989 7990 /* Abort any pages outside the end of the attribute. 
*/ 7991 to_write = attr_size - attr_ofs; 7992 nr_recs = to_write >> rec_shift; 7993 to_write = (to_write + PAGE_MASK) & ~PAGE_MASK; 7994 if (size != to_write) { 7995 if (size < to_write) 7996 panic("%s(): size less than to_write.\n", 7997 __FUNCTION__); 7998 ntfs_debug("Truncating write past end of attribute."); 7999 if (do_commit) 8000 ubc_upl_abort_range(upl, upl_ofs + to_write, 8001 size - to_write, 8002 UPL_ABORT_FREE_ON_EMPTY); 8003 size = to_write; 8004 } 8005 } 8006 if (!nr_recs) 8007 panic("%s(): NTFS record size greater than write size.\n", 8008 __FUNCTION__); 8009 /* 8010 * Need to apply the mst fixups and abort on errors. To apply the 8011 * fixups need to map the page list so we can access its contents. 8012 */ 8013 kerr = ubc_upl_map(upl, (vm_offset_t*)&kaddr); 8014 if (kerr != KERN_SUCCESS) { 8015 ntfs_error(vol->mp, "ubc_upl_map() failed (error %d).", 8016 (int)kerr); 8017 err = EIO; 8018 goto err; 8019 } 8020 /* 8021 * Loop over the records in the page list and for each apply the mst 8022 * fixups. On any fixup errors, remove all the applied fixups and 8023 * abort the write completely. 8024 */ 8025 for (i = 0; i < nr_recs; i++) { 8026 NTFS_RECORD *rec = (NTFS_RECORD*)(kaddr + (i << rec_shift)); 8027 if (__ntfs_is_magic(rec->magic, magic)) { 8028 err = ntfs_mst_fixup_pre_write(rec, rec_size); 8029 if (err) { 8030 ntfs_error(vol->mp, "Failed to apply mst " 8031 "fixups (mft_no 0x%llx, type " 8032 "0x%x, offset 0x%llx).", 8033 (unsigned long long)ni->mft_no, 8034 (unsigned)le32_to_cpu(ni->type), 8035 (unsigned long long)attr_ofs + 8036 (i << rec_shift)); 8037 goto mst_err; 8038 } 8039 } 8040 } 8041 /* Unmap the page list again so we can call cluster_pageout_ext(). */ 8042 // FIXME: Can we leave the page list mapped throughout the 8043 // cluster_pageout_ext() call? That would be a lot more efficient and 8044 // simplify error handling. 
8045 kerr = ubc_upl_unmap(upl); 8046 if (kerr != KERN_SUCCESS) { 8047 ntfs_error(vol->mp, "ubc_upl_unmap() failed (error %d).", 8048 (int)kerr); 8049 err = EIO; 8050 goto mst_err; 8051 } 8052 /* 8053 * We need the write to be synchronous so we do not leave the metadata 8054 * with the fixups applied for too long. 8055 * 8056 * We also need to set the no commit flag so we can still recover from 8057 * errors by removing the fixups. 8058 */ 8059 flags |= UPL_IOSYNC | UPL_NOCOMMIT; 8060 /* 8061 * On success the fixups will have been removed by the 8062 * ntfs_cluster_iodone() callback. 8063 */ 8064 err = cluster_pageout_ext(ni->vn, upl, upl_ofs, attr_ofs, size, 8065 attr_size, flags, ntfs_cluster_iodone, NULL); 8066 if (!err) { 8067 if (do_commit) { 8068 /* Commit the page range we wrote out. */ 8069 ubc_upl_commit_range(upl, upl_ofs, size, 8070 UPL_COMMIT_FREE_ON_EMPTY | 8071 UPL_COMMIT_CLEAR_DIRTY); 8072 } 8073 ntfs_debug("Done."); 8074 return err; 8075 } 8076 ntfs_error(vol->mp, "Failed (cluster_pageout_ext() returned error " 8077 "%d).", err); 8078 /* 8079 * We may have some records left with applied fixups thus remove them 8080 * again. It does not matter if it is done twice as this is an error 8081 * code path and the only side effect is a little slow down. 8082 */ 8083 kerr = ubc_upl_map(upl, (vm_offset_t*)&kaddr); 8084 if (kerr != KERN_SUCCESS) { 8085 ntfs_error(vol->mp, "ubc_upl_map() failed (error %d), cannot " 8086 "remove mst fixups. Unmount and run chkdsk.", 8087 (int)kerr); 8088 NVolSetErrors(vol); 8089 goto err; 8090 } 8091mst_err: 8092 /* Remove the applied fixups, unmap the page list and abort. 
*/ 8093 while (i > 0) { 8094 NTFS_RECORD *rec = (NTFS_RECORD*)(kaddr + (--i << rec_shift)); 8095 if (__ntfs_is_magic(rec->magic, magic)) 8096 ntfs_mst_fixup_post_write(rec); 8097 } 8098 kerr = ubc_upl_unmap(upl); 8099 if (kerr != KERN_SUCCESS) 8100 ntfs_error(vol->mp, "ubc_upl_unmap() failed (error %d).", 8101 (int)kerr); 8102err: 8103 if (do_commit) 8104 ubc_upl_abort_range(upl, upl_ofs, size, 8105 UPL_ABORT_FREE_ON_EMPTY); 8106 return err; 8107} 8108 8109/** 8110 * ntfs_vnop_pageout - write a range of pages to storage 8111 * @a: arguments to pageout function 8112 * 8113 * @a contains: 8114 * vnode_t a_vp; vnode whose data to write from the page range 8115 * upl_t a_pl; page list describing the source page range 8116 * upl_offset_t a_pl_offset; byte offset into page list at which to start 8117 * off_t a_f_offset; byte offset in the vnode at which to start 8118 * size_t a_size; number of bytes to write to the vnode 8119 * int a_flags; flags further describing the pageout request 8120 * vfs_context_t a_context; 8121 * 8122 * If UPL_NESTED_PAGEOUT is set in the flags (a->a_flags) we are called from 8123 * cluster_io() which is in turn called from cluster_write() which is in turn 8124 * called from ntfs_vnop_write() which means we are already holding the inode 8125 * lock (@ni->lock). Alternatively cluster_io() can be called from 8126 * cluster_push() which can be called from various places in NTFS. 8127 * 8128 * Write @a->a_size bytes to the vnode @a-a_vp, starting at byte offset 8129 * @a->a_f_offset into the vnode, from the range of pages specified by the page 8130 * list @a->a_pl, starting at byte offset @a->a_pl_offset into the page list. 8131 * 8132 * The flags in @a->a_flags further describe the pageout request. The 8133 * following pageout flags are currently defined in OS X kernel: 8134 * UPL_IOSYNC - Perform synchronous i/o. 8135 * UPL_NOCOMMIT - Do not commit/abort the page range. 8136 * UPL_KEEPCACHED - Data is already cached in memory, keep it cached. 
8137 * IO_PASSIVE - This is background i/o so do not throttle other i/o. 8138 * 8139 * For encrypted attributes we abort for now as we do not support them yet. 8140 * 8141 * For non-resident, non-compressed attributes we use cluster_pageout_ext() 8142 * which deals with both normal and multi sector transfer protected attributes. 8143 * 8144 * In the case of multi sector transfer protected attributes we apply the 8145 * fixups and then submit the i/o synchronously by setting the UPL_IOSYNC flag. 8146 * 8147 * For resident attributes and non-resident, compressed attributes we write the 8148 * data ourselves by mapping the page list, and in the resident case, mapping 8149 * the mft record, looking up the attribute in it, and copying the data to the 8150 * mapped attribute from the page list, then unmapping the mft record, whilst 8151 * for non-resident, compressed attributes, we get the raw inode and use it 8152 * with ntfs_write_compressed() to compress and write the data from our mapped 8153 * page list. We then unmap the page list and finally, if UPL_NOCOMMIT is not 8154 * specified, we commit (success) or abort (error) the page range. 8155 * 8156 * Return 0 on success and errno on error. 8157 * 8158 * Note the pages in the page list are marked busy on entry and the busy bit is 8159 * cleared when we commit the page range. Thus it is perfectly safe for us to 8160 * apply the mst fixups and write out the data which will then also take away 8161 * the fixups again before committing the page range. 8162 * 8163 * Adapted from cluster_pageout_ext(). 
8164 */ 8165static int ntfs_vnop_pageout(struct vnop_pageout_args *a) 8166{ 8167 s64 attr_ofs, attr_size, alloc_size, bytes; 8168 ntfs_inode *base_ni, *ni = NTFS_I(a->a_vp); 8169 upl_t upl = a->a_pl; 8170 ntfs_volume *vol; 8171 u8 *kaddr; 8172 upl_offset_t upl_ofs = a->a_pl_offset; 8173 kern_return_t kerr; 8174 unsigned to_write, size = a->a_size; 8175 int err, flags = a->a_flags; 8176 lck_rw_type_t lock_type = LCK_RW_TYPE_SHARED; 8177 BOOL locked = FALSE; 8178 8179 if (!ni) { 8180 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 8181 if (!(flags & UPL_NOCOMMIT) && upl) 8182 ubc_upl_abort_range(upl, upl_ofs, size, 8183 UPL_ABORT_FREE_ON_EMPTY); 8184 return EINVAL; 8185 } 8186 vol = ni->vol; 8187 attr_ofs = a->a_f_offset; 8188 base_ni = ni; 8189 if (NInoAttr(ni)) 8190 base_ni = ni->base_ni; 8191 ntfs_debug("Entering for mft_no 0x%llx, offset 0x%llx, size 0x%x, " 8192 "pageout flags 0x%x, page list offset 0x%llx.", 8193 (unsigned long long)ni->mft_no, 8194 (unsigned long long)attr_ofs, size, flags, 8195 (unsigned long long)upl_ofs); 8196 /* 8197 * If the caller did not specify any i/o, then we are done. We cannot 8198 * issue an abort because we do not have a upl or we do not know its 8199 * size. 8200 */ 8201 if (!upl || size <= 0) { 8202 ntfs_error(vol->mp, "NULL page list passed in or request size " 8203 "is below zero (error EINVAL)."); 8204 return EINVAL; 8205 } 8206 if (S_ISDIR(ni->mode)) { 8207 ntfs_error(vol->mp, "Called for directory vnode."); 8208 err = EISDIR; 8209 goto err; 8210 } 8211 if (NVolReadOnly(vol)) { 8212 err = EROFS; 8213 goto err; 8214 } 8215 /* 8216 * Need to clip i/o at maximum file size of 2^63-1 bytes in case 8217 * someone creates a sparse file and is playing silly with seek + write 8218 * note we only need to check for this for sparse files as non-sparse 8219 * files can never reach 2^63-1 because that is also the maximum space 8220 * on the volume thus the write would simply get an ENOSPC when the 8221 * volume is full. 
8222 */ 8223 if (NInoSparse(ni) && (u64)attr_ofs + size > NTFS_MAX_ATTRIBUTE_SIZE) { 8224 err = EFBIG; 8225 goto err; 8226 } 8227#if 1 // TODO: Remove this when sparse support is done... 8228 if (NInoSparse(ni)) { 8229 err = ENOTSUP; 8230 goto err; 8231 } 8232#endif 8233 /* 8234 * Protect against changes in initialized_size and thus against 8235 * truncation also but only if the VFS is not calling back into the 8236 * NTFS driver after the NTFS driver called it in which case we are 8237 * already holding the lock. 8238 * 8239 * There is a complication in that the UPL is already created by the 8240 * caller thus us taking the lock here is a case of lock reversal wrt 8241 * the UPL keeping the pages locked for exclusive access thus we can 8242 * deadlock with a concurrent file create for example when it holds the 8243 * ntfs inode lock @ni->lock for exclusive access on the index vnode of 8244 * the parent directory and then calls ntfs_page_map() to map a page 8245 * from the index as we already hold the same UPL that ntfs_page_map() 8246 * will try to get thus if we go to sleep on the ntfs inode lock that 8247 * is held exclusive by the create code path we would now deadlock. 8248 * 8249 * To avoid the deadlock, we do a try-lock for the ntfs inode lock and 8250 * if that fails we simply abort the pages returning them to the VM 8251 * without modification thus they should remain dirty and they should 8252 * be paged out at a later point in time. 8253 * 8254 * We then return ENXIO to indicate that this is a temporary failure to 8255 * the caller. 8256 * 8257 * FIXME: There is a complication and that is that we really need to 8258 * hole the inode lock for writing if we are writing to a hole and/or 8259 * writing past the initialized size as we would then be modifying the 8260 * initialized_size. But if UPL_NESTED_PAGEOUT is set we have no idea 8261 * whether the caller is holding the lock for write or not and we 8262 * cannot safely drop/retake the lock in any case... 
For now we ignore 8263 * the problem and just emit a warning in this case. 8264 */ 8265 if (!(flags & UPL_NESTED_PAGEOUT)) { 8266 if (NInoSparse(ni)) 8267 lock_type = LCK_RW_TYPE_EXCLUSIVE; 8268 if (!lck_rw_try_lock(&ni->lock, lock_type)) { 8269 ntfs_debug("Failed to take ni->lock for %s for mft_no " 8270 "0x%llx, type 0x%x. Aborting with " 8271 "ENXIO to avoid deadlock.", 8272 (lock_type == LCK_RW_TYPE_SHARED) ? 8273 "reading" : "writing", 8274 (unsigned long long)ni->mft_no, 8275 (unsigned)le32_to_cpu(ni->type)); 8276 if (!(flags & UPL_NOCOMMIT)) 8277 ubc_upl_abort_range(upl, upl_ofs, size, 8278 UPL_ABORT_FREE_ON_EMPTY); 8279 return ENXIO; 8280 } 8281 locked = TRUE; 8282 } else { 8283 if (NInoSparse(ni)) 8284 ntfs_warning(vol->mp, "flags & UPL_NESTED_PAGEOUT && " 8285 "NINoSparse(ni), need inode lock " 8286 "exclusive but caller holds the lock " 8287 "so we do not know if it is exclusive " 8288 "or not."); 8289 } 8290 /* Do not allow messing with the inode once it has been deleted. */ 8291 if (NInoDeleted(ni)) { 8292 /* Remove the inode from the name cache. */ 8293 cache_purge(ni->vn); 8294 err = ENOENT; 8295 goto err; 8296 } 8297retry_pageout: 8298 /* 8299 * TODO: This check may no longer be necessary now that we lock against 8300 * changes in initialized size and thus truncation... Revisit this 8301 * issue when the write code has been written and remove the check if 8302 * appropriate simply using ubc_getsize(vn); without the size_lock. 8303 */ 8304 lck_spin_lock(&ni->size_lock); 8305 attr_size = ubc_getsize(a->a_vp); 8306 if (attr_size > ni->data_size) 8307 attr_size = ni->data_size; 8308 /* 8309 * Cannot pageout to a negative offset or if we are starting beyond the 8310 * end of the attribute or if the attribute offset is not page aligned 8311 * or the size requested is not a multiple of PAGE_SIZE. 
8312 */ 8313 if (attr_ofs < 0 || attr_ofs >= attr_size || attr_ofs & PAGE_MASK_64 || 8314 size & PAGE_MASK || upl_ofs & PAGE_MASK) { 8315 lck_spin_unlock(&ni->size_lock); 8316 err = EINVAL; 8317 goto err; 8318 } 8319// TODO: HERE: 8320 // FIXME: For now abort writes beyond initialized size... 8321 // TODO: This causes a problem and that is in ntfs_vnop_write() we only 8322 // update the initialized size after calling cluster_write() which 8323 // means we cannot zero up to the initialized size here or we could 8324 // trample over data that has just been written out. Also this causes 8325 // our check here to trigger even though we are not really outside the 8326 // initialized size at all and in fact this page out may be part of the 8327 // write itself so it has to succeed. But on the other hand if this is 8328 // a genuine mmap()-based write we do need to do the zeroing. We need 8329 // to somehow be able to tell the difference between the two... 8330 // If the initialized size equals attr_ofs then we can safely perform 8331 // the write and then update the initialized size to attr_ofs + size 8332 // but need to be careful to update the data size appropriately and 8333 // also need to make sure not to exceed the end of the write otherwise 8334 // we would cause a file extension here when we should not do so. In 8335 // fact if this is not part of an extending write then we should not 8336 // modify the data size and only the initialized size instead. 8337 if (attr_ofs + size > ni->initialized_size && ni->initialized_size != 8338 ni->data_size) { 8339 lck_spin_unlock(&ni->size_lock); 8340 ntfs_error(vol->mp, "Writing beyond the initialized size of " 8341 "an attribute is not implemented yet."); 8342 err = ENOTSUP; 8343 goto err; 8344 } 8345 alloc_size = ni->allocated_size; 8346 lck_spin_unlock(&ni->size_lock); 8347 /* 8348 * If this is a sparse attribute we need to fill any holes overlapping 8349 * the write. 
We can skip resident attributes as they cannot have 8350 * sparse regions. 8351 * 8352 * As allocated size goes in units of clusters we need to round down 8353 * the start offset to the nearest cluster boundary and we need to 8354 * round up the end offset to the next cluster boundary. 8355 */ 8356 if (NInoSparse(ni) && NInoNonResident(ni) && ni->type == AT_DATA) { 8357 s64 aligned_end, new_end; 8358 8359 aligned_end = (attr_ofs + size + vol->cluster_size_mask) & 8360 ~vol->cluster_size_mask; 8361 /* 8362 * Only need to instantiate holes up to the allocated size 8363 * itself. Everything else would be an extension which is not 8364 * allowed from VNOP_PAGEOUT(). 8365 */ 8366 if (aligned_end > alloc_size) 8367 aligned_end = alloc_size; 8368 err = ntfs_attr_instantiate_holes(ni, 8369 attr_ofs & ~vol->cluster_size_mask, 8370 aligned_end, &new_end, TRUE); 8371 if (err) { 8372 ntfs_error(vol->mp, "Cannot perform pageout of mft_no " 8373 "0x%llx because instantiation of " 8374 "sparse regions failed (error %d).", 8375 (unsigned long long)ni->mft_no, err); 8376 goto err; 8377 } 8378 /* The instantiation may not be partial. */ 8379 if (new_end < aligned_end) 8380 panic("%s(): new_end < aligned_end\n", __FUNCTION__); 8381 } 8382 /* 8383 * Only $DATA attributes can be encrypted/compressed. Index root can 8384 * have the flags set but this means to create compressed/encrypted 8385 * files, not that the attribute is compressed/encrypted. Note we need 8386 * to check for AT_INDEX_ALLOCATION since this is the type of directory 8387 * index inodes. 8388 */ 8389 if (ni->type != AT_INDEX_ALLOCATION) { 8390 /* TODO: Deny access to encrypted attributes, just like NT4. */ 8391 if (NInoEncrypted(ni)) { 8392 if (ni->type != AT_DATA) 8393 panic("%s(): Encrypted non-data attribute.\n", 8394 __FUNCTION__); 8395 ntfs_warning(vol->mp, "Denying write to encrypted " 8396 "attribute (EACCES)."); 8397 err = EACCES; 8398 goto err; 8399 } 8400 /* Compressed data streams need special handling. 
*/ 8401 if (NInoNonResident(ni) && NInoCompressed(ni) && !NInoRaw(ni)) { 8402 if (ni->type != AT_DATA) 8403 panic("%s(): Compressed non-data attribute.\n", 8404 __FUNCTION__); 8405 goto compressed; 8406 } 8407 } 8408 /* NInoNonResident() == NInoIndexAllocPresent() */ 8409 if (NInoNonResident(ni)) { 8410 if (NInoMstProtected(ni)) 8411 err = ntfs_mst_pageout(ni, upl, upl_ofs, size, 8412 attr_ofs, attr_size, flags); 8413 else { 8414 err = cluster_pageout_ext(a->a_vp, upl, upl_ofs, 8415 attr_ofs, size, attr_size, flags, NULL, 8416 NULL); 8417 if (!err) 8418 ntfs_debug("Done (cluster_pageout_ext())."); 8419 else 8420 ntfs_error(vol->mp, "Failed " 8421 "(cluster_pageout_ext(), " 8422 "error %d).", err); 8423 } 8424 goto done; 8425 } 8426compressed: 8427 /* The attribute is resident and/or compressed. */ 8428 to_write = size; 8429 bytes = attr_size - attr_ofs; 8430 if (to_write > bytes) 8431 to_write = bytes; 8432 /* 8433 * Calculate the number of bytes available in the attribute starting at 8434 * offset @attr_ofs up to a maximum of the number of bytes to be 8435 * written rounded up to a multiple of the system page size. 8436 */ 8437 bytes = (to_write + PAGE_MASK) & ~PAGE_MASK; 8438 /* Abort any pages outside the end of the attribute. */ 8439 if (size > bytes && !(flags & UPL_NOCOMMIT)) { 8440 ubc_upl_abort_range(upl, upl_ofs + bytes, size - bytes, 8441 UPL_ABORT_FREE_ON_EMPTY); 8442 /* Update @size. */ 8443 size = bytes; 8444 } 8445 /* To access the page list contents, we need to map the page list. */ 8446 kerr = ubc_upl_map(upl, (vm_offset_t*)&kaddr); 8447 if (kerr != KERN_SUCCESS) { 8448 ntfs_error(vol->mp, "ubc_upl_map() failed (error %d).", 8449 (int)kerr); 8450 err = EIO; 8451 goto err; 8452 } 8453 if (!NInoNonResident(ni)) { 8454 /* 8455 * Write the data from the page list into the resident 8456 * attribute in its mft record. 
8457 */ 8458 err = ntfs_resident_attr_write(ni, kaddr + upl_ofs, to_write, 8459 attr_ofs); 8460 // TODO: If !err and synchronous i/o, write the mft record now. 8461 // This should probably happen in ntfs_resident_attr_write(). 8462 if (err && err != EAGAIN) 8463 ntfs_error(vol->mp, "ntfs_resident_attr_write() " 8464 "failed (error %d).", err); 8465 } else if (NInoCompressed(ni)) { 8466 ntfs_error(vol->mp, "Writing to compressed files is not " 8467 "implemented yet, sorry."); 8468 err = ENOTSUP; 8469#if 0 8470 ntfs_inode *raw_ni; 8471 int ioflags; 8472 8473 /* 8474 * Get the raw inode and lock it for writing to protect against 8475 * concurrent readers and writers as the compressed data is 8476 * invalid whilst a write is in progress. 8477 */ 8478 err = ntfs_raw_inode_get(ni, LCK_RW_TYPE_EXCLUSIVE, &raw_ni); 8479 if (err) 8480 ntfs_error(vol->mp, "Failed to get raw inode (error " 8481 "%d).", err); 8482 else { 8483 if (!NInoRaw(raw_ni)) 8484 panic("%s(): Requested raw inode but got " 8485 "non-raw one.\n", __FUNCTION__); 8486 ioflags = 0; 8487 if (vnode_isnocache(ni->vn) || 8488 vnode_isnocache(raw_ni->vn)) 8489 ioflags |= IO_NOCACHE; 8490 if (vnode_isnoreadahead(ni->vn) || 8491 vnode_isnoreadahead(raw_ni->vn)) 8492 ioflags |= IO_RAOFF; 8493 err = ntfs_write_compressed(ni, raw_ni, attr_ofs, size, 8494 kaddr + upl_ofs, NULL, ioflags); 8495 if (err) 8496 ntfs_error(vol->mp, "ntfs_write_compressed() " 8497 "failed (error %d).", err); 8498 lck_rw_unlock_exclusive(&raw_ni->lock); 8499 (void)vnode_put(raw_ni->vn); 8500 } 8501#endif 8502 } else { 8503 /* 8504 * The attribute was converted to non-resident under our nose 8505 * we need to retry the pageout. 8506 * 8507 * TODO: This may no longer be possible to happen now that we 8508 * lock against changes in initialized size and thus 8509 * truncation... Revisit this issue when the write code has 8510 * been finished and replace this with a panic(). 
8511 */ 8512 err = EAGAIN; 8513 } 8514 kerr = ubc_upl_unmap(upl); 8515 if (kerr != KERN_SUCCESS) { 8516 ntfs_error(vol->mp, "ubc_upl_unmap() failed (error %d).", 8517 (int)kerr); 8518 if (!err) 8519 err = EIO; 8520 } 8521 if (!err) { 8522 if (!(flags & UPL_NOCOMMIT)) { 8523 /* Commit the page range we wrote out. */ 8524 ubc_upl_commit_range(upl, upl_ofs, size, 8525 UPL_COMMIT_FREE_ON_EMPTY); 8526 } 8527 // TODO: If we wrote anything at all we have to clear the 8528 // setuid and setgid bits as a precaution against tampering 8529 // (see xnu/bsd/hfs/hfs_readwrite.c::hfs_vnop_pageout()). 8530 ntfs_debug("Done (%s).", !NInoNonResident(ni) ? 8531 "ntfs_resident_attr_write()" : 8532 "ntfs_write_compressed()"); 8533 } else /* if (err) */ { 8534 /* 8535 * If the attribute was converted to non-resident under our 8536 * nose, retry the pageout. 8537 * 8538 * TODO: This may no longer be possible to happen now that we 8539 * lock against changes in initialized size and thus 8540 * truncation... Revisit this issue when the write code has 8541 * been finished and remove the check and goto if appropriate. 8542 */ 8543 if (err == EAGAIN) 8544 goto retry_pageout; 8545err: 8546 if (!(flags & UPL_NOCOMMIT)) 8547 ubc_upl_abort_range(upl, upl_ofs, size, 8548 UPL_ABORT_FREE_ON_EMPTY); 8549 ntfs_error(vol->mp, "Failed (error %d).", err); 8550 } 8551done: 8552 // TODO: If we wrote anything at all we have to clear the setuid and 8553 // setgid bits as a precaution against tampering (see 8554 // xnu/bsd/hfs/hfs_readwrite.c::hfs_vnop_pageout()). 8555 /* 8556 * If this is not a directory or it is an encrypted directory, set the 8557 * needs archiving bit except for the core system files. 
8558 */ 8559 if (!err && (!S_ISDIR(base_ni->mode) || NInoEncrypted(base_ni))) { 8560 BOOL need_set_archive_bit = TRUE; 8561 if (vol->major_ver > 1) { 8562 if (base_ni->mft_no <= FILE_Extend) 8563 need_set_archive_bit = FALSE; 8564 } else { 8565 if (base_ni->mft_no <= FILE_UpCase) 8566 need_set_archive_bit = FALSE; 8567 } 8568 if (need_set_archive_bit) { 8569 base_ni->file_attributes |= FILE_ATTR_ARCHIVE; 8570 NInoSetDirtyFileAttributes(base_ni); 8571 } 8572 } 8573 /* 8574 * Update the last_data_change_time (mtime) and last_mft_change_time 8575 * (ctime) on the base ntfs inode @base_ni but not on the core system 8576 * files. However do set it on the root directory. 8577 * 8578 * Do not update the times on symbolic links. 8579 */ 8580 if (!err && !S_ISLNK(base_ni->mode)) { 8581 BOOL need_update_time = TRUE; 8582 if (vol->major_ver > 1) { 8583 if (base_ni->mft_no <= FILE_Extend && 8584 base_ni != vol->root_ni) 8585 need_update_time = FALSE; 8586 } else { 8587 if (base_ni->mft_no <= FILE_UpCase && 8588 base_ni != vol->root_ni) 8589 need_update_time = FALSE; 8590 } 8591 if (need_update_time) { 8592 base_ni->last_mft_change_time = 8593 base_ni->last_data_change_time = 8594 ntfs_utc_current_time(); 8595 NInoSetDirtyTimes(base_ni); 8596 } 8597 } 8598 if (locked) { 8599 if (lock_type == LCK_RW_TYPE_SHARED) 8600 lck_rw_unlock_shared(&ni->lock); 8601 else 8602 lck_rw_unlock_exclusive(&ni->lock); 8603 } 8604 return err; 8605} 8606 8607/** 8608 * ntfs_vnop_searchfs - 8609 * 8610 */ 8611static int ntfs_vnop_searchfs(struct vnop_searchfs_args *a) 8612{ 8613 errno_t err; 8614 8615 ntfs_debug("Entering."); 8616 // TODO: 8617 err = err_searchfs(a); 8618 ntfs_debug("Done (error %d).", (int)err); 8619 return err; 8620} 8621 8622/** 8623 * ntfs_vnop_getxattr - get the data of an extended attribute of an ntfs inode 8624 * @a: arguments to getxattr function 8625 * 8626 * @a contains: 8627 * vnode_t a_vp; vnode whose extended attribute to get 8628 * char *a_name; name of extented 
attribute to get in utf8 8629 * uio_t a_uio; destination in which to return the exteneded attribute 8630 * size_t *a_size; size of the extended attribute in bytes 8631 * int a_options; flags controlling how the attribute is obtained 8632 * vfs_context_t a_context; 8633 * 8634 * Get the named stream with the name @a->a_name (we map named streams 1:1 with 8635 * extended attributes for NTFS as the NTFS native EAs are useless) contained 8636 * in the vnode @a->a_vp and return its data in the destination specified by 8637 * @a->a_uio. 8638 * 8639 * If there was not enough space to return the whole extended attribute in the 8640 * destination @a->a_uio we return error ERANGE. The only exception to this is 8641 * the resource fork (@a->a_name is XATTR_RESOURCEFORK_NAME) for which we just 8642 * return up to uio_resid(@a->a_uio) bytes (or up to the end of the resource 8643 * fork if that is smaller). 8644 * 8645 * Note that uio_offset(@a->a_uio) must be zero except for the resource fork 8646 * where it can specify the offset into the resource fork at which to begin 8647 * returning the data. 8648 * 8649 * If @a->a_uio is NULL, do not return the data of the attribute and instead 8650 * return the current data size of the named stream in *@a->a_size. Note that 8651 * when @a->a_uio is not NULL @a->a_size is ignored as the size of the named 8652 * stream is implicitly returned in the @a->a_uio and it can be obtained by 8653 * taking the original buffer size and subtracting uio_resid(@a->a_uio) from 8654 * it. 8655 * 8656 * The flags in @a->a_options control how the attribute is obtained. The 8657 * following flags are currently defined in OS X kernel: 8658 * XATTR_NOFOLLOW - Do not follow symbolic links. 8659 * XATTR_CREATE - Set the value, fail if already exists (setxattr only). 8660 * XATTR_REPLACE - Set the value, fail if does not exist (setxattr only). 8661 * XATTR_NOSECURITY- Bypass authorization checking. 
8662 * XATTR_NODEFAULT - Bypass default extended attribute file ('._' file). 8663 * 8664 * Return 0 on success and errno on error. 8665 */ 8666static int ntfs_vnop_getxattr(struct vnop_getxattr_args *a) 8667{ 8668 s64 size; 8669 user_ssize_t start_count; 8670 off_t start_ofs; 8671 ntfs_inode *ani, *ni = NTFS_I(a->a_vp); 8672 const char *name = a->a_name; 8673 uio_t uio = a->a_uio; 8674 ntfs_volume *vol; 8675 ntfschar *ntfs_name; 8676 size_t ntfs_name_size; 8677 signed ntfs_name_len; 8678 errno_t err; 8679 ntfschar ntfs_name_buf[NTFS_MAX_ATTR_NAME_LEN]; 8680 8681 if (!ni) { 8682 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 8683 return EINVAL; 8684 } 8685 vol = ni->vol; 8686 /* Check for invalid names. */ 8687 if (!name || name[0] == '\0') 8688 return EINVAL; 8689 start_ofs = uio_offset(uio); 8690 start_count = uio_resid(uio); 8691 ntfs_debug("Entering for mft_no 0x%llx, extended attribute name %s, " 8692 "offset 0x%llx, size 0x%llx, options 0x%x.", 8693 (unsigned long long)ni->mft_no, name, start_ofs, 8694 start_count, a->a_options); 8695 lck_rw_lock_shared(&ni->lock); 8696 /* Do not allow messing with the inode once it has been deleted. */ 8697 if (NInoDeleted(ni)) { 8698 /* Remove the inode from the name cache. */ 8699 cache_purge(ni->vn); 8700 ntfs_debug("Mft_no 0x%llx is deleted.", 8701 (unsigned long long)ni->mft_no); 8702 err = ENOENT; 8703 goto err; 8704 } 8705 /* 8706 * Only regular files, directories, and symbolic links can have 8707 * extended attributes. (Specifically named streams cannot have them.) 8708 * 8709 * Thus the check is for attribute inodes as all base inodes are 8710 * allowed. Raw inodes are also attribute inodes so they are excluded 8711 * automatically, too. 
8712 */ 8713 if (NInoAttr(ni)) { 8714 ntfs_debug("Mft_no 0x%llx is an attribute inode.", 8715 (unsigned long long)ni->mft_no); 8716 err = EPERM; 8717 goto err; 8718 } 8719 /* 8720 * First of all deal with requests for the Finder info as that is 8721 * special because we cache it in the base ntfs inode @ni and we only 8722 * want to return it if the Finder info is non-zero. This is what HFS 8723 * does, too. 8724 * 8725 * Thus we need to check the status of the cache in the ntfs inode 8726 * first and if that it valid we can use it to check the content of the 8727 * Finder info for being zero. And if it is not valid then we need to 8728 * read it into the cache in the ntfs inode and then we can check the 8729 * Finder info in the cache for being zero. In fact we do this the 8730 * other way round, i.e. if the Finder info cache is not valid we read 8731 * the Finder info into the cache first and then the cache is 8732 * definitely valid thus we can check the Finder info for being 8733 * non-zero and the Finder info data if so. 8734 * 8735 * A further complication is in the event of symbolic links where we do 8736 * not return the type and creator and instead return zero for them as 8737 * that is what HFS+ does, too. 8738 * 8739 * FIXME: This comparison is case sensitive. 8740 */ 8741 if (!bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME))) { 8742 FINDER_INFO fi; 8743 8744 if (!NInoValidFinderInfo(ni)) { 8745 if (!lck_rw_lock_shared_to_exclusive(&ni->lock)) { 8746 lck_rw_lock_exclusive(&ni->lock); 8747 if (NInoDeleted(ni)) { 8748 cache_purge(ni->vn); 8749 lck_rw_unlock_exclusive(&ni->lock); 8750 ntfs_debug("Mft_no 0x%llx is deleted.", 8751 (unsigned long long) 8752 ni->mft_no); 8753 return ENOENT; 8754 } 8755 } 8756 /* 8757 * Load the AFP_AfpInfo stream and initialize the 8758 * backup time and Finder info (if they are not already 8759 * valid). 
8760 */ 8761 err = ntfs_inode_afpinfo_read(ni); 8762 if (err) { 8763 ntfs_error(vol->mp, "Failed to obtain AfpInfo " 8764 "for mft_no 0x%llx (error %d).", 8765 (unsigned long long)ni->mft_no, 8766 err); 8767 lck_rw_unlock_exclusive(&ni->lock); 8768 return err; 8769 } 8770 lck_rw_lock_exclusive_to_shared(&ni->lock); 8771 if (!NInoValidFinderInfo(ni)) 8772 panic("%s(): !NInoValidFinderInfo(ni)\n", 8773 __FUNCTION__); 8774 } 8775 /* 8776 * Make a copy of the Finder info and mask out the hidden bit 8777 * if this is the root directory and the type and creator if 8778 * this is a symbolic link. 8779 */ 8780 memcpy(&fi, &ni->finder_info, sizeof(fi)); 8781 if (ni == vol->root_ni) 8782 fi.attrs &= ~FINDER_ATTR_IS_HIDDEN; 8783 if (S_ISLNK(ni->mode)) { 8784 fi.type = 0; 8785 fi.creator = 0; 8786 } 8787 /* If the Finder info is zero, pretend it does not exist. */ 8788 if (!bcmp(&fi, &ntfs_empty_finder_info, 8789 sizeof(ni->finder_info))) { 8790 ntfs_debug("Mft_no 0x%llx has zero Finder info, " 8791 "returning ENOATTR.", 8792 (unsigned long long)ni->mft_no); 8793 err = ENOATTR; 8794 goto err; 8795 } 8796 /* The Finder info is not zero, return it. */ 8797 if (!uio) { 8798 *a->a_size = sizeof(FINDER_INFO); 8799 err = 0; 8800 } else if (start_ofs) 8801 err = EINVAL; 8802 else if (uio_resid(uio) < (user_ssize_t)sizeof(FINDER_INFO)) 8803 err = ERANGE; 8804 else { 8805 err = uiomove((caddr_t)&fi, sizeof(fi), uio); 8806 if (err) 8807 ntfs_error(vol->mp, "uiomove() failed (error " 8808 "%d).", err); 8809 } 8810 goto err; 8811 } 8812 /* 8813 * Now deal with requests for the resource fork as that is special 8814 * because on one hand we need to translate its name from 8815 * XATTR_RESOURCEFORK_NAME to AFP_Resource so we do not need to convert 8816 * the utf8 name @name to Unicode and on the other hand the offset 8817 * @start_ofs may be non-zero and the read may be only from a partial 8818 * region of the resource fork. 8819 * 8820 * FIXME: This comparison is case sensitive. 
8821 */ 8822 if (!bcmp(name, XATTR_RESOURCEFORK_NAME, 8823 sizeof(XATTR_RESOURCEFORK_NAME))) { 8824 ntfs_name = NTFS_SFM_RESOURCEFORK_NAME; 8825 ntfs_name_len = 12; 8826 } else { 8827 /* 8828 * The request is not for the resource fork (nor for the Finder 8829 * info). This means that the offset @start_ofs must be zero. 8830 */ 8831 if (start_ofs) { 8832 err = EINVAL; 8833 goto err; 8834 } 8835 /* Convert the requested name from utf8 to Unicode. */ 8836 ntfs_name = ntfs_name_buf; 8837 ntfs_name_size = sizeof(ntfs_name_buf); 8838 ntfs_name_len = utf8_to_ntfs(vol, (const u8*)name, strlen(name), 8839 &ntfs_name, &ntfs_name_size); 8840 if (ntfs_name_len < 0) { 8841 err = -ntfs_name_len; 8842 if (err == ENAMETOOLONG) 8843 ntfs_debug("Failed (name is too long)."); 8844 else 8845 ntfs_error(vol->mp, "Failed to convert name to " 8846 "Unicode (error %d).", err); 8847 goto err; 8848 } 8849 /* 8850 * If this is one of the SFM named streams, skip it, as they 8851 * contain effectively metadata information so should not be 8852 * exposed directly. 8853 */ 8854 if (ntfs_is_sfm_name(vol, ntfs_name, ntfs_name_len)) { 8855 ntfs_debug("Not allowing access to protected SFM name " 8856 "(returning EINVAL)."); 8857 err = EINVAL; 8858 goto err; 8859 } 8860 } 8861 /* 8862 * We now have the name of the requested attribute in @ntfs_name and it 8863 * is @ntfs_name_len characters long and we have verified that the 8864 * start offset is zero (unless this is the resource fork in which case 8865 * a non-zero start offset is fine). 8866 * 8867 * Start by getting the ntfs inode for the $DATA:@ntfs_name attribute. 
8868 */ 8869 err = ntfs_attr_inode_get(ni, AT_DATA, ntfs_name, ntfs_name_len, FALSE, 8870 LCK_RW_TYPE_SHARED, &ani); 8871 if (err) { 8872 if (err == ENOENT) 8873 err = ENOATTR; 8874 else if (err != ENOATTR) 8875 ntfs_error(vol->mp, "Failed to get $DATA/%s attribute " 8876 "inode mft_no 0x%llx (error %d).", name, 8877 (unsigned long long)ni->mft_no, err); 8878 goto err; 8879 } 8880 /* 8881 * TODO: This check may no longer be necessary now that we lock against 8882 * changes in initialized size and thus truncation... Revisit this 8883 * issue when the write code has been written and remove the check if 8884 * appropriate simply using ubc_getsize(ni->vn); without the size_lock. 8885 */ 8886 lck_spin_lock(&ani->size_lock); 8887 size = ubc_getsize(ani->vn); 8888 if (size > ani->data_size) 8889 size = ani->data_size; 8890 lck_spin_unlock(&ani->size_lock); 8891 if (!uio) 8892 *a->a_size = size; 8893 else if (ntfs_name != NTFS_SFM_RESOURCEFORK_NAME && 8894 start_count < size) { 8895 /* Partial reads are only allowed for the resource fork. */ 8896 err = ERANGE; 8897 } else { 8898 /* 8899 * Perform the actual read from the attribute inode. We pass 8900 * in IO_UNIT as we want an atomic i/o operation. 8901 * 8902 * FIXME: ntfs_read() currently ignores the IO_UNIT flag so we 8903 * still have to test for partial reads. 8904 */ 8905 err = ntfs_read(ani, uio, IO_UNIT, TRUE); 8906 /* 8907 * If the read was partial, reset @uio pretending that the read 8908 * never happened. This is because extended attribute i/o is 8909 * meant to be atomic, i.e. either we get it all or we do not 8910 * get anything. 8911 * 8912 * Note we also accept the case where uio_resid() has gone to 8913 * zero as this covers the exception of the resource fork for 8914 * which we do not need to return the whole resource fork in 8915 * one go. 
8916 */ 8917 if (uio_resid(uio) && start_count - uio_resid(uio) != 8918 size - start_ofs) { 8919 /* 8920 * FIXME: Should we be trying to continue a partial 8921 * read in case we can complete it with multiple calls 8922 * to ntfs_read()? If we do that we could also drop 8923 * the IO_UNIT flag above. 8924 */ 8925 if (!err) { 8926 ntfs_debug("ntfs_read() returned a partial " 8927 "read, pretending the read " 8928 "never happened."); 8929 err = EIO; 8930 } 8931 uio_setoffset(uio, start_ofs); 8932 uio_setresid(uio, start_count); 8933 } 8934 } 8935 lck_rw_unlock_shared(&ani->lock); 8936 (void)vnode_put(ani->vn); 8937err: 8938 lck_rw_unlock_shared(&ni->lock); 8939 ntfs_debug("Done (error %d).", err); 8940 return err; 8941} 8942 8943/** 8944 * ntfs_vnop_setxattr - set the data of an extended attribute of an ntfs inode 8945 * @a: arguments to setxattr function 8946 * 8947 * @a contains: 8948 * vnode_t a_vp; vnode whose extended attribute to set 8949 * char *a_name; name of extented attribute to set in utf8 8950 * uio_t a_uio; source data to which to set the exteneded attribute 8951 * int a_options; flags controlling how the attribute is set 8952 * vfs_context_t a_context; 8953 * 8954 * Get the named stream with the name @a->a_name (we map named streams 1:1 with 8955 * extended attributes for NTFS as the NTFS native EAs are useless) contained 8956 * in the vnode @a->a_vp and set its data to the source specified by @a->a_uio. 8957 * 8958 * If @a->a_options does not specify XATTR_CREATE nor XATTR_REPLACE the 8959 * attribute will be created if it does not exist already and if it exists 8960 * already the old value will be replaced with the new one, i.e. if the old 8961 * value does not have the same size as the new value the attribute is 8962 * truncated to the new size. 8963 * 8964 * If @a->a_options specifies XATTR_CREATE the call will fail if the attribute 8965 * already exists, i.e. the existing attribute will not be replaced. 
 *
 * If @a->a_options specifies XATTR_REPLACE the call will fail if the attribute
 * does not exist, i.e. the new attribute will not be created.
 *
 * An exception is the resource fork (@a->a_name is XATTR_RESOURCEFORK_NAME)
 * for which we do not replace the existing attribute and instead we write over
 * the existing attribute starting at offset uio_offset(@a->a_uio) and writing
 * uio_resid(@a->a_uio) bytes.  Writing past the end of the resource fork will
 * cause the resource fork to be extended just like a regular file write would
 * do but a write to any existing part of the attribute will not cause the
 * attribute to be shrunk.
 *
 * Similar to other extended attributes, if @a->a_options specifies
 * XATTR_CREATE the call will fail if the resource fork already exists, i.e.
 * the write to the existing resource fork will be denied and if @a->a_options
 * specifies XATTR_REPLACE the call will fail if the resource fork does not yet
 * exist, i.e. the new resource fork will not be created.
 *
 * Note that uio_offset(@a->a_uio) must be zero except for the resource fork
 * where it can specify the offset into the resource fork at which to begin
 * writing the data.
 *
 * The flags in @a->a_options control how the attribute is set.  The following
 * flags are currently defined in OS X kernel:
 *	XATTR_NOFOLLOW	- Do not follow symbolic links.
 *	XATTR_CREATE	- Set the value, fail if already exists (setxattr only).
 *	XATTR_REPLACE	- Set the value, fail if does not exist (setxattr only).
 *	XATTR_NOSECURITY- Bypass authorization checking.
 *	XATTR_NODEFAULT	- Bypass default extended attribute file ('._' file).
 *
 * Return 0 on success and errno on error.
8997 */ 8998static int ntfs_vnop_setxattr(struct vnop_setxattr_args *a) 8999{ 9000 s64 size; 9001 user_ssize_t start_count; 9002 off_t start_ofs; 9003 ntfs_inode *ani, *ni = NTFS_I(a->a_vp); 9004 ntfs_volume *vol; 9005 const char *name = a->a_name; 9006 uio_t uio = a->a_uio; 9007 ntfschar *ntfs_name; 9008 size_t ntfs_name_size; 9009 signed ntfs_name_len; 9010 const int options = a->a_options; 9011 errno_t err; 9012 ntfschar ntfs_name_buf[NTFS_MAX_ATTR_NAME_LEN]; 9013 9014 if (!ni) { 9015 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 9016 return EINVAL; 9017 } 9018 vol = ni->vol; 9019 /* Check for invalid names. */ 9020 if (!name || name[0] == '\0') 9021 return EINVAL; 9022 start_ofs = uio_offset(uio); 9023 start_count = uio_resid(uio); 9024 ntfs_debug("Entering for mft_no 0x%llx, extended attribute name %s, " 9025 "offset 0x%llx, size 0x%llx, options 0x%x.", 9026 (unsigned long long)ni->mft_no, name, start_ofs, 9027 start_count, options); 9028 /* 9029 * Access to extended attributes must be atomic which we ensure by 9030 * locking the base ntfs inode for writing. 9031 */ 9032 lck_rw_lock_exclusive(&ni->lock); 9033 /* Do not allow messing with the inode once it has been deleted. */ 9034 if (NInoDeleted(ni)) { 9035 /* Remove the inode from the name cache. */ 9036 cache_purge(ni->vn); 9037 ntfs_debug("Mft_no 0x%llx is deleted.", 9038 (unsigned long long)ni->mft_no); 9039 err = ENOENT; 9040 goto err; 9041 } 9042 /* 9043 * Only regular files, directories, and symbolic links can have 9044 * extended attributes. (Specifically named streams cannot have them.) 9045 * 9046 * Thus the check is for attribute inodes as all base inodes are 9047 * allowed. Raw inodes are also attribute inodes so they are excluded 9048 * automatically, too. 
9049 */ 9050 if (NInoAttr(ni)) { 9051 ntfs_debug("Mft_no 0x%llx is an attribute inode.", 9052 (unsigned long long)ni->mft_no); 9053 err = EPERM; 9054 goto err; 9055 } 9056 /* 9057 * XATTR_CREATE and XATTR_REPLACE may not be specified at the same time 9058 * or weird things would happen so test for and abort this case here. 9059 */ 9060 if ((options & (XATTR_CREATE | XATTR_REPLACE)) == 9061 (XATTR_CREATE | XATTR_REPLACE)) { 9062 ntfs_debug("Either XATTR_CREATE or XATTR_REPLACE but not both " 9063 "may be specified."); 9064 err = EINVAL; 9065 goto err; 9066 } 9067 /* 9068 * First of all deal with requests to set the Finder info as that is 9069 * special because we cache it in the base ntfs inode @ni thus we need 9070 * to copy the new Finder info into the cache and then write the 9071 * changes out to the AFP_AfpInfo attribute (creating it if it did not 9072 * exist before). 9073 * 9074 * The only exception to the above description is when the XATTR_CREATE 9075 * or XATTR_REPLACE flags are set in @options in which case we need to 9076 * know whether the Finder info extists already or not and thus if the 9077 * Finder info cache is not valid we need to make it valid first and 9078 * then we can check it against being zero to determine whether the 9079 * Finder info exists already or not and then we know whether or not to 9080 * proceed with setting the Finder info. 9081 * 9082 * FIXME: This comparison is case sensitive. 9083 */ 9084 if (!bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME))) { 9085 FINDER_INFO fi; 9086 9087 if (start_count != sizeof(ni->finder_info)) { 9088 ntfs_debug("Number of bytes to write (%lld) does not " 9089 "equal Finder info size (%ld), " 9090 "returning ERANGE.", 9091 (unsigned long long)start_count, 9092 sizeof(ni->finder_info)); 9093 err = ERANGE; 9094 goto err; 9095 } 9096 /* 9097 * If @options does not specify XATTR_CREATE nor XATTR_REPLACE 9098 * there is no need to bring the Finder info up-to-date before 9099 * the write. 
9100 */ 9101 if (options & (XATTR_CREATE | XATTR_REPLACE)) { 9102 if (!NInoValidFinderInfo(ni)) { 9103 /* 9104 * Load the AFP_AfpInfo stream and initialize 9105 * the backup time and Finder info (at least 9106 * the Finder info is not yet valid). 9107 */ 9108 err = ntfs_inode_afpinfo_read(ni); 9109 if (err) { 9110 ntfs_error(vol->mp, "Failed to obtain " 9111 "AfpInfo for mft_no " 9112 "0x%llx (error %d).", 9113 (unsigned long long) 9114 ni->mft_no, err); 9115 goto err; 9116 } 9117 if (!NInoValidFinderInfo(ni)) 9118 panic("%s(): !NInoValidFinderInfo(ni)" 9119 "\n", __FUNCTION__); 9120 } 9121 /* 9122 * Make a copy of the Finder info and mask out the 9123 * hidden bit if this is the root directory and the 9124 * type and creator if this is a symbolic link. 9125 */ 9126 memcpy(&fi, &ni->finder_info, sizeof(fi)); 9127 if (ni == vol->root_ni) 9128 fi.attrs &= ~FINDER_ATTR_IS_HIDDEN; 9129 if (S_ISLNK(ni->mode)) { 9130 fi.type = 0; 9131 fi.creator = 0; 9132 } 9133 if (bcmp(&ni->finder_info, &ntfs_empty_finder_info, 9134 sizeof(ni->finder_info))) { 9135 /* 9136 * Finder info is non-zero, i.e. it exists, and 9137 * XATTR_CREATE was specified. 9138 */ 9139 if (options & XATTR_CREATE) { 9140 ntfs_debug("Mft_no 0x%llx has " 9141 "non-zero Finder info " 9142 "and XATTR_CREATE was " 9143 "specified, returning " 9144 "EEXIST.", 9145 (unsigned long long) 9146 ni->mft_no); 9147 err = EEXIST; 9148 goto err; 9149 } 9150 } else { 9151 /* 9152 * Finder info is zero, i.e. it does not exist, 9153 * and XATTR_REPLACE was specified. 9154 */ 9155 if (options & XATTR_REPLACE) { 9156 ntfs_debug("Mft_no 0x%llx has zero " 9157 "Finder info and " 9158 "XATTR_REPLACE was " 9159 "specified, returning " 9160 "ENOATTR.", 9161 (unsigned long long) 9162 ni->mft_no); 9163 err = ENOATTR; 9164 goto err; 9165 } 9166 } 9167 } 9168 /* Copy the new Finder info value to our buffer. 
*/ 9169 err = uiomove((caddr_t)&fi, sizeof(fi), uio); 9170 if (!err) { 9171 /* 9172 * Set the Finder info to the new value after masking 9173 * out the hidden bit if this is the root directory and 9174 * enforcing the type and creator if this is a symbolic 9175 * link to be our private values for symbolic links. 9176 */ 9177 if (ni == vol->root_ni) 9178 fi.attrs &= ~FINDER_ATTR_IS_HIDDEN; 9179 if (S_ISLNK(ni->mode)) { 9180 fi.type = FINDER_TYPE_SYMBOLIC_LINK; 9181 fi.creator = FINDER_CREATOR_SYMBOLIC_LINK; 9182 } 9183 memcpy((u8*)&ni->finder_info, (u8*)&fi, sizeof(fi)); 9184 NInoSetValidFinderInfo(ni); 9185 NInoSetDirtyFinderInfo(ni); 9186 /* 9187 * If the file is not hidden but the Finder info hidden 9188 * bit is being set, we need to cause the file to be 9189 * hidden, i.e. we need to set the FILE_ATTR_HIDDEN bit 9190 * in the file_attributes of the $STANDARD_INFORMATION 9191 * attribute. 9192 */ 9193 if (fi.attrs & FINDER_ATTR_IS_HIDDEN && 9194 !(ni->file_attributes & 9195 FILE_ATTR_HIDDEN)) { 9196 ni->file_attributes |= FILE_ATTR_HIDDEN; 9197 NInoSetDirtyFileAttributes(ni); 9198 } 9199 /* 9200 * Updating the Finder info causes both the 9201 * last_data_change_time (mtime) and 9202 * last_mft_change_time (ctime) to be updated. 9203 */ 9204 ni->last_mft_change_time = ni->last_data_change_time = 9205 ntfs_utc_current_time(); 9206 NInoSetDirtyTimes(ni); 9207 /* 9208 * Now write (if needed creating) the AFP_AfpInfo 9209 * attribute with the specified Finder Info. 
9210 */ 9211 err = ntfs_inode_afpinfo_write(ni); 9212 if (err) 9213 ntfs_error(vol->mp, "Failed to write/create " 9214 "AFP_AfpInfo attribute in " 9215 "inode 0x%llx (error %d).", 9216 (unsigned long long)ni->mft_no, 9217 err); 9218 } else 9219 ntfs_error(vol->mp, "uiomove() failed (error %d).", 9220 err); 9221 goto err; 9222 } 9223 /* 9224 * Now deal with requests to write to the resource fork as that is 9225 * special because on one hand we need to translate its name from 9226 * XATTR_RESOURCEFORK_NAME to AFP_Resource so we do not need to convert 9227 * the utf8 name @name to Unicode and on the other hand the offset 9228 * @start_ofs may be non-zero, the write may be only to a partial 9229 * region of the resource fork, and the write may not shrink the 9230 * resource fork though it may extend it. 9231 * 9232 * FIXME: This comparison is case sensitive. 9233 */ 9234 if (!bcmp(name, XATTR_RESOURCEFORK_NAME, 9235 sizeof(XATTR_RESOURCEFORK_NAME))) { 9236 ntfs_name = NTFS_SFM_RESOURCEFORK_NAME; 9237 ntfs_name_len = 12; 9238 } else { 9239 /* 9240 * The request is not for the resource fork (nor for the Finder 9241 * info). This means that the offset @start_ofs must be zero. 9242 */ 9243 if (start_ofs) { 9244 err = EINVAL; 9245 goto err; 9246 } 9247 /* Convert the requested name from utf8 to Unicode. */ 9248 ntfs_name = ntfs_name_buf; 9249 ntfs_name_size = sizeof(ntfs_name_buf); 9250 ntfs_name_len = utf8_to_ntfs(vol, (const u8*)name, strlen(name), 9251 &ntfs_name, &ntfs_name_size); 9252 if (ntfs_name_len < 0) { 9253 err = -ntfs_name_len; 9254 if (err == ENAMETOOLONG) 9255 ntfs_debug("Failed (name is too long)."); 9256 else 9257 ntfs_error(vol->mp, "Failed to convert name to " 9258 "Unicode (error %d).", err); 9259 goto err; 9260 } 9261 /* 9262 * If this is one of the SFM named streams, skip it, as they 9263 * contain effectively metadata information so should not be 9264 * exposed directly. 
9265 */ 9266 if (ntfs_is_sfm_name(vol, ntfs_name, ntfs_name_len)) { 9267 ntfs_debug("Not allowing access to protected SFM name " 9268 "(returning EINVAL)."); 9269 err = EINVAL; 9270 goto err; 9271 } 9272 } 9273 /* 9274 * We now have the name of the requested attribute in @ntfs_name and it 9275 * is @ntfs_name_len characters long and we have verified that the 9276 * start offset is zero (unless this is the resource fork in which case 9277 * a non-zero start offset is fine). 9278 * 9279 * Get the ntfs attribute inode of the $DATA:@ntfs_name attribute 9280 * (unless XATTR_CREATE is specified in @options) and if it does not 9281 * exist create it first (unless XATTR_REPLACE is specified in 9282 * @options). 9283 */ 9284 err = ntfs_attr_inode_get_or_create(ni, AT_DATA, ntfs_name, 9285 ntfs_name_len, FALSE, FALSE, options, 9286 LCK_RW_TYPE_EXCLUSIVE, &ani); 9287 if (err) { 9288 if (err == ENOENT) 9289 err = ENOATTR; 9290 else if (err != ENOATTR && err != EEXIST) 9291 ntfs_error(vol->mp, "Failed to get or create $DATA/%s " 9292 "attribute inode mft_no 0x%llx (error " 9293 "%d).", name, 9294 (unsigned long long)ni->mft_no, err); 9295 goto err; 9296 } 9297 /* 9298 * TODO: This check may no longer be necessary now that we lock against 9299 * changes in initialized size and thus truncation... Revisit this 9300 * issue when the write code has been written and remove the check if 9301 * appropriate simply using ubc_getsize(ni->vn); without the size_lock. 9302 */ 9303 lck_spin_lock(&ani->size_lock); 9304 size = ubc_getsize(ani->vn); 9305 if (size > ani->data_size) 9306 size = ani->data_size; 9307 lck_spin_unlock(&ani->size_lock); 9308 /* 9309 * Perform the actual write to the attribute inode. We pass in IO_UNIT 9310 * as we want an atomic i/o operation. 9311 * 9312 * FIXME: ntfs_write() does not always honour the IO_UNIT flag so we 9313 * still have to test for partial writes. 
9314 */ 9315 err = ntfs_write(ani, uio, IO_UNIT, TRUE); 9316 /* 9317 * If the write was successful, need to shrink the attribute if the new 9318 * size is smaller than the old size. 9319 * 9320 * If the write was partial or failed, reset @uio pretending that the 9321 * write never happened. This is because extended attribute i/o is 9322 * meant to be atomic, i.e. either we get it all or we do not get 9323 * anything. 9324 * 9325 * In the partial/failed case, if @options specifies XATTR_REPLACE we 9326 * know the extended attribute existed already thus we truncate it to 9327 * zero size to simulate that the old value has been replaced. And if 9328 * @options specifies XATTR_CREATE we know we created the extended 9329 * attribute thus we delete it again. And if @options does not specify 9330 * XATTR_REPLACE nor XATTR_CREATE then we do not know whether we 9331 * created it or not and in this case we assume the caller does not 9332 * care so we delete it to conserve disk space. 9333 */ 9334 if (!err && !uio_resid(uio)) { 9335 /* 9336 * Shrink the attribute if the new value is smaller than the 9337 * old value. We do not do this for the resource fork as that 9338 * is a special case. 9339 */ 9340 if (ntfs_name != NTFS_SFM_RESOURCEFORK_NAME) { 9341 if (size > start_count) { 9342 err = ntfs_attr_resize(ani, start_count, 0, 9343 NULL); 9344 if (err) { 9345 ntfs_error(vol->mp, "Failed to resize " 9346 "extended attribute " 9347 "to its new size " 9348 "(error %d).", err); 9349 goto undo_err; 9350 } 9351 } 9352 } 9353 } else { 9354 /* 9355 * FIXME: Should we be trying to continue a partial write in 9356 * case we can complete it with multiple calls to ntfs_write()? 
9357 */ 9358 if (!err) { 9359 ntfs_debug("ntfs_write() returned a partial write, " 9360 "pretending the write never happened " 9361 "and removing or truncating to zero " 9362 "size the old attribute value."); 9363 err = EIO; 9364 } 9365undo_err: 9366 uio_setoffset(uio, start_ofs); 9367 uio_setresid(uio, start_count); 9368 if (options & XATTR_REPLACE) { 9369 errno_t err2; 9370 9371 err2 = ntfs_attr_resize(ani, 0, 0, NULL); 9372 if (err2) { 9373 ntfs_error(vol->mp, "Failed to truncate " 9374 "extended attribute to zero " 9375 "size in error code path " 9376 "(error %d), attempting to " 9377 "delete it instead.", err2); 9378 goto rm_err; 9379 } 9380 } else { 9381rm_err: 9382 /* 9383 * Unlink the named stream. The last close will cause 9384 * the VFS to call ntfs_vnop_inactive() which will do 9385 * the actual removal. 9386 */ 9387 ani->link_count = 0; 9388 /* 9389 * Update the last_mft_change_time (ctime) in the inode 9390 * as named stream/extended attribute semantics expect 9391 * on OS X. 9392 */ 9393 ni->last_mft_change_time = ntfs_utc_current_time(); 9394 NInoSetDirtyTimes(ni); 9395 /* 9396 * If this is not a directory or it is an encrypted 9397 * directory, set the needs archiving bit except for 9398 * the core system files. 
9399 */ 9400 if (!S_ISDIR(ni->mode) || NInoEncrypted(ni)) { 9401 BOOL need_set_archive_bit = TRUE; 9402 if (ni->vol->major_ver >= 2) { 9403 if (ni->mft_no <= FILE_Extend) 9404 need_set_archive_bit = FALSE; 9405 } else { 9406 if (ni->mft_no <= FILE_UpCase) 9407 need_set_archive_bit = FALSE; 9408 } 9409 if (need_set_archive_bit) { 9410 ni->file_attributes |= 9411 FILE_ATTR_ARCHIVE; 9412 NInoSetDirtyFileAttributes(ni); 9413 } 9414 } 9415 } 9416 } 9417 lck_rw_unlock_exclusive(&ani->lock); 9418 (void)vnode_put(ani->vn); 9419err: 9420 lck_rw_unlock_exclusive(&ni->lock); 9421 ntfs_debug("Done (error %d).", err); 9422 return err; 9423} 9424 9425/** 9426 * ntfs_vnop_removexattr - remove an extended attribute from an ntfs inode 9427 * @a: arguments to removexattr function 9428 * 9429 * @a contains: 9430 * vnode_t a_vp; vnode whose extended attribute to remove 9431 * char *a_name; name of extented attribute to remove in utf8 9432 * int a_options; flags controlling how the attribute is removed 9433 * vfs_context_t a_context; 9434 * 9435 * Remove the named stream with the name @a->a_name (we map named streams 1:1 9436 * with extended attributes for NTFS as the NTFS native EAs are useless) from 9437 * the vnode @a->a_vp. 9438 * 9439 * The flags in @a->a_options control how the attribute is set. The following 9440 * flags are currently defined in OS X kernel: 9441 * XATTR_NOFOLLOW - Do not follow symbolic links. 9442 * XATTR_CREATE - Set the value, fail if already exists (setxattr only). 9443 * XATTR_REPLACE - Set the value, fail if does not exist (setxattr only). 9444 * XATTR_NOSECURITY- Bypass authorization checking. 9445 * XATTR_NODEFAULT - Bypass default extended attribute file ('._' file). 9446 * 9447 * Return 0 on success and errno on error. 
9448 */ 9449static int ntfs_vnop_removexattr(struct vnop_removexattr_args *a) 9450{ 9451 ntfs_inode *ani, *ni = NTFS_I(a->a_vp); 9452 const char *name = a->a_name; 9453 ntfs_volume *vol; 9454 ntfschar *ntfs_name; 9455 size_t ntfs_name_size; 9456 signed ntfs_name_len; 9457 errno_t err; 9458 ntfschar ntfs_name_buf[NTFS_MAX_ATTR_NAME_LEN]; 9459 9460 if (!ni) { 9461 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 9462 return EINVAL; 9463 } 9464 vol = ni->vol; 9465 /* Check for invalid names. */ 9466 if (!name || name[0] == '\0') 9467 return EINVAL; 9468 ntfs_debug("Entering for mft_no 0x%llx, extended attribute name %s, " 9469 "options 0x%x.", (unsigned long long)ni->mft_no, name, 9470 a->a_options); 9471 /* 9472 * Access to extended attributes must be atomic which we ensure by 9473 * locking the base ntfs inode for writing. 9474 */ 9475 lck_rw_lock_exclusive(&ni->lock); 9476 /* Do not allow messing with the inode once it has been deleted. */ 9477 if (NInoDeleted(ni)) { 9478 /* Remove the inode from the name cache. */ 9479 cache_purge(ni->vn); 9480 ntfs_debug("Mft_no 0x%llx is deleted.", 9481 (unsigned long long)ni->mft_no); 9482 err = ENOENT; 9483 goto err; 9484 } 9485 /* 9486 * Only regular files, directories, and symbolic links can have 9487 * extended attributes. (Specifically named streams cannot have them.) 9488 * 9489 * Thus the check is for attribute inodes as all base inodes are 9490 * allowed. Raw inodes are also attribute inodes so they are excluded 9491 * automatically, too. 9492 */ 9493 if (NInoAttr(ni)) { 9494 ntfs_debug("Mft_no 0x%llx is an attribute inode.", 9495 (unsigned long long)ni->mft_no); 9496 err = EPERM; 9497 goto err; 9498 } 9499 /* 9500 * First of all deal with requests to remove the Finder info as that is 9501 * special because we cache it in the base ntfs inode @ni thus we need 9502 * to zero the cached Finder info and then write the changes out to the 9503 * AFP_AfpInfo attribute (deleting it if it is no longer needed). 
This 9504 * is sufficient as a zero Finder info is treated the same as 9505 * non-existent Finder info and vice versa. 9506 * 9507 * Note if the Finder info is already zero it does not exist thus we 9508 * need to return ENOATTR instead thus we may need to load the Finder 9509 * info first to find out whether it is zero or not. 9510 * 9511 * FIXME: This comparison is case sensitive. 9512 */ 9513 if (!bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME))) { 9514 FINDER_INFO fi; 9515 9516 if (!NInoValidFinderInfo(ni)) { 9517 /* 9518 * Load the AFP_AfpInfo stream and initialize the 9519 * backup time and Finder info (at least the Finder 9520 * info is not yet valid). 9521 */ 9522 err = ntfs_inode_afpinfo_read(ni); 9523 if (err) { 9524 ntfs_error(vol->mp, "Failed to obtain AfpInfo " 9525 "for mft_no 0x%llx (error %d).", 9526 (unsigned long long)ni->mft_no, 9527 err); 9528 goto err; 9529 } 9530 if (!NInoValidFinderInfo(ni)) 9531 panic("%s(): !NInoValidFinderInfo(ni)\n", 9532 __FUNCTION__); 9533 } 9534 /* 9535 * Make a copy of the Finder info and mask out the hidden bit 9536 * if this is the root directory and the type and creator if 9537 * this is a symbolic link. 9538 */ 9539 memcpy(&fi, &ni->finder_info, sizeof(fi)); 9540 if (ni == vol->root_ni) 9541 fi.attrs &= ~FINDER_ATTR_IS_HIDDEN; 9542 if (S_ISLNK(ni->mode)) { 9543 fi.type = 0; 9544 fi.creator = 0; 9545 } 9546 if (!bcmp(&fi, &ntfs_empty_finder_info, sizeof(fi))) { 9547 /* Finder info is zero, i.e. it does not exist. */ 9548 ntfs_debug("Mft_no 0x%llx has zero Finder info, " 9549 "returning ENOATTR.", 9550 (unsigned long long)ni->mft_no); 9551 err = ENOATTR; 9552 goto err; 9553 } 9554 /* Zero the Finder info. */ 9555 bzero(&ni->finder_info, sizeof(ni->finder_info)); 9556 /* 9557 * If the file is hidden, we need to reflect this fact in the 9558 * Finder info, too. 
9559 */ 9560 if (ni->file_attributes & FILE_ATTR_HIDDEN) 9561 ni->finder_info.attrs |= FINDER_ATTR_IS_HIDDEN; 9562 /* 9563 * Also, enforce the type and creator if this is a symbolic 9564 * link to be our private values for symbolic links. This in 9565 * fact causes the Finder info not to be deleted on disk and we 9566 * cannot allow that to happen as we would then no longer know 9567 * that this is a symbolic link. 9568 */ 9569 if (S_ISLNK(ni->mode)) { 9570 ni->finder_info.type = FINDER_TYPE_SYMBOLIC_LINK; 9571 ni->finder_info.creator = FINDER_CREATOR_SYMBOLIC_LINK; 9572 } 9573 NInoSetValidFinderInfo(ni); 9574 NInoSetDirtyFinderInfo(ni); 9575 /* 9576 * Updating the Finder info causes both the 9577 * last_data_change_time (mtime) and last_mft_change_time 9578 * (ctime) to be updated. 9579 */ 9580 ni->last_mft_change_time = ni->last_data_change_time = 9581 ntfs_utc_current_time(); 9582 NInoSetDirtyTimes(ni); 9583 /* Now write (if needed deleting) the AFP_AfpInfo attribute. */ 9584 err = ntfs_inode_afpinfo_write(ni); 9585 if (!err) 9586 ntfs_debug("Deleted Finder info from mft_no 0x%llx.", 9587 (unsigned long long)ni->mft_no); 9588 else 9589 ntfs_error(vol->mp, "Failed to write/delete " 9590 "AFP_AfpInfo attribute in inode " 9591 "0x%llx (error %d).", 9592 (unsigned long long)ni->mft_no, err); 9593 goto err; 9594 } 9595 /* 9596 * Now deal with requests to remove the resource fork as that is 9597 * special because we need to translate its name from 9598 * XATTR_RESOURCEFORK_NAME to AFP_Resource so we do not need to convert 9599 * the utf8 name @name to Unicode. 9600 * 9601 * FIXME: This comparison is case sensitive. 9602 */ 9603 if (!bcmp(name, XATTR_RESOURCEFORK_NAME, 9604 sizeof(XATTR_RESOURCEFORK_NAME))) { 9605 ntfs_name = NTFS_SFM_RESOURCEFORK_NAME; 9606 ntfs_name_len = 12; 9607 } else { 9608 /* 9609 * The request is not for the resource fork (nor for the Finder 9610 * info). 9611 * 9612 * Convert the requested name from utf8 to Unicode. 
9613 */ 9614 ntfs_name = ntfs_name_buf; 9615 ntfs_name_size = sizeof(ntfs_name_buf); 9616 ntfs_name_len = utf8_to_ntfs(vol, (const u8*)name, strlen(name), 9617 &ntfs_name, &ntfs_name_size); 9618 if (ntfs_name_len < 0) { 9619 err = -ntfs_name_len; 9620 if (err == ENAMETOOLONG) 9621 ntfs_debug("Failed (name is too long)."); 9622 else 9623 ntfs_error(vol->mp, "Failed to convert name to " 9624 "Unicode (error %d).", err); 9625 goto err; 9626 } 9627 /* 9628 * If this is one of the SFM named streams, skip it, as they 9629 * contain effectively metadata information so should not be 9630 * exposed directly. 9631 */ 9632 if (ntfs_is_sfm_name(vol, ntfs_name, ntfs_name_len)) { 9633 ntfs_debug("Not allowing access to protected SFM name " 9634 "%s in mft_no 0x%llx (returning " 9635 "EINVAL).", name, 9636 (unsigned long long)ni->mft_no); 9637 err = EINVAL; 9638 goto err; 9639 } 9640 } 9641 /* 9642 * We now have the name of the requested attribute in @ntfs_name and it 9643 * is @ntfs_name_len characters long. 9644 * 9645 * Get the ntfs attribute inode of the $DATA:@ntfs_name attribute. 9646 */ 9647 err = ntfs_attr_inode_get(ni, AT_DATA, ntfs_name, ntfs_name_len, FALSE, 9648 LCK_RW_TYPE_EXCLUSIVE, &ani); 9649 if (err) { 9650 if (err == ENOENT) 9651 err = ENOATTR; 9652 else if (err != ENOATTR) 9653 ntfs_error(vol->mp, "Failed to get $DATA/%s attribute " 9654 "inode mft_no 0x%llx (error %d).", 9655 name, (unsigned long long)ni->mft_no, 9656 err); 9657 goto err; 9658 } 9659 /* 9660 * Unlink the named stream. The last close will cause the VFS to call 9661 * ntfs_vnop_inactive() which will do the actual removal. 9662 */ 9663 ani->link_count = 0; 9664 /* 9665 * Update the last_mft_change_time (ctime) in the inode as named 9666 * stream/extended attribute semantics expect on OS X. 
9667 */ 9668 ni->last_mft_change_time = ntfs_utc_current_time(); 9669 NInoSetDirtyTimes(ni); 9670 /* 9671 * If this is not a directory or it is an encrypted directory, set the 9672 * needs archiving bit except for the core system files. 9673 */ 9674 if (!S_ISDIR(ni->mode) || NInoEncrypted(ni)) { 9675 BOOL need_set_archive_bit = TRUE; 9676 if (ni->vol->major_ver >= 2) { 9677 if (ni->mft_no <= FILE_Extend) 9678 need_set_archive_bit = FALSE; 9679 } else { 9680 if (ni->mft_no <= FILE_UpCase) 9681 need_set_archive_bit = FALSE; 9682 } 9683 if (need_set_archive_bit) { 9684 ni->file_attributes |= FILE_ATTR_ARCHIVE; 9685 NInoSetDirtyFileAttributes(ni); 9686 } 9687 } 9688 ntfs_debug("Done."); 9689 lck_rw_unlock_exclusive(&ani->lock); 9690 (void)vnode_put(ani->vn); 9691err: 9692 lck_rw_unlock_exclusive(&ni->lock); 9693 return err; 9694} 9695 9696/** 9697 * ntfs_vnop_listxattr - list the names of the extended attributes of an inode 9698 * @args: arguments to listxattr function 9699 * 9700 * @args contains: 9701 * vnode_t a_vp; vnode whose extended attributes to list 9702 * uio_t a_uio; destination in which to return the list 9703 * size_t *a_size; size of the list of extended attributes in bytes 9704 * int a_options; flags controlling how the attribute list is generated 9705 * vfs_context_t a_context; 9706 * 9707 * Iterate over the list of named streams (which we map 1:1 with extended 9708 * attributes for NTFS as the NTFS native EAs are useless) in the vnode 9709 * @args->a_vp and for each encountered stream copy its name (converted to an 9710 * NULL-terminated utf8 string) to the destination as specified by 9711 * @args->a_uio. 9712 * 9713 * If @args->a_uio is NULL, do not copy anything and simply iterate over all 9714 * named streams and add up the number of bytes needed to create a full list of 9715 * their names and return that in *@args->a_size. 
Note that when @args->a_uio 9716 * is not NULL @args->a_size is ignored as the number of bytes is implicitly 9717 * returned in the @args->a_uio and it can be obtained by taking the original 9718 * buffer size and subtracting uio_resid(@args->a_uio) from it. 9719 * 9720 * The flags in @args->a_options control how the attribute list is generated. 9721 * The following flags are currently defined in OS X kernel: 9722 * XATTR_NOFOLLOW - Do not follow symbolic links. 9723 * XATTR_CREATE - Set the value, fail if already exists (setxattr only). 9724 * XATTR_REPLACE - Set the value, fail if does not exist (setxattr only). 9725 * XATTR_NOSECURITY- Bypass authorization checking. 9726 * XATTR_NODEFAULT - Bypass default extended attribute file ('._' file). 9727 * 9728 * Return 0 on success and errno on error. 9729 */ 9730static int ntfs_vnop_listxattr(struct vnop_listxattr_args *args) 9731{ 9732 ntfs_inode *ni = NTFS_I(args->a_vp); 9733 uio_t uio = args->a_uio; 9734 ntfs_volume *vol; 9735 MFT_RECORD *m; 9736 ntfs_attr_search_ctx *ctx; 9737 u8 *utf8_name; 9738 ntfschar *upcase; 9739 unsigned upcase_len; 9740 size_t size, utf8_size; 9741 errno_t err; 9742 BOOL case_sensitive; 9743 FINDER_INFO fi; 9744 9745 if (!ni) { 9746 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 9747 return EINVAL; 9748 } 9749 vol = ni->vol; 9750 upcase = vol->upcase; 9751 upcase_len = vol->upcase_len; 9752 case_sensitive = NVolCaseSensitive(vol); 9753 ntfs_debug("Entering."); 9754 lck_rw_lock_shared(&ni->lock); 9755 /* Do not allow messing with the inode once it has been deleted. */ 9756 if (NInoDeleted(ni)) { 9757 /* Remove the inode from the name cache. */ 9758 cache_purge(ni->vn); 9759 ntfs_debug("Mft_no 0x%llx is deleted.", 9760 (unsigned long long)ni->mft_no); 9761 err = ENOENT; 9762 goto err; 9763 } 9764 /* 9765 * Only regular files, directories, and symbolic links can have 9766 * extended attributes. (Specifically named streams cannot have them.) 
9767 * 9768 * Thus the check is for attribute inodes as all base inodes are 9769 * allowed. Raw inodes are also attribute inodes so they are excluded 9770 * automatically, too. 9771 */ 9772 if (NInoAttr(ni)) { 9773 ntfs_debug("Mft_no 0x%llx is an attribute inode.", 9774 (unsigned long long)ni->mft_no); 9775 err = EPERM; 9776 goto err; 9777 } 9778 size = 0; 9779 /* 9780 * First of all deal with the Finder info as that is special because we 9781 * cache it in the base ntfs inode @ni and we only want to export the 9782 * name for the Finder info, XATTR_FINDERINFO_NAME, if the Finder info 9783 * is non-zero. This is what HFS does, too. 9784 * 9785 * Thus we need to check the status of the cache in the ntfs inode 9786 * first and if that it valid we can use it to check the content of the 9787 * Finder info for being zero. And if it is not valid then it must be 9788 * non-resident in which case we need to read it into the cache in the 9789 * ntfs inode and then we can check the Finder info in the cache for 9790 * being zero. In fact we do this the other way round, i.e. if the 9791 * Finder info cache is not valid we read the Finder info into the 9792 * cache first and then the cache is definitely valid thus we can check 9793 * the Finder info for being non-zero and export XATTR_FINDERINFO_NAME 9794 * if so. 9795 */ 9796 if (!NInoValidFinderInfo(ni)) { 9797 if (!lck_rw_lock_shared_to_exclusive(&ni->lock)) { 9798 lck_rw_lock_exclusive(&ni->lock); 9799 if (NInoDeleted(ni)) { 9800 cache_purge(ni->vn); 9801 lck_rw_unlock_exclusive(&ni->lock); 9802 ntfs_debug("Mft_no 0x%llx is deleted.", 9803 (unsigned long long)ni->mft_no); 9804 return ENOENT; 9805 } 9806 } 9807 /* 9808 * Load the AFP_AfpInfo stream and initialize the backup time 9809 * and Finder info (if they are not already valid). 
9810 */ 9811 err = ntfs_inode_afpinfo_read(ni); 9812 if (err) { 9813 ntfs_error(vol->mp, "Failed to obtain AfpInfo for " 9814 "mft_no 0x%llx (error %d).", 9815 (unsigned long long)ni->mft_no, err); 9816 lck_rw_unlock_exclusive(&ni->lock); 9817 return err; 9818 } 9819 if (!NInoValidFinderInfo(ni)) 9820 panic("%s(): !NInoValidFinderInfo(ni)\n", __FUNCTION__); 9821 lck_rw_lock_exclusive_to_shared(&ni->lock); 9822 } 9823 /* 9824 * Make a copy of the Finder info and mask out the hidden bit if this 9825 * is the root directory and the type and creator if this is a symbolic 9826 * link. 9827 */ 9828 memcpy(&fi, &ni->finder_info, sizeof(fi)); 9829 if (ni == vol->root_ni) 9830 fi.attrs &= ~FINDER_ATTR_IS_HIDDEN; 9831 if (S_ISLNK(ni->mode)) { 9832 fi.type = 0; 9833 fi.creator = 0; 9834 } 9835 if (bcmp(&fi, &ntfs_empty_finder_info, sizeof(fi))) { 9836 if (!uio) 9837 size += sizeof(XATTR_FINDERINFO_NAME); 9838 else if (uio_resid(uio) < 9839 (user_ssize_t)sizeof(XATTR_FINDERINFO_NAME)) { 9840 err = ERANGE; 9841 goto err; 9842 } else { 9843 err = uiomove((caddr_t)XATTR_FINDERINFO_NAME, 9844 sizeof(XATTR_FINDERINFO_NAME), uio); 9845 if (err) { 9846 ntfs_error(vol->mp, "uiomove() failed (error " 9847 "%d).", err); 9848 goto err; 9849 } 9850 } 9851 ntfs_debug("Exporting Finder info name %s.", 9852 XATTR_FINDERINFO_NAME); 9853 } 9854 /* Iterate over all the named $DATA attributes. */ 9855 err = ntfs_mft_record_map(ni, &m); 9856 if (err) { 9857 ntfs_error(vol->mp, "Failed to map mft record (error %d).", 9858 err); 9859 goto err; 9860 } 9861 ctx = ntfs_attr_search_ctx_get(ni, m); 9862 if (!ctx) { 9863 ntfs_error(vol->mp, "Failed to allocate search context."); 9864 err = ENOMEM; 9865 goto unm_err; 9866 } 9867 /* 9868 * Allocate a buffer we can use when converting the names of the named 9869 * $DATA attributes to utf8. We want enough space to definitely be 9870 * able to convert the name as well as a byte for the NULL terminator. 
9871 */ 9872 utf8_size = NTFS_MAX_ATTR_NAME_LEN * 4 + 1; 9873 utf8_name = OSMalloc(utf8_size, ntfs_malloc_tag); 9874 if (!utf8_name) { 9875 ntfs_error(vol->mp, "Failed to allocate name buffer."); 9876 err = ENOMEM; 9877 goto put_err; 9878 } 9879 do { 9880 ntfs_inode *ani; 9881 ATTR_RECORD *a; 9882 ntfschar *name; 9883 unsigned name_len; 9884 signed utf8_len; 9885 9886 /* Get the next $DATA attribute. */ 9887 err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, NULL, 0, ctx); 9888 if (err) { 9889 if (err == ENOENT) { 9890 err = 0; 9891 break; 9892 } 9893 ntfs_error(vol->mp, "Failed to iterate over named " 9894 "$DATA attributes (error %d).", err); 9895 goto free_err; 9896 } 9897 /* Got the next attribute, deal with it. */ 9898 a = ctx->a; 9899 /* If this is the unnamed $DATA attribute, skip it. */ 9900 if (!a->name_length) { 9901 ntfs_debug("Skipping unnamed $DATA attribute."); 9902 continue; 9903 } 9904 name = (ntfschar*)((u8*)a + le16_to_cpu(a->name_offset)); 9905 name_len = a->name_length; 9906 if ((u8*)name < (u8*)a || (u8*)name + name_len > (u8*)a + 9907 le32_to_cpu(a->length)) { 9908 ntfs_error(vol->mp, "Found corrupt named $DATA " 9909 "attribute. Run chkdsk."); 9910 NVolSetErrors(vol); 9911 err = EIO; 9912 goto free_err; 9913 } 9914 /* 9915 * Check if this attribute currently has a cached inode/vnode 9916 * and if so check if it has been unlinked/deleted and if so 9917 * skip it. 9918 */ 9919 err = ntfs_attr_inode_lookup(ni, a->type, name, name_len, 9920 FALSE, &ani); 9921 if (err != ENOENT) { 9922 BOOL skip_it; 9923 9924 if (err) 9925 panic("%s() inode lookup failed (error %d).\n", 9926 __FUNCTION__, err); 9927 /* Got the cached attribute inode. 
*/ 9928 skip_it = FALSE; 9929 if (NInoDeleted(ani) || !ani->link_count || 9930 (ntfs_are_names_equal(name, name_len, 9931 NTFS_SFM_RESOURCEFORK_NAME, 12, 9932 case_sensitive, upcase, upcase_len) && 9933 !ubc_getsize(ani->vn))) 9934 skip_it = TRUE; 9935 if (skip_it) { 9936 if (NInoDeleted(ani) || !ani->link_count) 9937 ntfs_debug("Skipping deleted/unlinked " 9938 "attribute."); 9939 else 9940 ntfs_debug("Mft_no 0x%llx has zero " 9941 "size resource fork, " 9942 "pretending it does " 9943 "not exist.", 9944 (unsigned long long) 9945 ani->mft_no); 9946 (void)vnode_put(ani->vn); 9947 continue; 9948 } 9949 (void)vnode_put(ani->vn); 9950 } 9951 /* 9952 * If AFP_Resource named stream exists, i.e. the resource fork 9953 * is present, and it is non-empty export the name 9954 * XATTR_RESOURCEFORK_NAME. This is what HFS does, too. 9955 */ 9956 if (ntfs_are_names_equal(name, name_len, 9957 NTFS_SFM_RESOURCEFORK_NAME, 12, case_sensitive, 9958 upcase, upcase_len)) { 9959 if (!ntfs_attr_size(a)) { 9960 ntfs_debug("Skipping empty resource fork " 9961 "name %s.", 9962 XATTR_RESOURCEFORK_NAME); 9963 continue; 9964 } 9965 if (!uio) 9966 size += sizeof(XATTR_RESOURCEFORK_NAME); 9967 else if (uio_resid(uio) < (user_ssize_t)sizeof( 9968 XATTR_RESOURCEFORK_NAME)) { 9969 err = ERANGE; 9970 goto free_err; 9971 } else { 9972 err = uiomove((caddr_t)XATTR_RESOURCEFORK_NAME, 9973 sizeof(XATTR_RESOURCEFORK_NAME), 9974 uio); 9975 if (err) { 9976 ntfs_error(vol->mp, "uiomove() failed " 9977 "(error %d).", err); 9978 goto free_err; 9979 } 9980 } 9981 ntfs_debug("Exporting resource fork name %s.", 9982 XATTR_RESOURCEFORK_NAME); 9983 continue; 9984 } 9985 /* 9986 * If this is one of the SFM named streams, skip it, as they 9987 * contain effectively metadata information so should not be 9988 * exposed directly. 9989 */ 9990 if (ntfs_is_sfm_name(vol, name, name_len)) { 9991 ntfs_debug("Skipping protected SFM name."); 9992 continue; 9993 } 9994 /* Convert the name to utf8. 
*/ 9995 utf8_len = ntfs_to_utf8(vol, name, name_len << 9996 NTFSCHAR_SIZE_SHIFT, &utf8_name, &utf8_size); 9997 if (utf8_len < 0) { 9998 ntfs_warning(vol->mp, "Skipping unrepresentable name " 9999 "in mft_no 0x%llx (error %d).", 10000 (unsigned long long)ni->mft_no, 10001 -utf8_len); 10002 continue; 10003 } 10004 /* 10005 * If this is a protected attribute, skip it. 10006 * 10007 * FIXME: xattr_protected() is case sensitive so it does not 10008 * exclude protected attributes when they are not correctly 10009 * cased on disk. 10010 * 10011 * However we do call it to be consistent with HFS and SMB but 10012 * it is pointless as anyone can call getxattr() for a case 10013 * variant and the getxattr() system call would use 10014 * xattr_protected() which would not filter it out so the 10015 * VNOP_GETXATTR() call would happen and we would return the 10016 * attribute just fine. Simillarly anyone could set and remove 10017 * such "protected" attributes by just calling the system call 10018 * with a case variant even when they are correctly filtered 10019 * out here. 10020 */ 10021 if (xattr_protected((char*)utf8_name)) { 10022 ntfs_debug("Skipping protected name %.*s.", utf8_len, 10023 utf8_name); 10024 continue; 10025 } 10026 /* 10027 * Increment the length of the name by one for the NULL 10028 * terminator. 10029 */ 10030 utf8_len++; 10031 /* Export the utf8_name. */ 10032 if (!uio) 10033 size += utf8_len; 10034 else if (uio_resid(uio) < utf8_len) { 10035 err = ERANGE; 10036 goto free_err; 10037 } else { 10038 err = uiomove((caddr_t)utf8_name, utf8_len, uio); 10039 if (err) { 10040 ntfs_error(vol->mp, "uiomove() failed (error " 10041 "%d).", err); 10042 goto free_err; 10043 } 10044 } 10045 ntfs_debug("Exporting name %.*s.", utf8_len, utf8_name); 10046 /* Continue to the next name. 
*/ 10047 } while (1); 10048 if (!uio) 10049 *args->a_size = size; 10050 ntfs_debug("Done."); 10051free_err: 10052 OSFree(utf8_name, utf8_size, ntfs_malloc_tag); 10053put_err: 10054 ntfs_attr_search_ctx_put(ctx); 10055unm_err: 10056 ntfs_mft_record_unmap(ni); 10057err: 10058 lck_rw_unlock_shared(&ni->lock); 10059 return err; 10060} 10061 10062/** 10063 * ntfs_vnop_blktooff - map a logical block number to its byte offset 10064 * @a: arguments to blktooff function 10065 * 10066 * @a contains: 10067 * vnode_t a_vp; vnode to which the logical block number belongs 10068 * daddr64_t a_lblkno; logical block number to map 10069 * off_t *a_offset; destination for returning the result 10070 * 10071 * Map the logical block number @a->a_lblkno belonging to the vnode @a->a_vp to 10072 * the corresponding byte offset, i.e. the offset in the vnode in bytes and 10073 * return the result in @a->a_offset. 10074 * 10075 * Return 0 on success and EINVAL if no vnode was specified in @a->a_vp. 10076 */ 10077static int ntfs_vnop_blktooff(struct vnop_blktooff_args *a) 10078{ 10079 ntfs_inode *ni; 10080 ntfs_volume *vol; 10081 unsigned block_size_shift; 10082 10083 if (!a->a_vp) { 10084 ntfs_warning(NULL, "Called with NULL vnode!"); 10085 return EINVAL; 10086 } 10087 ni = NTFS_I(a->a_vp); 10088 if (!ni) { 10089 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 10090 return EINVAL; 10091 } 10092 if (S_ISDIR(ni->mode)) { 10093 ntfs_error(ni->vol->mp, "Called for directory vnode."); 10094 return EINVAL; 10095 } 10096 ntfs_debug("Entering for logical block 0x%llx, mft_no 0x%llx, type " 10097 "0x%x, name_len 0x%x.", (unsigned long long)a->a_lblkno, 10098 (unsigned long long)ni->mft_no, le32_to_cpu(ni->type), 10099 (unsigned)ni->name_len); 10100 vol = ni->vol; 10101 block_size_shift = PAGE_SHIFT; 10102 /* 10103 * For $MFT/$DATA and $MFTMirr/$DATA the logical block number is the 10104 * mft record number and the block size is the mft record size which is 10105 * also in 
@ni->block_size{,_shift}. 10106 */ 10107 if (ni == vol->mft_ni || ni == vol->mftmirr_ni) 10108 block_size_shift = ni->block_size_shift; 10109 *a->a_offset = a->a_lblkno << block_size_shift; 10110 ntfs_debug("Done (byte offset 0x%llx).", 10111 (unsigned long long)*a->a_offset); 10112 return 0; 10113} 10114 10115/** 10116 * ntfs_vnop_offtoblk - map a byte offset to its logical block number 10117 * @a: arguments to offtoblk function 10118 * 10119 * @a contains: 10120 * vnode_t a_vp; vnode to which the byte offset belongs 10121 * off_t a_offset; byte offset to map 10122 * daddr64_t *a_lblkno; destination for returning the result 10123 * 10124 * Map the byte offset @a->a_offset belonging to the vnode @a->a_vp to the 10125 * corresponding logical block number, i.e. the offset in the vnode in units of 10126 * the vnode block size and return the result in @a->a_lblkno. 10127 * 10128 * Return 0 on success and EINVAL if no vnode was specified in @a->a_vp. 10129 */ 10130static int ntfs_vnop_offtoblk(struct vnop_offtoblk_args *a) 10131{ 10132 ntfs_inode *ni; 10133 ntfs_volume *vol; 10134 unsigned block_size_shift; 10135 10136 if (!a->a_vp) { 10137 ntfs_warning(NULL, "Called with NULL vnode."); 10138 return EINVAL; 10139 } 10140 ni = NTFS_I(a->a_vp); 10141 if (!ni) { 10142 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 10143 return EINVAL; 10144 } 10145 if (S_ISDIR(ni->mode)) { 10146 ntfs_error(ni->vol->mp, "Called for directory vnode."); 10147 return EINVAL; 10148 } 10149 ntfs_debug("Entering for byte offset 0x%llx, mft_no 0x%llx, type " 10150 "0x%x, name_len 0x%x.", (unsigned long long)a->a_offset, 10151 (unsigned long long)ni->mft_no, le32_to_cpu(ni->type), 10152 (unsigned)ni->name_len); 10153 vol = ni->vol; 10154 block_size_shift = PAGE_SHIFT; 10155 /* 10156 * For $MFT/$DATA and $MFTMirr/$DATA the logical block number is the 10157 * mft record number and the block size is the mft record size which is 10158 * also in @ni->block_size{,_shift}. 
10159 */ 10160 if (ni == vol->mft_ni || ni == vol->mftmirr_ni) 10161 block_size_shift = ni->block_size_shift; 10162 *a->a_lblkno = a->a_offset >> block_size_shift; 10163 ntfs_debug("Done (logical block 0x%llx).", 10164 (unsigned long long)*a->a_lblkno); 10165 return 0; 10166} 10167 10168/** 10169 * ntfs_vnop_blockmap - map a file offset to its physical block number 10170 * @a: arguments to blockmap function 10171 * 10172 * @a contains: 10173 * vnode_t a_vp; vnode to which the byte offset belongs 10174 * off_t a_foffset; starting byte offset to map 10175 * size_t a_size; number of bytes to map starting at @a_foffset 10176 * daddr64_t *a_bpn; destination for starting physical block number 10177 * size_t *a_run; destination for contiguous bytes from @a_bpn 10178 * void *a_poff; physical offset into @a_bpn 10179 * int a_flags; reason for map (VNODE_READ, VNODE_WRITE, or 0) 10180 * vfs_context_t a_context; 10181 * 10182 * Map @a->a_size bytes starting at the file offset @a->a_foffset to the 10183 * corresponding physical block number and return the result in @a->a_bpn 10184 * (starting block number), @a->a_run (number of contiguous bytes starting at 10185 * @a->a_bpn), and @a->a_poff (byte offset into @a->a_bpn corresponding to the 10186 * file offset @a->a_foffset, this will be zero if @a_foffset is block aligned 10187 * and non-zero otherwise). 10188 * 10189 * FIXME: At present the OS X kernel completely ignores @a->a_poff and in fact 10190 * it is always either NULL on entry or the returned value is ignored. Thus, 10191 * for now, if @a->a_foffset is not aligned to the physical block size, we 10192 * always return error (EINVAL) unless @a->a_foffset equals the initialized 10193 * size in the ntfs inode in which case we return a block number of -1 in 10194 * @a->a_bpn thus alignment to the block and hence @a->a_poff are not relevant. 10195 * Thus we always return 0 in @a->a_poff. 
10196 * 10197 * @a->a_flags is either VNODE_READ or VNODE_WRITE but can be 0 in certain call 10198 * paths such as the system call fcntl(F_LOG2PHYS) for example. 10199 * 10200 * Note, all the return pointers (@a->a_bpn, @a->a_run, @a->a_poff) are NULL in 10201 * some code paths in xnu (one or more of them at a time), thus all of them 10202 * need to be checked for being NULL before writing to them. If @a->a_bpn is 10203 * NULL then there is nothing to do and success is returned immediately. 10204 * 10205 * For ntfs mapping to physical blocks is special because some attributes do 10206 * not have block aligned data. This is the case for all resident attributes 10207 * as well as for all non-resident attributes which are compressed or 10208 * encrypted. For all of those it would be logical to return an error however 10209 * this leads to a kernel panic in current xnu because a buf_bread() can cause 10210 * ntfs_vnop_blockmap() to be called when an uptodate page is in memory but no 10211 * buffer is in memory. This can happen under memory pressure when the buffer 10212 * has been recycled for something else but the page has not been reused yet. 10213 * In that case ntfs_vnop_blockmap() is only called to recreate the physical 10214 * mapping of the buffer and is not actually used for anything as the data is 10215 * already present in the uptodate page. Thus, instead of returning error, we 10216 * set the physical block @a->a_bpn to equal the logical block corresponding to 10217 * the byte offset @a->a_foffset and return success. Doing this signals to the 10218 * VFS that the physical mapping cannot be cached in the buffer and all is 10219 * well. Note this call path always has a non-zero @a->a_flags whilst other 10220 * "weird" code paths like fcntl(F_LOG2PHYS) set @a->a_flags to zero, thus we 10221 * can do the above workaround when @a->a_flags is not zero and return error 10222 * EINVAL when @a->a_flags is zero. 
10223 * 10224 * In the read case and when @a->a_flags is zero, if @a->a_foffset is beyond 10225 * the end of the attribute, return error ERANGE. HFS returns ERANGE in this 10226 * case so we follow suit. Although some other OS X file systems return EFBIG 10227 * and some E2BIG instead so it does not seem to be very standardized, so maybe 10228 * we should return the IMHO more correct "invalid seek" (ESPIPE), instead. (-; 10229 * 10230 * In the write case we need to allow the mapping of blocks beyond the end of 10231 * the attribute as we will already have extended the allocated size but not 10232 * yet the data size nor the initialized size. Thus in this case we only 10233 * return ERANGE if the requested @a->a_foffset is beyond the end of the 10234 * allocated size. 10235 * 10236 * Return 0 on success and errno on error. 10237 */ 10238static int ntfs_vnop_blockmap(struct vnop_blockmap_args *a) 10239{ 10240 const s64 byte_offset = a->a_foffset; 10241 const s64 byte_size = a->a_size; 10242 s64 max_size, data_size, init_size, clusters, bytes = 0; 10243 VCN vcn; 10244 LCN lcn; 10245 ntfs_inode *ni = NTFS_I(a->a_vp); 10246 ntfs_volume *vol; 10247 unsigned vcn_ofs; 10248 BOOL is_write = (a->a_flags & VNODE_WRITE); 10249 10250 if (!ni) { 10251 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 10252 return EINVAL; 10253 } 10254 vol = ni->vol; 10255 ntfs_debug("Entering for mft_no 0x%llx, type 0x%x, name_len 0x%x, " 10256 "offset 0x%llx, size 0x%llx, for %s operation.", 10257 (unsigned long long)ni->mft_no, 10258 (unsigned)le32_to_cpu(ni->type), 10259 (unsigned)ni->name_len, 10260 (unsigned long long)byte_offset, 10261 (unsigned long long)byte_size, 10262 a->a_flags ? (is_write ? 
"write" : "read") : 10263 "unspecified"); 10264 if (S_ISDIR(ni->mode)) { 10265 ntfs_error(vol->mp, "Called for directory vnode."); 10266 return EINVAL; 10267 } 10268 if (is_write && NVolReadOnly(vol)) { 10269 ntfs_warning(vol->mp, "Called for VNODE_WRITE but mount is " 10270 "read-only."); 10271 return EROFS; 10272 } 10273 if (!a->a_bpn) { 10274 ntfs_debug("Called with a_bpn == NULL, nothing to do. " 10275 "Returning success (0)."); 10276 return 0; 10277 } 10278 /* 10279 * We cannot take the inode lock as it may be held already so we just 10280 * check the deleted bit and abort if it is set which is better than 10281 * nothing. 10282 */ 10283 if (NInoDeleted(ni)) { 10284 /* Remove the inode from the name cache. */ 10285 cache_purge(ni->vn); 10286 ntfs_debug("Inode has been deleted."); 10287 return ENOENT; 10288 } 10289 /* 10290 * Note it does not matter if we are racing with truncate because that 10291 * will be detected during the runlist lookup below. 10292 */ 10293 lck_spin_lock(&ni->size_lock); 10294 if (is_write) 10295 max_size = ni->allocated_size; 10296 else 10297 max_size = ni->data_size; 10298 data_size = ni->data_size; 10299 init_size = ni->initialized_size; 10300 lck_spin_unlock(&ni->size_lock); 10301 if (byte_offset >= max_size) { 10302eof: 10303 ntfs_error(vol->mp, "Called for inode 0x%llx, size 0x%llx, " 10304 "byte offset 0x%llx, for %s operation, which " 10305 "is beyond the end of the inode %s size " 10306 "0x%llx. Returning error: ERANGE.", 10307 (unsigned long long)ni->mft_no, 10308 (unsigned long long)byte_size, 10309 (unsigned long long)byte_offset, a->a_flags ? 10310 (is_write ? "write" : "read") : "unspecified", 10311 is_write ? "allocated" : "data", 10312 (unsigned long long)max_size); 10313 return ERANGE; 10314 } 10315 if (byte_offset & vol->sector_size_mask && byte_offset != init_size) { 10316 ntfs_error(vol->mp, "Called for inode 0x%llx, byte offset " 10317 "0x%llx. 
This is not a multiple of the " 10318 "physical block size %u thus the mapping " 10319 "cannot be performed. Returning error: " 10320 "EINVAL.", (unsigned long long)ni->mft_no, 10321 (unsigned long long)byte_offset, 10322 (unsigned)vol->sector_size); 10323 return EINVAL; 10324 } 10325 /* 10326 * In the read case, if the requested byte offset is at or beyond the 10327 * initialized size simply return a hole. We already checked for being 10328 * at or beyond the data size so we know we are in an uninitialized 10329 * region in this case rather than at or beyond the end of the 10330 * attribute. 10331 */ 10332 if (!is_write && byte_offset >= init_size) { 10333 *a->a_bpn = -1; /* -1 means hole. */ 10334 /* 10335 * Set the size of the block to the number of uninitialized 10336 * bytes in the attribute starting at the requested byte offset 10337 * @a->a_foffset. 10338 */ 10339 bytes = data_size - byte_offset; 10340 goto done; 10341 } 10342 /* 10343 * Blockmap does not make sense for resident attributes and neither 10344 * does it make sense for non-resident, compressed or encrypted 10345 * attributes. The only special case is for directory inodes because 10346 * their flags are only defaults to be used when creating new files 10347 * rather than having any meaning for their actual data contents. 10348 */ 10349 if (!NInoNonResident(ni) || (ni->type != AT_INDEX_ALLOCATION && 10350 (NInoCompressed(ni) || NInoEncrypted(ni)) && 10351 !NInoRaw(ni))) { 10352 if (!a->a_flags) { 10353 ntfs_error(vol->mp, "Called for inode 0x%llx, which " 10354 "is resident, compressed, or " 10355 "encrypted and VNOP_BLOCKMAP() does " 10356 "not make sense for such inodes. 
" 10357 "Returning error: EINVAL.", 10358 (unsigned long long)ni->mft_no); 10359 return EINVAL; 10360 } 10361 *a->a_bpn = byte_offset >> PAGE_SHIFT; 10362 bytes = ni->block_size; 10363 ntfs_debug("Called for inode 0x%llx which is resident, " 10364 "compressed, or encrypted and VNOP_BLOCKMAP() " 10365 "does not make sense for such inodes. " 10366 "Returning success and setting physical == " 10367 "logical block number to signal to VFS that " 10368 "the mapping cannot be cached in the buffer.", 10369 (unsigned long long)ni->mft_no); 10370 goto done; 10371 } 10372 /* 10373 * All is ok, do the mapping. First, work out the vcn and vcn offset 10374 * corresponding to the @a->a_foffset. 10375 */ 10376 vcn = byte_offset >> vol->cluster_size_shift; 10377 vcn_ofs = (u32)byte_offset & vol->cluster_size_mask; 10378 /* 10379 * Convert the vcn to the corresponding lcn and obtain the number of 10380 * contiguous clusters starting at the vcn. 10381 */ 10382 lck_rw_lock_shared(&ni->rl.lock); 10383 lcn = ntfs_attr_vcn_to_lcn_nolock(ni, vcn, FALSE, 10384 a->a_run ? &clusters : 0); 10385 if (lcn < LCN_HOLE) { 10386 errno_t err; 10387 10388 /* Error: deal with it. */ 10389 lck_rw_unlock_shared(&ni->rl.lock); 10390 switch (lcn) { 10391 case LCN_ENOENT: 10392 /* 10393 * Raced with a concurrent truncate which caused the 10394 * byte offset @a->a_foffset to become outside the 10395 * attribute size. 10396 */ 10397 goto eof; 10398 case LCN_ENOMEM: 10399 ntfs_error(vol->mp, "Not enough memory to complete " 10400 "mapping for inode 0x%llx. " 10401 "Returning error: ENOMEM.", 10402 (unsigned long long)ni->mft_no); 10403 err = ENOMEM; 10404 break; 10405 default: 10406 ntfs_error(vol->mp, "Failed to complete mapping for " 10407 "inode 0x%llx. Run chkdsk. " 10408 "Returning error: EIO.", 10409 (unsigned long long)ni->mft_no); 10410 err = EIO; 10411 break; 10412 } 10413 return err; 10414 } 10415 if (lcn < 0) { 10416 /* 10417 * It is a hole, return it. 
If this is a VNODE_WRITE request, 10418 * output a warning as this should never happen. Both 10419 * VNOP_WRITE() and VNOP_PAGEOUT() should have instantiated the 10420 * hole before performing the write. 10421 * 10422 * Note we could potentially fill the hole here in the write 10423 * case. However this is quite hard to do as the caller will 10424 * likely have pages around the hole locked in UBC UPLs thus we 10425 * would have difficulties zeroing the surrounding regions when 10426 * the cluster size is larger than the page size. Also a 10427 * problem is what happens if the write fails for some reason 10428 * but we have instantiated the hole here and not zeroed it 10429 * completely (because we are expecting the write to go into 10430 * the allocated clusters). We would have no way of fixing up 10431 * in this case and we would end up exposing stale data. This 10432 * all is why we choose not to fill the hole here but to do it 10433 * in advance in ntfs_vnop_write() and ntfs_vnop_pageout(). 10434 * 10435 * The only thing that will happen when we return a hole in the 10436 * write case is that when the caller is cluster_io(), it will 10437 * page out page by page and this will fill the hole in pieces 10438 * which will degrade performance. 10439 */ 10440 if (is_write) 10441 ntfs_warning(vol->mp, "Returning hole but flags " 10442 "specify VNODE_WRITE. This causes " 10443 "very inefficient allocation and I/O " 10444 "patterns."); 10445 /* Return the hole. */ 10446 lck_rw_unlock_shared(&ni->rl.lock); 10447 *a->a_bpn = -1; /* -1 means hole. */ 10448 if (a->a_run) { 10449 bytes = (clusters << vol->cluster_size_shift) - vcn_ofs; 10450 /* 10451 * If the run overlaps the initialized size, extend the 10452 * run length so it goes up to the data size thus 10453 * merging the hole with the uninitialized region. 
10454 * 10455 * Note, do not do this in the write case as we want to 10456 * return the real clusters even beyond the initialized 10457 * size as the initialized size will only be updated 10458 * after the write has completed. 10459 */ 10460 if (!is_write && byte_offset + bytes > init_size) 10461 bytes = data_size - byte_offset; 10462 } 10463 goto done; 10464 } else 10465 lck_rw_unlock_shared(&ni->rl.lock); 10466 /* The vcn was mapped successfully to a physical lcn, return it. */ 10467 *a->a_bpn = ((lcn << vol->cluster_size_shift) + vcn_ofs) >> 10468 vol->sector_size_shift; 10469 if (a->a_run) { 10470 bytes = (clusters << vol->cluster_size_shift) - vcn_ofs; 10471 /* 10472 * In the read case, if the run overlaps the initialized size, 10473 * truncate the run length so it only goes up to the 10474 * initialized size. The caller will then be able to access 10475 * this region on disk directly and will then call us again 10476 * with a byte offset equal to the initialized size and we will 10477 * then return the entire initialized region as a hole. Thus 10478 * the caller does not need to know about the fact that NTFS 10479 * has such a thing as the initialized_size. 10480 * 10481 * We already handled the case where the byte offset is beyond 10482 * the initialized size so no need to check for that here. 10483 * 10484 * However do not do this if the initialized size is equal to 10485 * the data size. The caller is responsible for not returning 10486 * data beyond the attribute size to user space. If this is 10487 * not done the last page of an attribute read is broken into 10488 * two separate i/os, one with a read and one with a hole. 10489 * cluster_io() will zero beyond the end of attribute in any 10490 * case so it is faster to do it with a single call. 
10491 */ 10492 if (!is_write && byte_offset + bytes > init_size && 10493 init_size < data_size) 10494 bytes = init_size - byte_offset; 10495 } 10496done: 10497 if (a->a_run) { 10498 if (bytes > byte_size) 10499 bytes = byte_size; 10500 *a->a_run = bytes; 10501 } 10502 if (a->a_poff) 10503 *(int*)a->a_poff = 0; 10504 ntfs_debug("Done (a_bpn 0x%llx, a_run 0x%lx, a_poff 0x%x).", 10505 (unsigned long long)*a->a_bpn, 10506 a->a_run ? (unsigned long)*a->a_run : 0, 10507 a->a_poff ? *(int*)a->a_poff : 0); 10508 return 0; 10509} 10510 10511/** 10512 * ntfs_vnop_getnamedstream - find a named stream in an inode given its name 10513 * @a: arguments to getnamedstream function 10514 * 10515 * @a contains: 10516 * vnode_t a_vp; vnode containing the named stream 10517 * vnode_t *a_svpp; destination for the named stream vnode 10518 * const char *a_name; name of the named stream to get 10519 * enum nsoperation a_operation; reason for getnamedstream 10520 * int a_flags; flags describing the request 10521 * vfs_context_t a_context; 10522 * 10523 * Find the named stream with name @a->a_name in the vnode @a->a_vp and return 10524 * the vnode of the named stream in *@a->a_svpp if it was found. 10525 * 10526 * @a->a_operation specifies the reason for the lookup of the named stream. 10527 * The following operations are currently defined in OS X kernel: 10528 * NS_OPEN - Want to open the named stream for access. 10529 * NS_CREATE - Want to create the named stream so checking it does not 10530 * exist already. 10531 * NS_DELETE - Want to delete the named stream so making sure it exists. 10532 * 10533 * The flags in @a->a_flags further describe the getnamedstream request. At 10534 * present no flags are defined in OS X kernel. 10535 * 10536 * Note that at present Mac OS X only supports the "com.apple.ResourceFork" 10537 * stream so we follow suit. 10538 * 10539 * Return 0 on success and the error code on error. 
A return value of ENOATTR 10540 * does not signify an error as such but merely the fact that the named stream 10541 * @name is not present in the vnode @a->a_vp. 10542 */ 10543static int ntfs_vnop_getnamedstream(struct vnop_getnamedstream_args *a) 10544{ 10545 vnode_t vn = a->a_vp; 10546 ntfs_inode *sni, *ni = NTFS_I(vn); 10547 const char *name = a->a_name; 10548 int options; 10549 const enum nsoperation op = a->a_operation; 10550 errno_t err; 10551 10552 if (!ni) { 10553 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 10554 return EINVAL; 10555 } 10556 ntfs_debug("Entering for mft_no 0x%llx, stream name %s, operation %s " 10557 "(0x%x), flags 0x%x.", (unsigned long long)ni->mft_no, 10558 name, op == NS_OPEN ? "NS_OPEN" : 10559 (op == NS_CREATE ? "NS_CREATE" : 10560 (op == NS_DELETE ? "NS_DELETE" : "unknown")), op, 10561 a->a_flags); 10562 /* 10563 * Mac OS X only supports the resource fork stream. 10564 * Note that this comparison is case sensitive. 10565 */ 10566 if (bcmp(name, XATTR_RESOURCEFORK_NAME, 10567 sizeof(XATTR_RESOURCEFORK_NAME))) { 10568 ntfs_warning(ni->vol->mp, "Unsupported named stream %s " 10569 "specified, only the resource fork named " 10570 "stream (%s) is supported at present. " 10571 "Returning ENOATTR.", name, 10572 XATTR_RESOURCEFORK_NAME); 10573 return ENOATTR; 10574 } 10575 /* Only regular files may have a resource fork stream. */ 10576 if (!S_ISREG(ni->mode)) { 10577 ntfs_warning(ni->vol->mp, "The resource fork may only be " 10578 "attached to regular files and mft_no 0x%llx " 10579 "is not a regular file. Returning EPERM.", 10580 (unsigned long long)ni->mft_no); 10581 return EPERM; 10582 } 10583 /* 10584 * Attempt to get the inode for the named stream. For the resource 10585 * fork we need to return it even if it is zero size if the caller has 10586 * specified @op == NS_OPEN so we set @options to zero in this case. 
10587 * Otherwise we want to treat a zero size resource fork as a 10588 * non-existent resource fork se we set @options to XATTR_REPLACE which 10589 * is the behaviour of ntfs_attr_inode_get(). 10590 */ 10591 if (op == NS_OPEN) { 10592 options = 0; 10593 lck_rw_lock_exclusive(&ni->lock); 10594 } else { 10595 options = XATTR_REPLACE; 10596 lck_rw_lock_shared(&ni->lock); 10597 } 10598 /* Do not allow messing with the inode once it has been deleted. */ 10599 if (NInoDeleted(ni)) { 10600 /* Remove the inode from the name cache. */ 10601 cache_purge(vn); 10602 if (op == NS_OPEN) 10603 lck_rw_unlock_exclusive(&ni->lock); 10604 else 10605 lck_rw_unlock_shared(&ni->lock); 10606 ntfs_debug("Mft_no 0x%llx is deleted.", 10607 (unsigned long long)ni->mft_no); 10608 return ENOENT; 10609 } 10610 err = ntfs_attr_inode_get_or_create(ni, AT_DATA, 10611 NTFS_SFM_RESOURCEFORK_NAME, 12, FALSE, FALSE, options, 10612 LCK_RW_TYPE_SHARED, &sni); 10613 if (!err) { 10614 /* We have successfully opened the named stream. 
*/ 10615 *a->a_svpp = sni->vn; 10616 lck_rw_unlock_shared(&sni->lock); 10617 ntfs_debug("Done."); 10618 } else { 10619 if (err == ENOENT) { 10620 err = ENOATTR; 10621 ntfs_debug("Done (named stream %s does not exist in " 10622 "mft_no 0x%llx.", name, 10623 (unsigned long long)ni->mft_no); 10624 } else 10625 ntfs_error(ni->vol->mp, "Failed to get named stream " 10626 "%s, mft_no 0x%llx (error %d).", name, 10627 (unsigned long long)ni->mft_no, err); 10628 } 10629 if (op == NS_OPEN) 10630 lck_rw_unlock_exclusive(&ni->lock); 10631 else 10632 lck_rw_unlock_shared(&ni->lock); 10633 return err; 10634} 10635 10636/** 10637 * ntfs_vnop_makenamedstream - create a named stream in an ntfs inode 10638 * @a: arguments to makenamedstream function 10639 * 10640 * @a contains: 10641 * vnode_t a_vp; vnode in which to create the named stream 10642 * vnode_t *a_svpp; destination for the named stream vnode 10643 * const char *a_name; name of the named stream to create 10644 * int a_flags; flags describing the request 10645 * vfs_context_t a_context; 10646 * 10647 * Create the named stream with name @a->a_name in the vnode @a->a_vp and 10648 * return the created vnode of the named stream in *@a->a_svpp. If the named 10649 * stream already exists than it is obtained instead, i.e. if the named stream 10650 * already exists then ntfs_vnop_makenamedstream() does exactly the same thing 10651 * as ntfs_vnop_getnamedstream(). 10652 * 10653 * The flags in @a->a_flags further describe the makenamedstream request. At 10654 * present no flags are defined in OS X kernel. 10655 * 10656 * Note that at present Mac OS X only supports the "com.apple.ResourceFork" 10657 * stream so we follow suit. 10658 * 10659 * Return 0 on success and the error code on error. 
10660 */ 10661static int ntfs_vnop_makenamedstream(struct vnop_makenamedstream_args *a) 10662{ 10663 vnode_t vn = a->a_vp; 10664 ntfs_inode *sni, *ni = NTFS_I(vn); 10665 const char *name = a->a_name; 10666 errno_t err; 10667 10668 if (!ni) { 10669 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 10670 return EINVAL; 10671 } 10672 ntfs_debug("Entering for mft_no 0x%llx, stream name %s, flags 0x%x.", 10673 (unsigned long long)ni->mft_no, name, a->a_flags); 10674 /* 10675 * Mac OS X only supports the resource fork stream. 10676 * Note that this comparison is case sensitive. 10677 */ 10678 if (bcmp(name, XATTR_RESOURCEFORK_NAME, 10679 sizeof(XATTR_RESOURCEFORK_NAME))) { 10680 ntfs_warning(ni->vol->mp, "Unsupported named stream %s " 10681 "specified, only the resource fork named " 10682 "stream (%s) is supported at present. " 10683 "Returning ENOATTR.", name, 10684 XATTR_RESOURCEFORK_NAME); 10685 return ENOATTR; 10686 } 10687 /* Only regular files may have a resource fork stream. */ 10688 if (!S_ISREG(ni->mode)) { 10689 ntfs_warning(ni->vol->mp, "The resource fork may only be " 10690 "attached to regular files and mft_no 0x%llx " 10691 "is not a regular file. Returning EPERM.", 10692 (unsigned long long)ni->mft_no); 10693 return EPERM; 10694 } 10695 lck_rw_lock_exclusive(&ni->lock); 10696 /* Do not allow messing with the inode once it has been deleted. */ 10697 if (NInoDeleted(ni)) { 10698 /* Remove the inode from the name cache. */ 10699 cache_purge(vn); 10700 lck_rw_unlock_exclusive(&ni->lock); 10701 ntfs_debug("Mft_no 0x%llx is deleted.", 10702 (unsigned long long)ni->mft_no); 10703 return ENOENT; 10704 } 10705 /* 10706 * Attempt to create the named stream. 10707 * 10708 * HFS allows an existing resource fork to be opened. We want to 10709 * follow suit so we specify 0 for @options when calling 10710 * ntfs_attr_inode_get_or_create(). 10711 * 10712 * FIXME: I think this is actually wrong behaviour. 
If I am right and 10713 * this is one day fixed in HFS, then we can trivially fix the 10714 * behaviour here by setting @options to XATTR_CREATE. 10715 */ 10716 err = ntfs_attr_inode_get_or_create(ni, AT_DATA, 10717 NTFS_SFM_RESOURCEFORK_NAME, 12, FALSE, FALSE, 0, 10718 LCK_RW_TYPE_SHARED, &sni); 10719 if (!err) { 10720 /* We have successfully opened the (created) named stream. */ 10721 *a->a_svpp = sni->vn; 10722 lck_rw_unlock_shared(&sni->lock); 10723 ntfs_debug("Done."); 10724 } else { 10725 if (err == EEXIST) 10726 ntfs_debug("Named stream %s already exists in mft_no " 10727 "0x%llx.", name, 10728 (unsigned long long)ni->mft_no); 10729 else 10730 ntfs_error(ni->vol->mp, "Failed to create named " 10731 "stream %s in mft_no 0x%llx (error " 10732 "%d).", name, 10733 (unsigned long long)ni->mft_no, err); 10734 } 10735 lck_rw_unlock_exclusive(&ni->lock); 10736 return err; 10737} 10738 10739/** 10740 * ntfs_vnop_removenamedstream - remove a named stream from an ntfs inode 10741 * @a: arguments to removenamedstream function 10742 * 10743 * @a contains: 10744 * vnode_t a_vp; vnode from which to remove the named stream 10745 * vnode_t a_svp; vnode of named stream to remove 10746 * const char *a_name; name of the named stream to remove 10747 * int a_flags; flags describing the request 10748 * vfs_context_t a_context; 10749 * 10750 * Delete the named stream described by the vnode @a->a_svp with name 10751 * @a->a_name from the vnode @a->a_vp. 10752 * 10753 * The flags in @a->a_flags further describe the removenamedstream request. At 10754 * present no flags are defined in OS X kernel. 10755 * 10756 * Note we obey POSIX open unlink semantics thus an open named stream will 10757 * remain accessible for read/write/lseek purproses until the last open 10758 * instance is closed when the VFS will call ntfs_vnop_inactive() which will in 10759 * turn actually remove the named stream. 
10760 * 10761 * Note that at present Mac OS X only supports the "com.apple.ResourceFork" 10762 * stream so we follow suit. 10763 * 10764 * Return 0 on success and the error code on error. A return value of ENOATTR 10765 * does not signify an error as such but merely the fact that the named stream 10766 * @name is not present in the vnode @a->a_vp. 10767 */ 10768static int ntfs_vnop_removenamedstream(struct vnop_removenamedstream_args *a) 10769{ 10770 vnode_t svn, vn = a->a_vp; 10771 ntfs_inode *sni, *ni = NTFS_I(vn); 10772 const char *vname, *name = a->a_name; 10773 10774 svn = a->a_svp; 10775 sni = NTFS_I(svn); 10776 if (!ni || !sni) { 10777 ntfs_debug("Entered with NULL ntfs_inode, aborting."); 10778 return EINVAL; 10779 } 10780 vname = vnode_getname(svn); 10781 ntfs_debug("Entering for mft_no 0x%llx, stream mft_no 0x%llx, stream " 10782 "name %s, flags 0x%x, stream vnode name %s.", 10783 (unsigned long long)ni->mft_no, 10784 (unsigned long long)sni->mft_no, name, a->a_flags, 10785 vname ? vname : "not present"); 10786 if (vname) 10787 (void)vnode_putname(vname); 10788 /* 10789 * Mac OS X only supports the resource fork stream. 10790 * Note that this comparison is case sensitive. 10791 */ 10792 if (bcmp(name, XATTR_RESOURCEFORK_NAME, 10793 sizeof(XATTR_RESOURCEFORK_NAME))) { 10794 ntfs_warning(ni->vol->mp, "Unsupported named stream %s " 10795 "specified, only the resource fork named " 10796 "stream (%s) is supported at present. " 10797 "Returning ENOATTR.", name, 10798 XATTR_RESOURCEFORK_NAME); 10799 return ENOATTR; 10800 } 10801 /* Only regular files may have a resource fork stream. */ 10802 if (!S_ISREG(ni->mode)) { 10803 ntfs_warning(ni->vol->mp, "The resource fork may only be " 10804 "attached to regular files and mft_no 0x%llx " 10805 "is not a regular file. Returning EPERM.", 10806 (unsigned long long)ni->mft_no); 10807 return EPERM; 10808 } 10809 lck_rw_lock_exclusive(&ni->lock); 10810 /* Do not allow messing with the inode once it has been deleted. 
*/ 10811 if (NInoDeleted(ni)) { 10812 /* Remove the inode from the name cache. */ 10813 cache_purge(vn); 10814 lck_rw_unlock_exclusive(&ni->lock); 10815 ntfs_debug("Mft_no 0x%llx is deleted.", 10816 (unsigned long long)ni->mft_no); 10817 return ENOATTR; 10818 } 10819 lck_rw_lock_exclusive(&sni->lock); 10820 /* Do not allow messing with the stream once it has been deleted. */ 10821 if (NInoDeleted(sni)) { 10822 /* Remove the inode from the name cache. */ 10823 cache_purge(svn); 10824 lck_rw_unlock_exclusive(&sni->lock); 10825 lck_rw_unlock_exclusive(&ni->lock); 10826 ntfs_debug("Stream mft_no 0x%llx, name %s is deleted.", 10827 (unsigned long long)sni->mft_no, name); 10828 return ENOATTR; 10829 } 10830 /* 10831 * The base inode of the stream inode must be the same as the parent 10832 * inode specified by the caller. 10833 */ 10834 if (!NInoAttr(sni) || sni->base_ni != ni) 10835 panic("%s(): !NInoAttr(sni) || sni->base_ni != ni\n", 10836 __FUNCTION__); 10837 /* 10838 * Unlink the named stream. The last close will cause the VFS to call 10839 * ntfs_vnop_inactive() which will do the actual removal. 10840 * 10841 * And if the named stream is already unlinked there is nothing to do. 10842 * This is what HFS does so we follow suit. 10843 */ 10844 if (sni->link_count) { 10845 sni->link_count = 0; 10846 /* 10847 * Update the last_mft_change_time (ctime) in the inode as 10848 * named stream/extended attribute semantics expect on OS X. 10849 */ 10850 ni->last_mft_change_time = ntfs_utc_current_time(); 10851 NInoSetDirtyTimes(ni); 10852 /* 10853 * If this is not a directory or it is an encrypted directory, 10854 * set the needs archiving bit except for the core system 10855 * files. 
10856 */ 10857 if (!S_ISDIR(ni->mode) || NInoEncrypted(ni)) { 10858 BOOL need_set_archive_bit = TRUE; 10859 if (ni->vol->major_ver >= 2) { 10860 if (ni->mft_no <= FILE_Extend) 10861 need_set_archive_bit = FALSE; 10862 } else { 10863 if (ni->mft_no <= FILE_UpCase) 10864 need_set_archive_bit = FALSE; 10865 } 10866 if (need_set_archive_bit) { 10867 ni->file_attributes |= FILE_ATTR_ARCHIVE; 10868 NInoSetDirtyFileAttributes(ni); 10869 } 10870 } 10871 ntfs_debug("Done."); 10872 } else 10873 ntfs_debug("$DATA/%s attribute has already been unlinked from " 10874 "mft_no 0x%llx.", name, 10875 (unsigned long long)sni->mft_no); 10876 lck_rw_unlock_exclusive(&sni->lock); 10877 lck_rw_unlock_exclusive(&ni->lock); 10878 return 0; 10879} 10880 10881static struct vnodeopv_entry_desc ntfs_vnodeop_entries[] = { 10882 /* 10883 * Set vn_default_error() to be our default vnop, thus any vnops we do 10884 * not specify (or specify as NULL) will be set to it and this function 10885 * just returns ENOTSUP. 10886 */ 10887 { &vnop_default_desc, (vnop_t*)vn_default_error }, 10888 { &vnop_strategy_desc, (vnop_t*)ntfs_vnop_strategy }, 10889 /* 10890 * vn_bwrite() is a simple wrapper for buf_bwrite() which in turn uses 10891 * VNOP_STRATEGY() and hence ntfs_vnop_strategy() to do the i/o and the 10892 * latter handles all NTFS specifics thus we can simply use the generic 10893 * vn_bwrite() for our VNOP_BWRITE() method. 
10894 */ 10895 { &vnop_bwrite_desc, (vnop_t*)vn_bwrite }, 10896 { &vnop_lookup_desc, (vnop_t*)ntfs_vnop_lookup }, 10897 { &vnop_create_desc, (vnop_t*)ntfs_vnop_create }, 10898 { &vnop_mknod_desc, (vnop_t*)ntfs_vnop_mknod }, 10899 { &vnop_open_desc, (vnop_t*)ntfs_vnop_open }, 10900 { &vnop_close_desc, (vnop_t*)ntfs_vnop_close }, 10901 { &vnop_access_desc, (vnop_t*)ntfs_vnop_access }, 10902 { &vnop_getattr_desc, (vnop_t*)ntfs_vnop_getattr }, 10903 { &vnop_setattr_desc, (vnop_t*)ntfs_vnop_setattr }, 10904 { &vnop_read_desc, (vnop_t*)ntfs_vnop_read }, 10905 { &vnop_write_desc, (vnop_t*)ntfs_vnop_write }, 10906 { &vnop_ioctl_desc, (vnop_t*)ntfs_vnop_ioctl }, 10907 { &vnop_select_desc, (vnop_t*)ntfs_vnop_select }, 10908 { &vnop_exchange_desc, (vnop_t*)ntfs_vnop_exchange }, 10909 /* Let the VFS deal with revoking a vnode. */ 10910 { &vnop_revoke_desc, (vnop_t*)nop_revoke }, 10911 { &vnop_mmap_desc, (vnop_t*)ntfs_vnop_mmap }, 10912 { &vnop_mnomap_desc, (vnop_t*)ntfs_vnop_mnomap }, 10913 { &vnop_fsync_desc, (vnop_t*)ntfs_vnop_fsync }, 10914 { &vnop_remove_desc, (vnop_t*)ntfs_vnop_remove }, 10915 { &vnop_link_desc, (vnop_t*)ntfs_vnop_link }, 10916 { &vnop_rename_desc, (vnop_t*)ntfs_vnop_rename }, 10917 { &vnop_mkdir_desc, (vnop_t*)ntfs_vnop_mkdir }, 10918 { &vnop_rmdir_desc, (vnop_t*)ntfs_vnop_rmdir }, 10919 { &vnop_symlink_desc, (vnop_t*)ntfs_vnop_symlink }, 10920 { &vnop_readdir_desc, (vnop_t*)ntfs_vnop_readdir }, 10921 { &vnop_readdirattr_desc, (vnop_t*)ntfs_vnop_readdirattr }, 10922 { &vnop_readlink_desc, (vnop_t*)ntfs_vnop_readlink }, 10923 { &vnop_inactive_desc, (vnop_t*)ntfs_vnop_inactive }, 10924 { &vnop_reclaim_desc, (vnop_t*)ntfs_vnop_reclaim }, 10925 { &vnop_pathconf_desc, (vnop_t*)ntfs_vnop_pathconf }, 10926 /* 10927 * Let the VFS deal with advisory locking for us, so our advlock method 10928 * should never get called and if it were to get called for some 10929 * reason, we make sure to return error (ENOTSUP). 
10930 */ 10931 { &vnop_advlock_desc, (vnop_t*)err_advlock }, 10932 { &vnop_allocate_desc, (vnop_t*)ntfs_vnop_allocate }, 10933 { &vnop_pagein_desc, (vnop_t*)ntfs_vnop_pagein }, 10934 { &vnop_pageout_desc, (vnop_t*)ntfs_vnop_pageout }, 10935 { &vnop_searchfs_desc, (vnop_t*)ntfs_vnop_searchfs }, 10936 /* 10937 * Nothing supports copyfile in current xnu and it is not documented so 10938 * we do not support it either. 10939 */ 10940 { &vnop_copyfile_desc, (vnop_t*)err_copyfile }, 10941 { &vnop_getxattr_desc, (vnop_t*)ntfs_vnop_getxattr }, 10942 { &vnop_setxattr_desc, (vnop_t*)ntfs_vnop_setxattr }, 10943 { &vnop_removexattr_desc, (vnop_t*)ntfs_vnop_removexattr }, 10944 { &vnop_listxattr_desc, (vnop_t*)ntfs_vnop_listxattr }, 10945 { &vnop_blktooff_desc, (vnop_t*)ntfs_vnop_blktooff }, 10946 { &vnop_offtoblk_desc, (vnop_t*)ntfs_vnop_offtoblk }, 10947 { &vnop_blockmap_desc, (vnop_t*)ntfs_vnop_blockmap }, 10948 { &vnop_getnamedstream_desc, (vnop_t*)ntfs_vnop_getnamedstream }, 10949 { &vnop_makenamedstream_desc, (vnop_t*)ntfs_vnop_makenamedstream }, 10950 { &vnop_removenamedstream_desc, (vnop_t*)ntfs_vnop_removenamedstream }, 10951 { NULL, (vnop_t*)NULL } 10952}; 10953 10954struct vnodeopv_desc ntfs_vnodeopv_desc = { 10955 &ntfs_vnodeop_p, ntfs_vnodeop_entries 10956}; 10957