1/* 2 * ntfs_dir.c - NTFS kernel directory operations. 3 * 4 * Copyright (c) 2006-2010 Anton Altaparmakov. All Rights Reserved. 5 * Portions Copyright (c) 2006-2010 Apple Inc. All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 * ALTERNATIVELY, provided that this notice and licensing terms are retained in
 * full, this file may be redistributed and/or modified under the terms of the
 * GNU General Public License (GPL) Version 2, in which case the provisions of
 * that version of the GPL will apply to you instead of the license terms
 * above. You can obtain a copy of the GPL Version 2 at
 * http://developer.apple.com/opensource/licenses/gpl-2.txt.
 */

#include <sys/buf.h>
#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/errno.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/ucred.h>
#include <sys/uio.h>
#include <sys/vnode.h>

#include <string.h>

#include <libkern/OSAtomic.h>
#include <libkern/OSMalloc.h>

#include <kern/debug.h>
#include <kern/locks.h>

#include "ntfs.h"
#include "ntfs_attr.h"
#include "ntfs_debug.h"
#include "ntfs_dir.h"
#include "ntfs_endian.h"
#include "ntfs_index.h"
#include "ntfs_inode.h"
#include "ntfs_layout.h"
#include "ntfs_mft.h"
#include "ntfs_page.h"
#include "ntfs_time.h"
#include "ntfs_types.h"
#include "ntfs_unistr.h"
#include "ntfs_volume.h"

/**
 * The little endian Unicode string $I30 as a global constant.
 *
 * The array holds the four characters '$', 'I', '3', '0' followed by a
 * terminating zero. Code in this file passes it together with an explicit
 * length of 4 (i.e. without the terminator) as the name of the directory index
 * when getting the index inode and when looking up the index root attribute.
 */
ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
		const_cpu_to_le16('3'), const_cpu_to_le16('0'), 0 };

/**
 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
 * @dir_ni:	ntfs inode of the directory in which to search for the name
 * @uname:	Unicode name for which to search in the directory
 * @uname_len:	length of the name @uname in Unicode characters
 * @res_mref:	return the mft reference of the inode of the found name
 * @res_name:	return the found filename if necessary (see below)
 *
 * Look for an inode with name @uname of length @uname_len Unicode characters
 * in the directory with inode @dir_ni.
This is done by walking the contents 87 * of the directory B+tree looking for the Unicode name. 88 * 89 * If the name is found in the directory, 0 is returned and the corresponding 90 * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it is 91 * a 64-bit number containing the sequence number, in *@res_mref. 92 * 93 * On error, the error code is returned. In particular if the inode is not 94 * found ENOENT is returned which is not an error as such. 95 * 96 * Note, @uname_len does not include the (optional) terminating NUL character. 97 * 98 * Note, we look for a case sensitive match first but we also look for a case 99 * insensitive match at the same time. If we find a case insensitive match, we 100 * save that for the case that we do not find an exact match, where we return 101 * the case insensitive match and setup *@res_name (which we allocate) with the 102 * mft reference, the filename type, length and with a copy of the little 103 * endian Unicode filename itself. If we match a filename which is in the DOS 104 * namespace, we only return the mft reference and filename type in *@res_name. 105 * ntfs_vnop_lookup() then uses this to find the long filename in the inode 106 * itself. This is so it can use the name cache effectively. 107 * 108 * Locking: Caller must hold @dir_ni->lock. 109 * 110 * TODO: From Mark's review comments: pull the iteration code into a separate 111 * function and call it both for the index root and index allocation iteration. 112 * See the ntfs_index_lookup() function in ntfs_index.c... 
113 */ 114errno_t ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname, 115 const signed uname_len, MFT_REF *res_mref, 116 ntfs_dir_lookup_name **res_name) 117{ 118 VCN vcn, old_vcn; 119 ntfs_volume *vol = dir_ni->vol; 120 mount_t mp = vol->mp; 121 ntfs_inode *ia_ni; 122 vnode_t ia_vn = NULL; 123 MFT_RECORD *m; 124 INDEX_ROOT *ir; 125 INDEX_ENTRY *ie; 126 ntfs_dir_lookup_name *name = NULL; 127 upl_t upl; 128 upl_page_info_array_t pl; 129 u8 *kaddr; 130 INDEX_ALLOCATION *ia; 131 u8 *index_end; 132 ntfs_attr_search_ctx *ctx; 133 int rc; 134 errno_t err; 135 136 if (!S_ISDIR(dir_ni->mode)) 137 panic("%s(): !S_ISDIR(dir_ni->mode\n", __FUNCTION__); 138 if (NInoAttr(dir_ni)) 139 panic("%s(): NInoAttr(dir_ni)\n", __FUNCTION__); 140 /* Get the index allocation inode. */ 141 err = ntfs_index_inode_get(dir_ni, I30, 4, FALSE, &ia_ni); 142 if (err) { 143 ntfs_error(mp, "Failed to get index vnode (error %d).", err); 144 return err; 145 } 146 ia_vn = ia_ni->vn; 147 lck_rw_lock_shared(&ia_ni->lock); 148 /* Get hold of the mft record for the directory. */ 149 err = ntfs_mft_record_map(dir_ni, &m); 150 if (err) { 151 ntfs_error(mp, "Failed to map mft record for directory (error " 152 "%d).", err); 153 goto err; 154 } 155 ctx = ntfs_attr_search_ctx_get(dir_ni, m); 156 if (!ctx) { 157 ntfs_error(mp, "Failed to get attribute search context."); 158 err = ENOMEM; 159 goto unm_err; 160 } 161 /* Find the index root attribute in the mft record. */ 162 err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, 0, NULL, 0, ctx); 163 if (err) { 164 if (err == ENOENT) { 165 ntfs_error(mp, "Index root attribute missing in " 166 "directory inode 0x%llx.", 167 (unsigned long long)dir_ni->mft_no); 168 err = EIO; 169 } 170 goto put_err; 171 } 172 /* 173 * Get to the index root value (it has been verified in 174 * ntfs_index_inode_read()). 
175 */ 176 ir = (INDEX_ROOT*)((u8*)ctx->a + le16_to_cpu(ctx->a->value_offset)); 177 index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); 178 /* The first index entry. */ 179 ie = (INDEX_ENTRY*)((u8*)&ir->index + 180 le32_to_cpu(ir->index.entries_offset)); 181 /* 182 * Loop until we exceed valid memory (corruption case) or until we 183 * reach the last entry. 184 */ 185 for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { 186 ntfs_debug("In index root, offset 0x%x.", 187 (unsigned)((u8*)ie - (u8*)ir)); 188 /* Bounds checks. */ 189 if ((u8*)ie < (u8*)&ir->index || (u8*)ie + 190 sizeof(INDEX_ENTRY_HEADER) > index_end || 191 (u8*)ie + le16_to_cpu(ie->key_length) > 192 index_end) 193 goto dir_err; 194 /* 195 * The last entry cannot contain a name. It can however 196 * contain a pointer to a child node in the B+tree so we just 197 * break out. 198 */ 199 if (ie->flags & INDEX_ENTRY_END) 200 break; 201 /* 202 * We perform a case sensitive comparison and if that matches 203 * we are done and return the mft reference of the inode (i.e. 204 * the inode number together with the sequence number for 205 * consistency checking). We convert it to cpu format before 206 * returning. 207 */ 208 if (ntfs_are_names_equal(uname, uname_len, 209 (ntfschar*)&ie->key.filename.filename, 210 ie->key.filename.filename_length, TRUE, 211 vol->upcase, vol->upcase_len)) { 212found_it: 213 /* 214 * We have a perfect match, so we do not need to care 215 * about having matched imperfectly before, so we can 216 * free name and set *res_name to NULL. 217 * 218 * However, if the perfect match is a short filename, 219 * we need to signal this through *res_name, so that 220 * the caller can deal with the name cache effectively. 221 * 222 * As an optimization we just reuse an existing 223 * allocation of *res_name. 
224 */ 225 if (ie->key.filename.filename_type == FILENAME_DOS) { 226 u8 len; 227 228 if (!name) { 229 *res_name = name = OSMalloc( 230 sizeof(*name), 231 ntfs_malloc_tag); 232 if (!name) { 233 err = ENOMEM; 234 goto put_err; 235 } 236 } 237 name->mref = le64_to_cpu(ie->indexed_file); 238 name->type = FILENAME_DOS; 239 name->len = len = ie->key.filename. 240 filename_length; 241 memcpy(name->name, ie->key.filename.filename, 242 len * sizeof(ntfschar)); 243 } else { 244 if (name) 245 OSFree(name, sizeof(*name), 246 ntfs_malloc_tag); 247 *res_name = NULL; 248 } 249 *res_mref = le64_to_cpu(ie->indexed_file); 250 ntfs_attr_search_ctx_put(ctx); 251 ntfs_mft_record_unmap(dir_ni); 252 lck_rw_unlock_shared(&ia_ni->lock); 253 (void)vnode_put(ia_vn); 254 return 0; 255 } 256 /* 257 * For a case insensitive mount, we also perform a case 258 * insensitive comparison. If the comparison matches, we cache 259 * the filename in *res_name so that the caller can work on it. 260 */ 261 if (!NVolCaseSensitive(vol) && 262 ntfs_are_names_equal(uname, uname_len, 263 (ntfschar*)&ie->key.filename.filename, 264 ie->key.filename.filename_length, FALSE, 265 vol->upcase, vol->upcase_len)) { 266 u8 type; 267 268 /* 269 * If no name is cached yet, cache it or if the current 270 * name is the WIN32 name, replace the already cached 271 * name with the WIN32 name. Otherwise continue 272 * caching the first match. 273 */ 274 type = ie->key.filename.filename_type; 275 if (!name || type == FILENAME_WIN32 || type == 276 FILENAME_WIN32_AND_DOS) { 277 u8 len; 278 279 if (!name) { 280 *res_name = name = OSMalloc( 281 sizeof(*name), 282 ntfs_malloc_tag); 283 if (!name) { 284 err = ENOMEM; 285 goto put_err; 286 } 287 } 288 name->mref = le64_to_cpu(ie->indexed_file); 289 name->type = type; 290 name->len = len = ie->key.filename. 
291 filename_length; 292 memcpy(name->name, ie->key.filename.filename, 293 len * sizeof(ntfschar)); 294 } 295 } 296 /* 297 * Not a perfect match, need to do full blown collation so we 298 * know which way in the B+tree we have to go. 299 */ 300 rc = ntfs_collate_names(uname, uname_len, 301 (ntfschar*)&ie->key.filename.filename, 302 ie->key.filename.filename_length, 1, FALSE, 303 vol->upcase, vol->upcase_len); 304 /* 305 * If uname collates before the name of the current entry, 306 * there is definitely no such name in this index but we might 307 * need to descend into the B+tree so we just break out of the 308 * loop. 309 */ 310 if (rc == -1) 311 break; 312 /* The names are not equal, continue the search. */ 313 if (rc) 314 continue; 315 /* 316 * Names match with case insensitive comparison, now try the 317 * case sensitive comparison, which is required for proper 318 * collation. 319 */ 320 rc = ntfs_collate_names(uname, uname_len, 321 (ntfschar*)&ie->key.filename.filename, 322 ie->key.filename.filename_length, 1, TRUE, 323 vol->upcase, vol->upcase_len); 324 if (rc == -1) 325 break; 326 if (rc) 327 continue; 328 /* 329 * Perfect match, this will never happen as the 330 * ntfs_are_names_equal() call will have gotten a match but we 331 * still treat it correctly. 332 */ 333 goto found_it; 334 } 335 /* 336 * We have finished with this index without success. Check for the 337 * presence of a child node and if not present return ENOENT, unless we 338 * have got a matching name cached in @name in which case return the 339 * mft reference associated with it. 340 */ 341 if (!(ie->flags & INDEX_ENTRY_NODE)) { 342 ntfs_attr_search_ctx_put(ctx); 343 ntfs_mft_record_unmap(dir_ni); 344 goto not_found; 345 } /* Child node present, descend into it. */ 346 /* Consistency check: Verify that an index allocation exists. */ 347 if (!NInoIndexAllocPresent(ia_ni)) { 348 ntfs_error(mp, "No index allocation attribute but index entry " 349 "requires one. 
Directory inode 0x%llx is " 350 "corrupt or driver bug.", 351 (unsigned long long)dir_ni->mft_no); 352 NVolSetErrors(vol); 353 goto put_err; 354 } 355 /* Get the starting vcn of the index block holding the child node. */ 356 vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8)); 357 /* 358 * We are done with the index root and the mft record. Release them, 359 * otherwise we deadlock with ntfs_page_map(). 360 */ 361 ntfs_attr_search_ctx_put(ctx); 362 ntfs_mft_record_unmap(dir_ni); 363 m = NULL; 364 ctx = NULL; 365descend_into_child_node: 366 /* 367 * Convert vcn to byte offset in the index allocation attribute and map 368 * the corresponding page. 369 */ 370 err = ntfs_page_map(ia_ni, (vcn << ia_ni->vcn_size_shift) & 371 ~PAGE_MASK_64, &upl, &pl, &kaddr, FALSE); 372 if (err) { 373 ntfs_error(mp, "Failed to map directory index page (error " 374 "%d).", err); 375 goto err; 376 } 377fast_descend_into_child_node: 378 /* Get to the index allocation block. */ 379 ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << ia_ni->vcn_size_shift) & 380 PAGE_MASK)); 381 /* Bounds checks. */ 382 if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) { 383 ntfs_error(mp, "Out of bounds check failed. Corrupt " 384 "directory inode 0x%llx or driver bug.", 385 (unsigned long long)dir_ni->mft_no); 386 goto page_err; 387 } 388 /* Catch multi sector transfer fixup errors. */ 389 if (!ntfs_is_indx_record(ia->magic)) { 390 ntfs_error(mp, "Directory index record with VCN 0x%llx is " 391 "corrupt. Corrupt inode 0x%llx. Run chkdsk.", 392 (unsigned long long)vcn, 393 (unsigned long long)dir_ni->mft_no); 394 goto page_err; 395 } 396 if (sle64_to_cpu(ia->index_block_vcn) != vcn) { 397 ntfs_error(mp, "Actual VCN (0x%llx) of index buffer is " 398 "different from expected VCN (0x%llx). 
" 399 "Directory inode 0x%llx is corrupt or driver " 400 "bug.", (unsigned long long) 401 sle64_to_cpu(ia->index_block_vcn), 402 (unsigned long long)vcn, 403 (unsigned long long)dir_ni->mft_no); 404 goto page_err; 405 } 406 if (offsetof(INDEX_BLOCK, index) + 407 le32_to_cpu(ia->index.allocated_size) != 408 ia_ni->block_size) { 409 ntfs_error(mp, "Index buffer (VCN 0x%llx) of directory inode " 410 "0x%llx has a size (%u) differing from the " 411 "directory specified size (%u). Directory " 412 "inode is corrupt or driver bug.", 413 (unsigned long long)vcn, 414 (unsigned long long)dir_ni->mft_no, (unsigned) 415 (offsetof(INDEX_BLOCK, index) + 416 le32_to_cpu(ia->index.allocated_size)), 417 (unsigned)ia_ni->block_size); 418 goto page_err; 419 } 420 index_end = (u8*)ia + ia_ni->block_size; 421 if (index_end > kaddr + PAGE_SIZE) { 422 ntfs_error(mp, "Index buffer (VCN 0x%llx) of directory inode " 423 "0x%llx crosses page boundary. Impossible! " 424 "Cannot access! This is probably a bug in " 425 "the driver.", (unsigned long long)vcn, 426 (unsigned long long)dir_ni->mft_no); 427 goto page_err; 428 } 429 index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); 430 if (index_end > (u8*)ia + ia_ni->block_size) { 431 ntfs_error(mp, "Size of index buffer (VCN 0x%llx) of directory " 432 "inode 0x%llx exceeds maximum size.", 433 (unsigned long long)vcn, 434 (unsigned long long)dir_ni->mft_no); 435 goto page_err; 436 } 437 /* The first index entry. */ 438 ie = (INDEX_ENTRY*)((u8*)&ia->index + 439 le32_to_cpu(ia->index.entries_offset)); 440 /* 441 * Iterate similar to above big loop but applied to index buffer, thus 442 * loop until we exceed valid memory (corruption case) or until we 443 * reach the last entry. 444 */ 445 for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { 446 /* Bounds check. 
*/ 447 if ((u8*)ie < (u8*)&ia->index || (u8*)ie + 448 sizeof(INDEX_ENTRY_HEADER) > index_end || 449 (u8*)ie + le16_to_cpu(ie->key_length) > 450 index_end) { 451 ntfs_error(mp, "Index entry out of bounds in " 452 "directory inode 0x%llx.", 453 (unsigned long long)dir_ni->mft_no); 454 goto page_err; 455 } 456 /* 457 * The last entry cannot contain a name. It can however 458 * contain a pointer to a child node in the B+tree so we just 459 * break out. 460 */ 461 if (ie->flags & INDEX_ENTRY_END) 462 break; 463 /* 464 * We perform a case sensitive comparison and if that matches 465 * we are done and return the mft reference of the inode (i.e. 466 * the inode number together with the sequence number for 467 * consistency checking). We convert it to cpu format before 468 * returning. 469 */ 470 if (ntfs_are_names_equal(uname, uname_len, 471 (ntfschar*)&ie->key.filename.filename, 472 ie->key.filename.filename_length, TRUE, 473 vol->upcase, vol->upcase_len)) { 474found_it2: 475 /* 476 * We have a perfect match, so we do not need to care 477 * about having matched imperfectly before, so we can 478 * free name and set *res_name to NULL. 479 * 480 * However, if the perfect match is a short filename, 481 * we need to signal this through *res_name, so that 482 * the caller can deal with the name cache effectively. 483 * 484 * As an optimization we just reuse an existing 485 * allocation of *res_name. 486 */ 487 if (ie->key.filename.filename_type == FILENAME_DOS) { 488 u8 len; 489 490 if (!name) { 491 *res_name = name = OSMalloc( 492 sizeof(*name), 493 ntfs_malloc_tag); 494 if (!name) { 495 err = ENOMEM; 496 goto page_err; 497 } 498 } 499 name->mref = le64_to_cpu(ie->indexed_file); 500 name->type = FILENAME_DOS; 501 name->len = len = ie->key.filename. 
502 filename_length; 503 memcpy(name->name, ie->key.filename.filename, 504 len * sizeof(ntfschar)); 505 } else { 506 if (name) 507 OSFree(name, sizeof(*name), 508 ntfs_malloc_tag); 509 *res_name = NULL; 510 } 511 *res_mref = le64_to_cpu(ie->indexed_file); 512 ntfs_page_unmap(ia_ni, upl, pl, FALSE); 513 lck_rw_unlock_shared(&ia_ni->lock); 514 (void)vnode_put(ia_vn); 515 return 0; 516 } 517 /* 518 * For a case insensitive mount, we also perform a case 519 * insensitive comparison. If the comparison matches, we cache 520 * the filename in *res_name so that the caller can work on it. 521 * If the comparison matches, and the name is in the DOS 522 * namespace, we only cache the mft reference and the filename 523 * type (we set the name length to zero for simplicity). 524 */ 525 if (!NVolCaseSensitive(vol) && 526 ntfs_are_names_equal(uname, uname_len, 527 (ntfschar*)&ie->key.filename.filename, 528 ie->key.filename.filename_length, FALSE, 529 vol->upcase, vol->upcase_len)) { 530 u8 type; 531 532 /* 533 * If no name is cached yet, cache it or if the current 534 * name is the WIN32 name, replace the already cached 535 * name with the WIN32 name. Otherwise continue 536 * caching the first match. 537 */ 538 type = ie->key.filename.filename_type; 539 if (!name || type == FILENAME_WIN32 || type == 540 FILENAME_WIN32_AND_DOS) { 541 u8 len; 542 543 if (!name) { 544 *res_name = name = OSMalloc( 545 sizeof(*name), 546 ntfs_malloc_tag); 547 if (!name) { 548 err = ENOMEM; 549 goto page_err; 550 } 551 } 552 name->mref = le64_to_cpu(ie->indexed_file); 553 name->type = type; 554 name->len = len = ie->key.filename. 555 filename_length; 556 memcpy(name->name, ie->key.filename.filename, 557 len * sizeof(ntfschar)); 558 } 559 } 560 /* 561 * Not a perfect match, need to do full blown collation so we 562 * know which way in the B+tree we have to go. 
563 */ 564 rc = ntfs_collate_names(uname, uname_len, 565 (ntfschar*)&ie->key.filename.filename, 566 ie->key.filename.filename_length, 1, FALSE, 567 vol->upcase, vol->upcase_len); 568 /* 569 * If uname collates before the name of the current entry, 570 * there is definitely no such name in this index but we might 571 * need to descend into the B+tree so we just break out of the 572 * loop. 573 */ 574 if (rc == -1) 575 break; 576 /* The names are not equal, continue the search. */ 577 if (rc) 578 continue; 579 /* 580 * Names match with case insensitive comparison, now try the 581 * case sensitive comparison, which is required for proper 582 * collation. 583 */ 584 rc = ntfs_collate_names(uname, uname_len, 585 (ntfschar*)&ie->key.filename.filename, 586 ie->key.filename.filename_length, 1, TRUE, 587 vol->upcase, vol->upcase_len); 588 if (rc == -1) 589 break; 590 if (rc) 591 continue; 592 /* 593 * Perfect match, this will never happen as the 594 * ntfs_are_names_equal() call will have gotten a match but we 595 * still treat it correctly. 596 */ 597 goto found_it2; 598 } 599 /* 600 * We have finished with this index buffer without success. Check for 601 * the presence of a child node. 602 */ 603 if (ie->flags & INDEX_ENTRY_NODE) { 604 if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { 605 ntfs_error(mp, "Index entry with child node found in " 606 "a leaf node in directory inode " 607 "0x%llx.", 608 (unsigned long long)dir_ni->mft_no); 609 goto page_err; 610 } 611 /* Child node present, descend into it. */ 612 old_vcn = vcn; 613 vcn = sle64_to_cpup((sle64*)((u8*)ie + 614 le16_to_cpu(ie->length) - 8)); 615 if (vcn >= 0) { 616 /* 617 * If @vcn is in the same page cache page as @old_vcn 618 * we recycle the mapped page. 
619 */ 620 if (old_vcn << ia_ni->vcn_size_shift >> PAGE_SHIFT == 621 vcn << ia_ni->vcn_size_shift >> 622 PAGE_SHIFT) 623 goto fast_descend_into_child_node; 624 ntfs_page_unmap(ia_ni, upl, pl, FALSE); 625 goto descend_into_child_node; 626 } 627 ntfs_error(mp, "Negative child node vcn in directory inode " 628 "0x%llx.", (unsigned long long)dir_ni->mft_no); 629 goto page_err; 630 } 631 /* 632 * No child node present, return ENOENT, unless we have got a matching 633 * name cached in @name in which case return the mft reference 634 * associated with it. 635 */ 636 ntfs_page_unmap(ia_ni, upl, pl, FALSE); 637not_found: 638 lck_rw_unlock_shared(&ia_ni->lock); 639 (void)vnode_put(ia_vn); 640 if (name) { 641 *res_mref = name->mref; 642 return 0; 643 } 644 ntfs_debug("Entry not found."); 645 return ENOENT; 646page_err: 647 ntfs_page_unmap(ia_ni, upl, pl, FALSE); 648 goto err; 649dir_err: 650 ntfs_error(mp, "Corrupt directory inode 0x%llx. Run chkdsk.", 651 (unsigned long long)dir_ni->mft_no); 652put_err: 653 ntfs_attr_search_ctx_put(ctx); 654unm_err: 655 ntfs_mft_record_unmap(dir_ni); 656err: 657 if (name) 658 OSFree(name, sizeof(*name), ntfs_malloc_tag); 659 lck_rw_unlock_shared(&ia_ni->lock); 660 (void)vnode_put(ia_vn); 661 if (!err) 662 err = EIO; 663 ntfs_debug("Failed (error %d).", err); 664 return err; 665} 666 667/** 668 * ntfs_do_dirent - generate a dirent structure and copy it to the destination 669 * @vol: ntfs volume the index entry belongs to 670 * @ie: index entry to return after conversion to a dirent structure 671 * @de: buffer to generate the dirent structure in 672 * @uio: destination in which to return the generated dirent structure 673 * @entries: [IN/OUT] pointer to number of entries that have been returned 674 * 675 * This is a helper function for ntfs_readdir(). 676 * 677 * First, check if we want to return this index entry @ie and if not return 0. 
678 * 679 * Assuming we want to return this index entry, convert the NTFS specific 680 * directory index entry @ie into the file system independent dirent structure 681 * and store it in the supplied buffer @de. 682 * 683 * If there is not enough space in the destination @uio to return the converted 684 * dirent structure @de, return -1. 685 * 686 * Return the converted dirent structure @de in the destination @uio and return 687 * 0 on success or errno on error. 688 * 689 * If we successfully returned an entry *@entries is incremented. 690 */ 691static inline int ntfs_do_dirent(ntfs_volume *vol, INDEX_ENTRY *ie, 692 struct dirent *de, uio_t uio, int *entries) 693{ 694 ino64_t mref; 695 u8 *utf8_name; 696 size_t utf8_size; 697 signed res_size, padding; 698 int err; 699 FILENAME_TYPE_FLAGS name_type; 700#ifdef DEBUG 701 static const char *dts[15] = { "UNKNOWN", "FIFO", "CHR", "UNKNOWN", 702 "DIR", "UNKNOWN", "BLK", "UNKNOWN", "REG", "UNKNOWN", 703 "LNK", "UNKNOWN", "SOCK", "UNKNOWN", "WHT" }; 704#endif 705 706 name_type = ie->key.filename.filename_type; 707 if (name_type == FILENAME_DOS) { 708 ntfs_debug("Skipping DOS namespace entry."); 709 return 0; 710 } 711 mref = MREF_LE(ie->indexed_file); 712 /* 713 * Remove all NTFS core system files from the name space so we do not 714 * need to worry about users damaging a volume by writing to them or 715 * deleting/renaming them and so that we can return fsRtParID (1) as 716 * the inode number of the parent of the volume root directory and 717 * fsRtDirID (2) as the inode number of the volume root directory which 718 * are both expected by Carbon and various applications. 
719 */ 720 if (mref < FILE_first_user) { 721 ntfs_debug("Removing core NTFS system file (mft_no 0x%x) from " 722 "name space.", (unsigned)mref); 723 return 0; 724 } 725 if (sizeof(de->d_ino) < 8 && mref & 0xffffffff00000000ULL) { 726 ntfs_warning(vol->mp, "Skipping dirent because its inode " 727 "number 0x%llx does not fit in 32-bits.", 728 (unsigned long long)mref); 729 return 0; 730 } 731 utf8_name = (u8*)de->d_name; 732 utf8_size = sizeof(de->d_name); 733 res_size = ntfs_to_utf8(vol, (ntfschar*)&ie->key.filename.filename, 734 ie->key.filename.filename_length << NTFSCHAR_SIZE_SHIFT, 735 &utf8_name, &utf8_size); 736 if (res_size <= 0) { 737 ntfs_warning(vol->mp, "Skipping unrepresentable inode 0x%llx " 738 "(error %d).", (unsigned long long)mref, 739 -res_size); 740 return 0; 741 } 742 /* 743 * The name is now in @de->d_name. Set up the remainder of the dirent 744 * structure. 745 */ 746 de->d_ino = mref; 747 /* 748 * If a filename index is present it must be a directory. Otherwise it 749 * could be a file or a symbolic link (or something else but we do not 750 * support anything else yet). 751 */ 752 if (ie->key.filename.file_attributes & 753 FILE_ATTR_DUP_FILENAME_INDEX_PRESENT) 754 de->d_type = DT_DIR; 755 else { 756 /* 757 * If the file size is less than or equal to MAXPATHLEN it 758 * could be a symbolic link so return DT_UNKNOWN as it would be 759 * too expensive to get the inode to check what it is exactly. 760 * 761 * Also, system files need to be returned as DT_UNKNOWN as they 762 * could be fifos, sockets, or block or character device 763 * special files. Note that the size check will actually catch 764 * all relevant system files so we do not need to check for 765 * them specifically here. 766 */ 767 if (ie->key.filename.data_size > MAXPATHLEN) 768 de->d_type = DT_REG; 769 else 770 de->d_type = DT_UNKNOWN; 771 } 772 /* 773 * Note @de->d_namlen is only 8-bit thus @res_size may not be above 774 * 255. 
This is not a problem since sizeof(de->d_name) is 256 which 775 * includes the terminating NUL byte thus ntfs_to_utf8() would have 776 * aborted if the name translated to something longer than 255 bytes. 777 * 778 * As a little BUG check test it anyway... 779 */ 780 if (res_size > 0xff) 781 panic("%s(): res_size (0x%x) does not fit in 8 bits. This is " 782 "a bug!", __FUNCTION__, res_size); 783 de->d_namlen = res_size; 784 /* Add the NUL terminator byte to the name length. */ 785 res_size += offsetof(struct dirent, d_name) + 1; 786 de->d_reclen = (u16)(res_size + 3) & (u16)~3; 787 padding = de->d_reclen - res_size; 788 if (padding) 789 bzero((u8*)de + res_size, padding); 790 /* 791 * If the remaining buffer space is not big enough to store the dirent 792 * structure, return -1 to indicate that fact. 793 */ 794 if (uio_resid(uio) < de->d_reclen) 795 return -1; 796 ntfs_debug("Returning dirent with d_ino 0x%llx, d_reclen 0x%x, d_type " 797 "DT_%s, d_namlen %d, d_name \"%s\".", 798 (unsigned long long)mref, (unsigned)de->d_reclen, 799 de->d_type < 15 ? dts[de->d_type] : dts[0], 800 (unsigned)de->d_namlen, de->d_name); 801 /* 802 * Copy the dirent structure to the result buffer. uiomove() returns 803 * zero to indicate success and the (positive) error code on error so 804 * it can be clearly distinguished from us returning -1 to indicate 805 * that the buffer does not have enough space remaining. 806 */ 807 err = uiomove((caddr_t)de, de->d_reclen, uio); 808 if (!err) { 809 /* We have successfully returned another dirent structure. */ 810 (*entries)++; 811 } 812 return err; 813} 814 815/** 816 * ntfs_dirhint_get - get a directory hint 817 * @ni: ntfs index inode of directory index for which to get a hint 818 * @ofs: offset (containing tag and B+tree position) for hint to get 819 * 820 * Search through the list of hints attached to the ntfs directory index inode 821 * @ni for a directory hint with an offset @ofs. 
If found return that hint and 822 * if not found either allocate a new directory hint or recycle an the oldest 823 * directory hint in the list and set it up ready to use. 824 * 825 * Return the directory hint or NULL if allocating a new hint failed and no 826 * hints are present in the list so a hint could not be recycled either. 827 * 828 * The caller can tell if the hint matched by the fact that if it matched it 829 * will have a filename attached to it, i.e. ->fn_size and thus also ->fn will 830 * be non-zero and non-NULL, respectively. 831 * 832 * Locking: Caller must hold @ni->lock for writing. 833 */ 834static ntfs_dirhint *ntfs_dirhint_get(ntfs_inode *ni, unsigned ofs) 835{ 836 ntfs_dirhint *dh; 837 BOOL need_init, need_remove; 838 struct timeval tv; 839 840 microuptime(&tv); 841 /* 842 * Look for an existing hint first. If not found, create a new one 843 * (when the list is not full) or recycle the oldest hint. Since new 844 * hints are always added to the head of the list, the last hint is 845 * always the oldest. 846 */ 847 dh = NULL; 848 if (ofs & ~NTFS_DIR_POS_MASK) { 849 TAILQ_FOREACH(dh, &ni->dirhint_list, link) { 850 if (dh->ofs == ofs) 851 break; 852 } 853 } 854 /* Assume we found a directory hint in which case it is initialized. */ 855 need_init = FALSE; 856 need_remove = TRUE; 857 if (!dh) { 858 /* No directory hint matched. */ 859 need_init = TRUE; 860 if (ni->nr_dirhints < NTFS_MAX_DIRHINTS) { 861 /* 862 * Allocate a new directory hint. If the allocation 863 * fails try to recycle an existing directory hint. 864 */ 865 dh = OSMalloc(sizeof(*dh), ntfs_malloc_tag); 866 if (dh) { 867 ni->nr_dirhints++; 868 need_remove = FALSE; 869 } 870 } 871 if (!dh) { 872 /* Recycle the last, i.e. oldest, directory hint. 
*/ 873 dh = TAILQ_LAST(&ni->dirhint_list, ntfs_dirhint_head); 874 if (dh && dh->fn_size) 875 OSFree(dh->fn, dh->fn_size, ntfs_malloc_tag); 876 } 877 } 878 /* 879 * If we managed to get a hint, move it to (or place it at if we 880 * allocated it above) the head of the list of dircetory hints of the 881 * index inode. 882 */ 883 if (dh) { 884 if (need_remove) 885 TAILQ_REMOVE(&ni->dirhint_list, dh, link); 886 TAILQ_INSERT_HEAD(&ni->dirhint_list, dh, link); 887 /* 888 * Set up the hint if it is a new hint or we recycled an old 889 * hint. 890 */ 891 if (need_init) { 892 dh->ofs = ofs; 893 dh->fn_size = 0; 894 } 895 dh->time = tv.tv_sec; 896 } 897 return dh; 898} 899 900/** 901 * ntfs_dirhint_put - put a directory hint 902 * @ni: ntfs index inode to which the directory hint belongs 903 * @dh: directory hint to free 904 * 905 * Detach the directory hint @dh from the ntfs directory index inode @ni and 906 * free it and all its resources. 907 * 908 * Locking: Caller must hold @ni->lock for writing. 909 */ 910static void ntfs_dirhint_put(ntfs_inode *ni, ntfs_dirhint *dh) 911{ 912 TAILQ_REMOVE(&ni->dirhint_list, dh, link); 913 ni->nr_dirhints--; 914 if (dh->fn_size) 915 OSFree(dh->fn, dh->fn_size, ntfs_malloc_tag); 916 OSFree(dh, sizeof(*dh), ntfs_malloc_tag); 917} 918 919/** 920 * ntfs_dirhints_put - put all directory hints 921 * @ni: ntfs index inode whose directory hints to release 922 * @stale_only: if true only release expired directory hints 923 * 924 * If @stale_only is false release all directory hints from the ntfs directory 925 * index inode @ni freeing them and all their resources. 926 * 927 * If @stale_only is true do the same as above but only release expired hints. 928 * 929 * Note we iterate from the oldest to the newest so we can stop when we reach 930 * the first valid hint if @stale_only is true. 931 * 932 * Locking: Caller must hold @ni->lock for writing. 
933 */ 934void ntfs_dirhints_put(ntfs_inode *ni, BOOL stale_only) 935{ 936 ntfs_dirhint *dh, *tdh; 937 struct timeval tv; 938 939 if (stale_only) 940 microuptime(&tv); 941 TAILQ_FOREACH_REVERSE_SAFE(dh, &ni->dirhint_list, ntfs_dirhint_head, 942 link, tdh) { 943 if (stale_only) { 944 /* Stop here if this entry is too new. */ 945 if (tv.tv_sec - dh->time < NTFS_DIRHINT_TTL) 946 break; 947 } 948 ntfs_dirhint_put(ni, dh); 949 } 950} 951 952/** 953 * ntfs_readdir - read directory entries into a supplied buffer 954 * @dir_ni: directory inode to read directory entries from 955 * @uio: destination in which to return the read entries 956 * @eofflag: return end of file status (can be NULL) 957 * @numdirent: return number of entries returned (can be NULL) 958 * 959 * ntfs_readdir() reads directory entries starting at the position described by 960 * uio_offset() into the buffer pointed to by @uio in a file system independent 961 * format. Up to uio_resid() bytes of data can be returned. The data in the 962 * buffer is a series of packed dirent structures where each contains the 963 * following elements: 964 * 965 * ino_t d_ino; inode number of this entry 966 * u16 d_reclen; length of this entry record 967 * u8 d_type; inode type (see below) 968 * u8 d_namlen; length of string in d_name 969 * char d_name[MAXNAMELEN + 1]; null terminated filename 970 * 971 * The length of the record (d_reclen) must be a multiple of four. 972 * 973 * The following file types are defined: 974 * DT_UNKNOWN, DT_FIFO, DT_CHR, DT_DIR, DT_BLK, DT_REG, DT_LNK, DT_SOCK, 975 * DT_WHT 976 * 977 * The name (d_name) must be at most MAXNAMELEN + 1 bytes long including the 978 * compulsory NUL terminator. 979 * 980 * If the name length (d_namlen) is not a multiple of four, the unused space 981 * between the NUL terminator of the name and the end of the record (as 982 * specified by d_reclen which is aligned to four bytes) is filled with NUL 983 * bytes. 
984 * 985 * Note how the inode number (d_ino) is only 32 bits. Thus we do not return 986 * directory entries for inodes with an inode number that does not fit in 32 987 * bits. In practice (at the present time) this is not a problem as 2^32 988 * inodes are a lot of inodes so are unlikely to be reached with existing data 989 * storage hardware that is NTFS formatted and accessed by OS X. Further, up 990 * to and including Windows XP, Windows itself limits the maximum number of 991 * inodes to 2^32. 992 * 993 * When the current position (uio_offset()) is zero, we start at the first 994 * entry in the B+tree and then follow the entries in the B+tree in sequence. 995 * We cannot ignore the B+tree and just return all the index root entries 996 * followed by all the entries from each of the in-use index allocation blocks 997 * because when an entry is added to or deleted from the directory this can 998 * reshape the B+tree thus making it impossible to continue where we left of 999 * between two VNOP_READDIR() calls and thus makes it impossible to implement 1000 * POSIX seekdir()/telldir()/readdir() semantics. 1001 * 1002 * The current position (uio_offset()) refers to the next block of entries to 1003 * be returned. The offset can only be set to a value previously returned by 1004 * ntfs_vnop_readdir() or zero. This offset does not have to match the number 1005 * of bytes returned (in uio_resid()). 1006 * 1007 * Note that whilst uio_resid() is 32-bit, uio_offset() is of type off_t which 1008 * is 64-bit in OS X but it gets cast down to a 32-bit long on ppc and i386 by 1009 * the getdirentries() system call before it is returned to user space so we 1010 * cannot use more than the lower 32-bits of the uio_offset(). 1011 * 1012 * In fact, the offset used by NTFS is essentially a numerical position as 1013 * described above (26 bits) with a tag (6 bits). The tag is for associating 1014 * the next request with the current request. 
This enables us to have multiple 1015 * threads reading the directory while the directory is also being modified. 1016 * 1017 * Each tag/position pair is tied to a unique directory hint. The hint 1018 * contains information (filename) needed to build the B+tree index context 1019 * path for finding the next set of entries. 1020 * 1021 * The reason not to just use a unique tag each time that identifies a 1022 * directory hint is that we have no way to expire tags/directory hints when a 1023 * directory file descriptor is closed and instead only find out when all users 1024 * of the directory have closed it via our VNOP_INACTIVE() being called. Thus, 1025 * we only can afford to keep a bounded number of tags/directory hints per 1026 * vnode thus we have to expire old tags/directory hints as new ones are added. 1027 * And when ntfs_readdir() is called with an expired tag we would have no way 1028 * of knowing where in the directory to proceed without the associated 1029 * numerical offset into the B+tree which tells us the position at which to 1030 * continue if there had not been any modifications since the tag and position 1031 * were returned by ntfs_readdir(). In practice in most cases this will still 1032 * be approximately the same location as where we left off unless a lot of 1033 * files have been created in/deleted from the directory. This is not perfect 1034 * as it means we are only POSIX compliant when a tag/directory hint has not 1035 * expired but it is a lot better than nothing so is worth doing. Also, using 1036 * only 26 bits for the numerical position in the B+tree still alows for 1037 * directories with up to 2^26-1 entries, i.e. over 67 million entries which is 1038 * likely to be quite sufficient for most intents and purposes. 1039 * 1040 * If @eofflag is not NULL, set *eofflag to 0 if we have not reached the end of 1041 * the directory yet and set it to 1 if we have reached the end of the 1042 * directory, i.e. 
@uio either contains nothing or it contains the last entry 1043 * in the directory. 1044 * 1045 * If @numdirent is not NULL, set *@numdirent to the number of directory 1046 * entries returned in the buffer described by @uio. 1047 * 1048 * If the directory has been deleted, i.e. @dir_ni->link_count is zero, do not 1049 * synthesize entries for "." and "..". 1050 * 1051 * Locking: Caller must hold @dir_ni->lock. 1052 */ 1053errno_t ntfs_readdir(ntfs_inode *dir_ni, uio_t uio, int *eofflag, 1054 int *numdirent) 1055{ 1056 off_t ofs; 1057 ntfs_volume *vol; 1058 struct dirent *de; 1059 ntfs_inode *ia_ni; 1060 ntfs_index_context *ictx; 1061 ntfs_dirhint *dh; 1062 int eof, entries, err; 1063 unsigned tag; 1064 /* 1065 * This is quite big to go on the stack but only half the size of the 1066 * buffers placed on the stack in ntfs_vnop_lookup() so if they are ok 1067 * so should this be. 1068 */ 1069 u8 de_buf[sizeof(struct dirent) + 4]; 1070 1071 ofs = uio_offset(uio); 1072 vol = dir_ni->vol; 1073 de = (struct dirent*)&de_buf; 1074 ia_ni = NULL; 1075 ictx = NULL; 1076 dh = NULL; 1077 err = entries = eof = tag = 0; 1078 ntfs_debug("Entering for directory inode 0x%llx, offset 0x%llx, count " 1079 "0x%llx.", (unsigned long long)dir_ni->mft_no, 1080 (unsigned long long)ofs, 1081 (unsigned long long)uio_resid(uio)); 1082 /* 1083 * If we already reached the end of the directory, there is nothing to 1084 * do. 1085 */ 1086 if ((unsigned)ofs == (unsigned)-1) 1087 goto eof; 1088 tag = (unsigned)ofs & NTFS_DIR_TAG_MASK; 1089 ofs &= NTFS_DIR_POS_MASK; 1090 /* 1091 * Sanity check the uio data. The absolute minimum buffer size 1092 * required is the number of bytes taken by the entries in the dirent 1093 * structure up to the beginning of the name plus the minimum length 1094 * for a filename of one byte plus we need to align each dirent record 1095 * to a multiple of four bytes thus effectovely the minimum name length 1096 * is four and not one. 
1097 */ 1098 if (uio_resid(uio) < (unsigned)offsetof(struct dirent, d_name) + 4) { 1099 err = EINVAL; 1100 goto err; 1101 } 1102 /* 1103 * Emulate "." and ".." for all directories unless the directory has 1104 * been deleted but not closed yet. 1105 */ 1106 while (ofs < 2) { 1107 if (!dir_ni->link_count) { 1108 ofs = 2; 1109 break; 1110 } 1111 *(u32*)de->d_name = 0; 1112 de->d_name[0] = '.'; 1113 if (!ofs) { 1114 /* 1115 * We have to remap the root directory inode to inode 1116 * number 2, i.e. fsRtDirID, for compatibility with 1117 * Carbon. 1118 */ 1119 if (dir_ni->mft_no == FILE_root) 1120 de->d_ino = 2; 1121 else { 1122 if (sizeof(de->d_ino) < 8 && dir_ni->mft_no & 1123 0xffffffff00000000ULL) { 1124 ntfs_warning(vol->mp, "Skipping " 1125 "emulated dirent for " 1126 "\".\" because its " 1127 "inode number 0x%llx " 1128 "does not fit in " 1129 "32-bits.", 1130 (unsigned long long) 1131 dir_ni->mft_no); 1132 goto do_next; 1133 } 1134 de->d_ino = dir_ni->mft_no; 1135 } 1136 de->d_namlen = 1; 1137 } else { 1138 vnode_t parent_vn; 1139 1140 /* 1141 * We have to return 1, i.e. fsRtParID, for the parent 1142 * inode number of the root directory inode for 1143 * compatibility with Carbon. 1144 */ 1145 if (dir_ni->mft_no == FILE_root) 1146 de->d_ino = 1; 1147 else if ((parent_vn = vnode_getparent(dir_ni->vn))) { 1148 if (sizeof(de->d_ino) < 8 && 1149 NTFS_I(parent_vn)->mft_no & 1150 0xffffffff00000000ULL) { 1151 ntfs_warning(vol->mp, "Skipping " 1152 "emulated dirent for " 1153 "\"..\" because its " 1154 "inode number 0x%llx " 1155 "does not fit in " 1156 "32-bits.", 1157 (unsigned long long) 1158 NTFS_I(parent_vn)-> 1159 mft_no); 1160 goto do_next; 1161 } 1162 de->d_ino = NTFS_I(parent_vn)->mft_no; 1163 /* 1164 * Remap the root directory inode to inode 1165 * number 2 (see above). 
1166 */ 1167 if (de->d_ino == FILE_root) 1168 de->d_ino = 2; 1169 (void)vnode_put(parent_vn); 1170 } else { 1171 MFT_REF mref; 1172 1173 /* 1174 * Look up a filename attribute in the mft 1175 * record of the directory @dir_ni and use its 1176 * parent mft reference for "..". 1177 */ 1178 err = ntfs_inode_get_name_and_parent_mref( 1179 dir_ni, FALSE, &mref, NULL); 1180 if (err) { 1181 ntfs_warning(vol->mp, "Skipping " 1182 "emulated dirent for " 1183 "\"..\" because its " 1184 "inode number could " 1185 "not be determined " 1186 "(error %d).", err); 1187 goto do_next; 1188 } 1189 if (sizeof(de->d_ino) < 8 && MREF(mref) & 1190 0xffffffff00000000ULL) { 1191 ntfs_warning(vol->mp, "Skipping " 1192 "emulated dirent for " 1193 "\"..\" because its " 1194 "inode number 0x%llx " 1195 "does not fit in " 1196 "32-bits.", 1197 (unsigned long long) 1198 MREF(mref)); 1199 goto do_next; 1200 } 1201 de->d_ino = MREF(mref); 1202 /* 1203 * Remap the root directory inode to inode 1204 * number 2 (see above). 1205 */ 1206 if (de->d_ino == FILE_root) 1207 de->d_ino = 2; 1208 } 1209 de->d_namlen = 2; 1210 de->d_name[1] = '.'; 1211 } 1212 /* 1213 * The name is one or two bytes long but we need to align the 1214 * entry record to a multiple of four bytes, thus add four 1215 * instead of one or two to the name offset. 1216 */ 1217 de->d_reclen = offsetof(struct dirent, d_name) + 4; 1218 de->d_type = DT_DIR; 1219 ntfs_debug("Returning emulated \"%s\" dirent with d_ino " 1220 "0x%llx, d_reclen 0x%x, d_type DT_DIR, " 1221 "d_namlen %d.", de->d_name, 1222 (unsigned long long)de->d_ino, 1223 (unsigned)de->d_reclen, 1224 (unsigned)de->d_namlen); 1225 err = uiomove((caddr_t)de, de->d_reclen, uio); 1226 if (err) { 1227 ntfs_error(vol->mp, "uiomove() failed for emulated " 1228 "entry (error %d).", err); 1229 goto err; 1230 } 1231 entries++; 1232do_next: 1233 /* We are done with this entry. 
*/ 1234 ofs++; 1235 if (uio_resid(uio) < (unsigned)offsetof(struct dirent, d_name) 1236 + 4) { 1237 err = -1; 1238 goto done; 1239 } 1240 } 1241 /* Get the index allocation inode. */ 1242 err = ntfs_index_inode_get(dir_ni, I30, 4, FALSE, &ia_ni); 1243 if (err) { 1244 ntfs_error(vol->mp, "Failed to get index vnode (error %d).", 1245 err); 1246 ia_ni = NULL; 1247 goto err; 1248 } 1249 /* We need the lock exclusive because of the directory hints code. */ 1250 lck_rw_lock_exclusive(&ia_ni->lock); 1251 ictx = ntfs_index_ctx_get(ia_ni); 1252 if (!ictx) { 1253 ntfs_error(vol->mp, "Not enough memory to allocate index " 1254 "context."); 1255 err = ENOMEM; 1256 goto err; 1257 } 1258 /* 1259 * Get the directory hint matching the current tag and offset if it 1260 * exists and if not get a new directory hint. 1261 */ 1262 dh = ntfs_dirhint_get(ia_ni, ofs | tag); 1263 if (!dh) { 1264 /* 1265 * We have run out of memory and failed to allocate a new hint. 1266 * This also implies that the hint was not found thus we might 1267 * as well reset the tag to zero so we do not bother searching 1268 * for it next time. We will just use the numerical position 1269 * in the directory in order to determine where to continue the 1270 * directory lookup. 1271 */ 1272 tag = 0; 1273 goto lookup_by_position; 1274 } 1275 /* 1276 * If there is no filename attached to the directory hint, use lookup 1277 * by position in stead of by filename. 1278 */ 1279 if (!dh->fn_size) 1280 goto lookup_by_position; 1281 /* 1282 * The directory hint contains a filename, look it up and return it 1283 * to the caller. Then, continue iterating over the directory B+tree 1284 * returning each entry. If the directory entry has been deleted, the 1285 * lookup up will return the next entry in the B+tree. This needs 1286 * special handling because the found entry could be an end entry in 1287 * which case we need to switch to the next real entry. 
1288 */ 1289 if (!dh->fn) 1290 panic("%s(): !dh->fn\n", __FUNCTION__); 1291 /* If the lookup fails fall back to looking up by position. */ 1292 err = ntfs_index_lookup(dh->fn, dh->fn_size, &ictx); 1293 if (!err) 1294 goto do_dirent; 1295 if (err != ENOENT) { 1296 ntfs_warning(vol->mp, "Failed to look up filename from " 1297 "directory hint (error %d), using position in " 1298 "the B+tree to continue the lookup.", err); 1299 ntfs_index_ctx_reinit(ictx, ia_ni); 1300 goto lookup_by_position; 1301 } 1302 err = 0; 1303 /* 1304 * Entry was not found, but the next one was returned. If this is a 1305 * real entry pretend that this is the entry we were looking for. 1306 */ 1307 if (!(ictx->entry->flags & INDEX_ENTRY_END)) { 1308 ictx->is_match = 1; 1309 goto do_dirent; 1310 } 1311 /* 1312 * This is an end entry which does not contain a filename. Switch to 1313 * the next real entry in the B+tree. 1314 * 1315 * Note by definition we must be in a leaf node. 1316 */ 1317 if (ictx->entry->flags & INDEX_ENTRY_NODE) 1318 panic("%s(): ictx->entry->flags & INDEX_ENTRY_NODE\n", 1319 __FUNCTION__); 1320 /* 1321 * The next entry is the first real entry above the current node thus 1322 * keep moving up the B+tree until we find a real entry. 1323 */ 1324 do { 1325 ntfs_index_context *itmp; 1326 1327 /* If we are in the index root, we are done. */ 1328 if (ictx->is_root) 1329 goto eof; 1330 /* Save the current index context so we can free it. */ 1331 itmp = ictx; 1332 /* Move up to the parent node. */ 1333 ictx = ictx->up; 1334 /* 1335 * Disconnect the old index context from its path and free it 1336 * and all its resources. 1337 */ 1338 ntfs_index_ctx_put_single(itmp); 1339 } while (ictx->entry_nr == ictx->nr_entries - 1); 1340 /* 1341 * We have reached a node with a real index entry. Lock it so we can 1342 * work on it. 
1343 */ 1344 err = ntfs_index_ctx_relock(ictx); 1345 if (err) 1346 goto err; 1347 ictx->is_match = 1; 1348 goto do_dirent; 1349lookup_by_position: 1350 /* 1351 * Start a search at the beginning of the B+tree and look for the entry 1352 * number @ofs - 2. 1353 * 1354 * We need the -2 to account for the synthesized ".." and "." entries. 1355 */ 1356 err = ntfs_index_lookup_by_position(ofs - 2, 0, &ictx); 1357 /* 1358 * Starting with the current entry, iterate over all remaining entries, 1359 * returning each via a call to ntfs_do_dirent(). 1360 */ 1361 while (!err) { 1362do_dirent: 1363 /* Submit the current directory entry to our helper function. */ 1364 err = ntfs_do_dirent(vol, ictx->entry, de, uio, &entries); 1365 if (err) { 1366 /* 1367 * A negative error code means the destination @uio 1368 * did not have enough space for the directory entry. 1369 */ 1370 if (err < 0) 1371 goto done; 1372 /* Positive error code; uiomove() returned error. */ 1373 ntfs_error(vol->mp, "uiomove() failed for index %s " 1374 "entry (error %d).", 1375 ictx->is_root ? "root" : "allocation", 1376 err); 1377 goto err; 1378 } 1379 /* We are done with this entry. */ 1380 ofs++; 1381 /* Go to the next directory entry. */ 1382 err = ntfs_index_lookup_next(&ictx); 1383 } 1384 if (err != ENOENT) { 1385 ntfs_error(vol->mp, "Failed to look up index entry with " 1386 "position 0x%llx.", 1387 (unsigned long long)(ofs - 2)); 1388 goto err; 1389 } 1390eof: 1391 eof = 1; 1392 ofs = (unsigned)-1; 1393done: 1394 /* 1395 * If @err is less than zero, we got here because the @uio does not 1396 * have enough space for the next directory entry. If we have not 1397 * returned any directory entries yet, this means the buffer is too 1398 * small for even one single entry so return the appropriate error code 1399 * instead of zero. 
1400 */ 1401 if (err < 0 && !entries) 1402 err = EINVAL; 1403 else 1404 err = 0; 1405err: 1406 /* 1407 * If the offset has overflown NTFS_DIR_POS_MASK we cannot record it so 1408 * just set it to the maximum we can return. This is not a problem 1409 * when we record a directory hint as is the common case and then later 1410 * use it to continue as the offset is then not actually used and 1411 * instead the name is used which is independent of its location. In 1412 * this case however do update the tag so that we return a different 1413 * apparent offset to the caller between invocations. 1414 * 1415 * Note we have to avoid @ofs becomming (unsigned)-1 because we use 1416 * that to denote end of directory. 1417 */ 1418 if (!eof && ofs & ~(off_t)NTFS_DIR_POS_MASK) { 1419 ofs = NTFS_DIR_POS_MASK; 1420 tag = (unsigned)(++ia_ni->dirhint_tag) << NTFS_DIR_TAG_SHIFT; 1421 if (!tag || (tag | NTFS_DIR_POS_MASK) == (unsigned)-1) { 1422 ia_ni->dirhint_tag = 1; 1423 tag = (unsigned)1 << NTFS_DIR_TAG_SHIFT; 1424 } 1425 } 1426 /* 1427 * If we have a directory hint, update it with the current search state 1428 * so the next call can continue where we stopped. 1429 */ 1430 if (dh) { 1431 unsigned size; 1432 1433 if (eof || err) { 1434 /* 1435 * The end of the directory was reached or an error 1436 * occurred. Discard the directory hint. 1437 */ 1438 ntfs_dirhint_put(ia_ni, dh); 1439 goto dh_done; 1440 } 1441 /* 1442 * Add the current name to the directory hint. This is the 1443 * next name we need to return to the caller. If there is an 1444 * old name then reuse its buffer if the two are the same size 1445 * and otherwise free the old name first. 1446 */ 1447 size = le16_to_cpu(ictx->entry->key_length); 1448 if (dh->fn_size != size) { 1449 if (dh->fn_size) 1450 OSFree(dh->fn, dh->fn_size, ntfs_malloc_tag); 1451 dh->fn = OSMalloc(size, ntfs_malloc_tag); 1452 if (!dh->fn) { 1453 /* 1454 * Not enough memory to set up the directory 1455 * hint. 
Just throw it away and set the tag to 1456 * zero so we continue by position next time. 1457 */ 1458 dh->fn_size = 0; 1459 ntfs_dirhint_put(ia_ni, dh); 1460 tag = 0; 1461 goto dh_done; 1462 } 1463 dh->fn_size = size; 1464 } 1465 memcpy(dh->fn, &ictx->entry->key.filename, size); 1466 /* 1467 * If the current tag is zero, we need to assign a new tag. 1468 * 1469 * Note we have to avoid @ofs becomming (unsigned)-1 because we 1470 * use that to denote end of directory. 1471 */ 1472 if (!tag) { 1473 tag = (unsigned)(++ia_ni->dirhint_tag) << 1474 NTFS_DIR_TAG_SHIFT; 1475 if (!tag || (tag | NTFS_DIR_POS_MASK) == (unsigned)-1) { 1476 ia_ni->dirhint_tag = 1; 1477 tag = (unsigned)1 << NTFS_DIR_TAG_SHIFT; 1478 } 1479 } 1480 /* Finally set the directory hint to the current offset. */ 1481 dh->ofs = ofs | tag; 1482 } 1483dh_done: 1484 if (ictx) 1485 ntfs_index_ctx_put(ictx); 1486 if (ia_ni) { 1487 lck_rw_unlock_exclusive(&ia_ni->lock); 1488 (void)vnode_put(ia_ni->vn); 1489 } 1490 ntfs_debug("%s (returned 0x%x entries, %s, now at offset 0x%llx).", 1491 err ? "Failed" : "Done", entries, eof ? 1492 "reached end of directory" : "more entries to follow", 1493 (unsigned long long)ofs); 1494 if (eofflag) 1495 *eofflag = eof; 1496 if (numdirent) 1497 *numdirent = entries; 1498 uio_setoffset(uio, ofs | tag); 1499 return err; 1500} 1501 1502/** 1503 * ntfs_dir_is_empty - check if a directory is empty 1504 * @dir_ni: ntfs inode of directory to check 1505 * 1506 * Check if the directory inode @ni is empty. 1507 * 1508 * Return 0 if empty, ENOTEMPTY if not empty, and errno (not ENOTEMPTY) on 1509 * error. 1510 * 1511 * Locking: Caller must hold @dir_ni->lock for writing. 
1512 */ 1513errno_t ntfs_dir_is_empty(ntfs_inode *dir_ni) 1514{ 1515 s64 bmp_size, prev_ia_pos, bmp_pos, ia_pos; 1516 ntfs_inode *ia_ni, *bmp_ni = NULL; 1517 ntfs_volume *vol = dir_ni->vol; 1518 MFT_RECORD *m; 1519 ntfs_attr_search_ctx *ctx; 1520 INDEX_ROOT *ir; 1521 u8 *index_end, *bmp, *kaddr; 1522 INDEX_ENTRY *ie; 1523 upl_t bmp_upl, ia_upl = NULL; 1524 upl_page_info_array_t bmp_pl, ia_pl; 1525 INDEX_ALLOCATION *ia; 1526 errno_t err; 1527 int bmp_ofs; 1528 static const char es[] = "%s. Directory mft_no 0x%llx is corrupt. " 1529 "Run chkdsk."; 1530 static const char es1[] = ". Directory mft_no 0x"; 1531 static const char es2[] = " is corrupt. Run chkdsk."; 1532 1533 ntfs_debug("Entering for directory mft_no 0x%llx.", 1534 (unsigned long long)dir_ni->mft_no); 1535 if (!S_ISDIR(dir_ni->mode)) 1536 return ENOTDIR; 1537 /* Get the index allocation inode. */ 1538 err = ntfs_index_inode_get(dir_ni, I30, 4, FALSE, &ia_ni); 1539 if (err) { 1540 ntfs_error(vol->mp, "Failed to get index inode (error %d).", 1541 err); 1542 return err; 1543 } 1544 lck_rw_lock_shared(&ia_ni->lock); 1545 /* Get the index bitmap inode if there is one. */ 1546 if (NInoIndexAllocPresent(ia_ni)) { 1547 err = ntfs_attr_inode_get(dir_ni, AT_BITMAP, I30, 4, FALSE, 1548 LCK_RW_TYPE_SHARED, &bmp_ni); 1549 if (err) { 1550 ntfs_error(vol->mp, "Failed to get index bitmap inode " 1551 "(error %d).", err); 1552 bmp_ni = NULL; 1553 goto err; 1554 } 1555 } 1556 /* Get hold of the mft record for the directory. */ 1557 err = ntfs_mft_record_map(dir_ni, &m); 1558 if (err) { 1559 ntfs_error(vol->mp, "Failed to map mft record for directory " 1560 "(error %d).", err); 1561 goto err; 1562 } 1563 ctx = ntfs_attr_search_ctx_get(dir_ni, m); 1564 if (!ctx) { 1565 ntfs_error(vol->mp, "Failed to get attribute search context."); 1566 err = ENOMEM; 1567 goto unm_err; 1568 } 1569 /* Find the index root attribute in the mft record. 
*/ 1570 err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, 0, NULL, 0, ctx); 1571 if (err) { 1572 if (err == ENOENT) { 1573 ntfs_error(vol->mp, "Index root attribute missing in " 1574 "directory inode 0x%llx.", 1575 (unsigned long long)dir_ni->mft_no); 1576 NVolSetErrors(vol); 1577 err = EIO; 1578 } else 1579 ntfs_error(vol->mp, "Failed to lookup index root " 1580 "attribute in directory inode 0x%llx " 1581 "(error %d).", 1582 (unsigned long long)dir_ni->mft_no, 1583 err); 1584 goto put_err; 1585 } 1586 /* 1587 * Get to the index root value (it has been verified in 1588 * ntfs_inode_read()). 1589 */ 1590 ir = (INDEX_ROOT*)((u8*)ctx->a + le16_to_cpu(ctx->a->value_offset)); 1591 index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); 1592 /* The first index entry. */ 1593 ie = (INDEX_ENTRY*)((u8*)&ir->index + 1594 le32_to_cpu(ir->index.entries_offset)); 1595 /* Bounds checks. */ 1596 if ((u8*)ie < (u8*)&ir->index || 1597 (u8*)ie + sizeof(INDEX_ENTRY_HEADER) > index_end || 1598 (u8*)ie + le16_to_cpu(ie->key_length) > index_end) 1599 goto dir_err; 1600 /* 1601 * If this is not the end node, it is a filename and thus the directory 1602 * is not empty. 1603 * 1604 * If it is the end node, and there is no sub-node hanging off it, the 1605 * directory is empty. 1606 */ 1607 if (!(ie->flags & INDEX_ENTRY_END)) 1608 err = ENOTEMPTY; 1609 else if (!(ie->flags & INDEX_ENTRY_NODE)) { 1610 /* Set @err to 1 so we can detect that we are done below. */ 1611 err = 1; 1612 } 1613 ntfs_attr_search_ctx_put(ctx); 1614 ntfs_mft_record_unmap(dir_ni); 1615 if (err) { 1616 /* Undo the setting of @err to 1 we did above. */ 1617 if (err == 1) 1618 err = 0; 1619 goto done; 1620 } 1621 /* 1622 * We only get here if the index root indicated that there is a 1623 * sub-node thus there must be an index allocation attribute. 1624 */ 1625 if (!NInoIndexAllocPresent(ia_ni)) { 1626 ntfs_error(vol->mp, "No index allocation attribute but index " 1627 "entry requires one. 
Directory inode 0x%llx " 1628 "is corrupt or driver bug.", 1629 (unsigned long long)dir_ni->mft_no); 1630 goto dir_err; 1631 } 1632 lck_spin_lock(&bmp_ni->size_lock); 1633 bmp_size = bmp_ni->data_size; 1634 lck_spin_unlock(&bmp_ni->size_lock); 1635 ia_pos = bmp_pos = bmp_ofs = 0; 1636 prev_ia_pos = -1; 1637get_next_bmp_page: 1638 ntfs_debug("Reading index bitmap offset 0x%llx, bit offset 0x%x.", 1639 (unsigned long long)bmp_pos >> 3, bmp_ofs); 1640 /* 1641 * Convert bit position to byte offset in the index bitmap attribute 1642 * and map the corresponding page. 1643 */ 1644 err = ntfs_page_map(bmp_ni, (bmp_pos >> 3) & ~PAGE_MASK_64, &bmp_upl, 1645 &bmp_pl, &bmp, FALSE); 1646 if (err) { 1647 ntfs_error(vol->mp, "Failed to read directory index bitmap " 1648 "buffer (error %d).", err); 1649 bmp_upl = NULL; 1650 goto page_err; 1651 } 1652 /* Find the next index block which is marked in use. */ 1653 while (!(bmp[bmp_ofs >> 3] & (1 << (bmp_ofs & 7)))) { 1654find_next_index_buffer: 1655 bmp_ofs++; 1656 /* 1657 * If we have reached the end of the bitmap, the directory is 1658 * empty. 1659 */ 1660 if (((bmp_pos + bmp_ofs) >> 3) >= bmp_size) 1661 goto unm_done; 1662 ia_pos = (bmp_pos + bmp_ofs) << ia_ni->block_size_shift; 1663 /* 1664 * If we have reached the end of the bitmap block get the next 1665 * page and unmap away the old one. 1666 */ 1667 if ((bmp_ofs >> 3) >= PAGE_SIZE) { 1668 ntfs_page_unmap(bmp_ni, bmp_upl, bmp_pl, FALSE); 1669 bmp_pos += PAGE_SIZE * 8; 1670 bmp_ofs = 0; 1671 goto get_next_bmp_page; 1672 } 1673 } 1674 ntfs_debug("Handling index allocation block 0x%llx.", 1675 (unsigned long long)bmp_pos + bmp_ofs); 1676 /* If the current index block is in the same buffer we reuse it. */ 1677 if ((prev_ia_pos & ~PAGE_MASK_64) != (ia_pos & ~PAGE_MASK_64)) { 1678 prev_ia_pos = ia_pos; 1679 if (ia_upl) 1680 ntfs_page_unmap(ia_ni, ia_upl, ia_pl, FALSE); 1681 /* Map the page containing the index allocation block. 
*/ 1682 err = ntfs_page_map(ia_ni, ia_pos & ~PAGE_MASK_64, &ia_upl, 1683 &ia_pl, &kaddr, FALSE); 1684 if (err) { 1685 ntfs_error(vol->mp, "Failed to read directory index " 1686 "allocation page (error %d).", err); 1687 ia_upl = NULL; 1688 goto page_err; 1689 } 1690 } 1691 /* Get the current index allocation block inside the mapped page. */ 1692 ia = (INDEX_ALLOCATION*)(kaddr + ((u32)ia_pos & PAGE_MASK & 1693 ~(ia_ni->block_size - 1))); 1694 /* Bounds checks. */ 1695 if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) { 1696 ntfs_error(vol->mp, es, "Out of bounds check failed", 1697 (unsigned long long)dir_ni->mft_no); 1698 goto vol_err; 1699 } 1700 /* Catch multi sector transfer fixup errors. */ 1701 if (!ntfs_is_indx_record(ia->magic)) { 1702 ntfs_error(vol->mp, "Multi sector transfer error detected in " 1703 "index record vcn 0x%llx%s%llx%s", 1704 (unsigned long long)ia_pos >> 1705 ia_ni->vcn_size_shift, es1, 1706 (unsigned long long)dir_ni->mft_no, es2); 1707 goto vol_err; 1708 } 1709 if (sle64_to_cpu(ia->index_block_vcn) != (ia_pos & 1710 ~(s64)(ia_ni->block_size - 1)) >> 1711 ia_ni->vcn_size_shift) { 1712 ntfs_error(vol->mp, "Actual VCN (0x%llx) of index record is " 1713 "different from expected VCN (0x%llx)%s%llx%s", 1714 (unsigned long long) 1715 sle64_to_cpu(ia->index_block_vcn), 1716 (unsigned long long)ia_pos >> 1717 ia_ni->vcn_size_shift, es1, 1718 (unsigned long long)dir_ni->mft_no, es2); 1719 goto vol_err; 1720 } 1721 if (offsetof(INDEX_BLOCK, index) + 1722 le32_to_cpu(ia->index.allocated_size) != 1723 ia_ni->block_size) { 1724 ntfs_error(vol->mp, "Index buffer (VCN 0x%llx) has a size " 1725 "(%u) differing from the directory specified " 1726 "size (%u)%s%llx%s", (unsigned long long) 1727 (unsigned long long) 1728 sle64_to_cpu(ia->index_block_vcn), 1729 (unsigned)(offsetof(INDEX_BLOCK, index) + 1730 le32_to_cpu(ia->index.allocated_size)), 1731 (unsigned)ia_ni->block_size, es1, 1732 (unsigned long long)dir_ni->mft_no, es2); 1733 goto vol_err; 1734 } 
1735 index_end = (u8*)ia + ia_ni->block_size; 1736 if (index_end > kaddr + PAGE_SIZE) { 1737 ntfs_error(vol->mp, "Index buffer (VCN 0x%llx) of directory " 1738 "inode 0x%llx crosses page boundary. This " 1739 "cannot happen and points either to memory " 1740 "corruption or to a driver bug.", 1741 (unsigned long long) 1742 sle64_to_cpu(ia->index_block_vcn), 1743 (unsigned long long)dir_ni->mft_no); 1744 goto vol_err; 1745 } 1746 index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); 1747 if (index_end > (u8*)ia + ia_ni->block_size) { 1748 ntfs_error(vol->mp, "Size of index block (VCN 0x%llx) " 1749 "exceeds maximum size%s%llx%s", 1750 (unsigned long long) 1751 sle64_to_cpu(ia->index_block_vcn), es1, 1752 (unsigned long long)dir_ni->mft_no, es2); 1753 goto vol_err; 1754 } 1755 /* The first index entry. */ 1756 ie = (INDEX_ENTRY*)((u8*)&ia->index + 1757 le32_to_cpu(ia->index.entries_offset)); 1758 /* Bounds checks. */ 1759 if ((u8*)ie < (u8*)&ia->index || 1760 (u8*)ie + sizeof(INDEX_ENTRY_HEADER) > index_end || 1761 (u8*)ie + le16_to_cpu(ie->key_length) > index_end) 1762 goto dir_err; 1763 /* 1764 * If this is the end node, it is not a filename so we continue to the 1765 * next index block. 1766 */ 1767 if (ie->flags & INDEX_ENTRY_END) 1768 goto find_next_index_buffer; 1769 /* 1770 * This is not the end node, i.e. it is a filename and thus the 1771 * directory is not empty. 1772 */ 1773 err = ENOTEMPTY; 1774unm_done: 1775 if (ia_upl) 1776 ntfs_page_unmap(ia_ni, ia_upl, ia_pl, FALSE); 1777 ntfs_page_unmap(bmp_ni, bmp_upl, bmp_pl, FALSE); 1778done: 1779 if (bmp_ni) { 1780 lck_rw_unlock_shared(&bmp_ni->lock); 1781 (void)vnode_put(bmp_ni->vn); 1782 } 1783 lck_rw_unlock_shared(&ia_ni->lock); 1784 (void)vnode_put(ia_ni->vn); 1785 ntfs_debug("Done (directory is%s empty).", !err ? "" : " not"); 1786 return err; 1787dir_err: 1788 ntfs_error(vol->mp, "Corrupt directory inode 0x%llx. 
Run chkdsk.", 1789 (unsigned long long)dir_ni->mft_no); 1790 NVolSetErrors(vol); 1791 /* 1792 * If @ia_upl is not NULL we got here from the index allocation related 1793 * code paths and if it is NULL we got here from the index root related 1794 * code paths. 1795 */ 1796 if (ia_upl) 1797 goto page_err; 1798 err = EIO; 1799put_err: 1800 ntfs_attr_search_ctx_put(ctx); 1801unm_err: 1802 ntfs_mft_record_unmap(dir_ni); 1803err: 1804 if (bmp_ni) { 1805 lck_rw_unlock_shared(&bmp_ni->lock); 1806 (void)vnode_put(bmp_ni->vn); 1807 } 1808 lck_rw_unlock_shared(&ia_ni->lock); 1809 (void)vnode_put(ia_ni->vn); 1810 return err; 1811vol_err: 1812 NVolSetErrors(vol); 1813page_err: 1814 if (!err) 1815 err = EIO; 1816 if (ia_upl) 1817 ntfs_page_unmap(ia_ni, ia_upl, ia_pl, FALSE); 1818 if (bmp_upl) 1819 ntfs_page_unmap(bmp_ni, bmp_upl, bmp_pl, FALSE); 1820 goto err; 1821} 1822 1823/** 1824 * ntfs_dir_entry_delete - delete a directory index entry 1825 * @dir_ni: directory ntfs inode from which to delete the index entry 1826 * @ni: base ntfs inode which the filename @fn links to 1827 * @fn: filename attribute describing index entry to delete 1828 * @fn_len: size of filename attribute in bytes 1829 * 1830 * Find the directory index entry corresponding to the filename attribute @fn 1831 * of size @fn_len bytes in the directory index of the directory ntfs inode 1832 * @dir_ni. 1833 * 1834 * Assuming the filename is present in the directory index, delete it from the 1835 * index. @ni is the inode which the filename @fn links to. 1836 * 1837 * Return 0 on success and errno on error. 1838 * 1839 * Locking: Caller must hold both @dir_ni->lock and @ni->lock for writing. 
1840 */ 1841errno_t ntfs_dir_entry_delete(ntfs_inode *dir_ni, ntfs_inode *ni, 1842 const FILENAME_ATTR *fn, const u32 fn_len) 1843{ 1844 ntfs_volume *vol = ni->vol; 1845 ntfs_inode *ia_ni; 1846 ntfs_index_context *ictx; 1847 INDEX_ENTRY *ie; 1848 int err; 1849 FILENAME_TYPE_FLAGS fn_type; 1850 1851 ntfs_debug("Entering for mft_no 0x%llx, parent directory mft_no " 1852 "0x%llx.", (unsigned long long)ni->mft_no, 1853 (unsigned long long)dir_ni->mft_no); 1854 if (!S_ISDIR(dir_ni->mode)) 1855 panic("%s(): !S_ISDIR(dir_ni->mode\n", __FUNCTION__); 1856 /* 1857 * Verify that the mft reference of the parent directory specified in 1858 * the filename to be removed matches the mft reference of the parent 1859 * directory inode. 1860 */ 1861 if (fn->parent_directory != MK_LE_MREF(dir_ni->mft_no, 1862 dir_ni->seq_no)) { 1863 ntfs_error(vol->mp, "The reference of the parent directory " 1864 "specified in the filename to be removed does " 1865 "not match the reference of the parent " 1866 "directory inode. Volume is corrupt. Run " 1867 "chkdsk."); 1868 NVolSetErrors(vol); 1869 return EIO; 1870 } 1871 /* 1872 * We are now ok to go ahead and delete the directory index entry. 1873 * 1874 * Get the index allocation inode. 1875 */ 1876 err = ntfs_index_inode_get(dir_ni, I30, 4, FALSE, &ia_ni); 1877 if (err) { 1878 ntfs_error(vol->mp, "Failed to get index vnode (error %d).", 1879 err); 1880 return EIO; 1881 } 1882 /* Need exclusive access to the index throughout. */ 1883 lck_rw_lock_exclusive(&ia_ni->lock); 1884 ictx = ntfs_index_ctx_get(ia_ni); 1885 if (!ictx) { 1886 ntfs_error(vol->mp, "Not enough memory to allocate index " 1887 "context."); 1888 err = ENOMEM; 1889 goto err; 1890 } 1891restart: 1892 /* Get the index entry matching the filename @fn. 
*/ 1893 err = ntfs_index_lookup(fn, fn_len, &ictx); 1894 if (err) { 1895 if (err == ENOENT) { 1896 ntfs_error(vol->mp, "Failed to delete directory index " 1897 "entry of mft_no 0x%llx because the " 1898 "filename was not found in its parent " 1899 "directory index. Directory 0x%llx " 1900 "is corrupt. Run chkdsk.", 1901 (unsigned long long)ni->mft_no, 1902 (unsigned long long)dir_ni->mft_no); 1903 NVolSetErrors(vol); 1904 } else 1905 ntfs_error(vol->mp, "Failed to delete directory index " 1906 "entry of mft_no 0x%llx because " 1907 "looking up the filename in its " 1908 "parent directory 0x%llx failed " 1909 "(error %d).", 1910 (unsigned long long)ni->mft_no, 1911 (unsigned long long)dir_ni->mft_no, 1912 err); 1913 goto put_err; 1914 } 1915 ie = ictx->entry; 1916 /* 1917 * Verify that the mft reference of the parent directory specified in 1918 * the filename to be removed matches the mft reference of the parent 1919 * directory specified in the found index entry. 1920 */ 1921 if (fn->parent_directory != ie->key.filename.parent_directory) { 1922 ntfs_error(vol->mp, "The reference of the parent directory " 1923 "(0x%llx) specified in the filename to be " 1924 "removed does not match the reference of the " 1925 "parent directory (0x%llx) specified in the " 1926 "matching directory index entry. Volume is " 1927 "corrupt. Run chkdsk.", (unsigned long long) 1928 le64_to_cpu(fn->parent_directory), 1929 (unsigned long long)le64_to_cpu( 1930 ie->key.filename.parent_directory)); 1931 NVolSetErrors(vol); 1932 err = EIO; 1933 goto put_err; 1934 } 1935 /* 1936 * Verify that the mft reference of the inode to which the filename to 1937 * be removed belongs matches the mft reference of the inode pointed to 1938 * by the found index entry. 
1939 */ 1940 if (MK_LE_MREF(ni->mft_no, ni->seq_no) != ie->indexed_file) { 1941 ntfs_error(vol->mp, "The reference of the inode (0x%llx) to " 1942 "which the filename to be removed belongs " 1943 "does not match the reference of the inode " 1944 "(0x%llx) specified in the matching directory " 1945 "index entry. Volume is corrupt. Run " 1946 "chkdsk.", (unsigned long long) 1947 MK_MREF(ni->mft_no, ni->seq_no), 1948 (unsigned long long) 1949 le64_to_cpu(ie->indexed_file)); 1950 NVolSetErrors(vol); 1951 err = EIO; 1952 goto put_err; 1953 } 1954 fn_type = ie->key.filename.filename_type; 1955 /* We now have the directory index entry, delete it. */ 1956 err = ntfs_index_entry_delete(ictx); 1957 if (!err) { 1958 ntfs_index_ctx_put(ictx); 1959 /* Update the mtime and ctime in the parent directory inode. */ 1960 dir_ni->last_mft_change_time = dir_ni->last_data_change_time = 1961 ntfs_utc_current_time(); 1962 NInoSetDirtyTimes(dir_ni); 1963 lck_rw_unlock_exclusive(&ia_ni->lock); 1964 (void)vnode_put(ia_ni->vn); 1965 ntfs_debug("Done."); 1966 return 0; 1967 } 1968 /* 1969 * If the tree got rearranged in some unpredictable way and we 1970 * chickened out of working through it, we now reinitialize the index 1971 * context (as it is now invalid) and then redo the lookup and delete. 1972 * 1973 * Note we use a negative -EAGAIN to distinguish from a potential real 1974 * EAGAIN error. 1975 */ 1976 if (err == -EAGAIN) { 1977 ntfs_debug("Restarting delete as tree was rearranged."); 1978 ntfs_index_ctx_reinit(ictx, ia_ni); 1979 goto restart; 1980 } 1981 /* 1982 * Failed to delete the directory index entry. 1983 * 1984 * If the filename @fn is in the POSIX namespace but the directory 1985 * index entry is in the WIN32 namespace, convert the directory index 1986 * entry to the POSIX namespace. See comments above @restart_name 1987 * label in ntfs_vnops.c::ntfs_vnop_remove() for an explanation of when 1988 * this happens and why we need to do this. 
1989 */ 1990 if (fn_type == FILENAME_WIN32 && fn->filename_type == FILENAME_POSIX) { 1991 errno_t err2; 1992 1993 ntfs_debug("Switching namespace of directory index entry from " 1994 "WIN32 to POSIX to match the namespace of the " 1995 "corresponding filename attribute."); 1996 /* 1997 * The old index context is now invalid, so need to redo the 1998 * index lookup. 1999 */ 2000 ntfs_index_ctx_reinit(ictx, ia_ni); 2001 err2 = ntfs_index_lookup(fn, fn_len, &ictx); 2002 if (err2) { 2003 ntfs_error(vol->mp, "Failed to switch namespace of " 2004 "directory index entry of inode " 2005 "0x%llx from WIN32 to POSIX because " 2006 "re-looking up the filename in its " 2007 "parent directory inode 0x%llx failed " 2008 "(error %d). Leaving inconsistent " 2009 "metadata. Run chkdsk.", 2010 (unsigned long long)ni->mft_no, 2011 (unsigned long long)dir_ni->mft_no, 2012 err2); 2013 NVolSetErrors(vol); 2014 goto put_err; 2015 } 2016 ictx->entry->key.filename.filename_type = FILENAME_POSIX; 2017 ntfs_index_entry_mark_dirty(ictx); 2018 dir_ni->last_mft_change_time = dir_ni->last_data_change_time = 2019 ntfs_utc_current_time(); 2020 NInoSetDirtyTimes(dir_ni); 2021 } 2022put_err: 2023 ntfs_index_ctx_put(ictx); 2024err: 2025 lck_rw_unlock_exclusive(&ia_ni->lock); 2026 (void)vnode_put(ia_ni->vn); 2027 ntfs_debug("Failed (error %d).", err); 2028 return err; 2029} 2030 2031/** 2032 * ntfs_dir_entry_add - add a directory index entry 2033 * @dir_ni: directory ntfs inode to which to add the index entry 2034 * @fn: filename attribute describing index entry to add 2035 * @fn_len: size of filename attribute in bytes 2036 * @mref: mft reference of the inode the filename @fn belongs to 2037 * 2038 * Find the directory index entry corresponding to the filename attribute @fn 2039 * of size @fn_len bytes in the directory index of the directory ntfs inode 2040 * @dir_ni. 
2041 * 2042 * Assuming the filename is not already present in the directory index, add it 2043 * to the index and point the inserted index entry at the mft reference @mref 2044 * which is the little endian mft reference of the inode to which the filename 2045 * attribute @fn belongs. 2046 * 2047 * If the filename is already present in the directory index, abort and return 2048 * the error code EEXIST. 2049 * 2050 * Return 0 on success and errno on error. 2051 * 2052 * Locking: Caller must hold @dir_ni->lock for writing. 2053 */ 2054errno_t ntfs_dir_entry_add(ntfs_inode *dir_ni, const FILENAME_ATTR *fn, 2055 const u32 fn_len, const leMFT_REF mref) 2056{ 2057 const leMFT_REF tmp_mref = mref; 2058 ntfs_inode *ia_ni; 2059 ntfs_index_context *ictx; 2060 errno_t err; 2061 2062 ntfs_debug("Entering for mft_no 0x%llx, parent directory mft_no " 2063 "0x%llx.", (unsigned long long)MREF_LE(tmp_mref), 2064 (unsigned long long)dir_ni->mft_no); 2065 if (!S_ISDIR(dir_ni->mode)) 2066 panic("%s(): !S_ISDIR(dir_ni->mode\n", __FUNCTION__); 2067 /* Get the index allocation inode. */ 2068 err = ntfs_index_inode_get(dir_ni, I30, 4, FALSE, &ia_ni); 2069 if (err) { 2070 ntfs_error(dir_ni->vol->mp, "Failed to get index vnode (error " 2071 "%d).", err); 2072 return err; 2073 } 2074 /* Need exclusive access to the index throughout. */ 2075 lck_rw_lock_exclusive(&ia_ni->lock); 2076 ictx = ntfs_index_ctx_get(ia_ni); 2077 if (!ictx) { 2078 ntfs_error(dir_ni->vol->mp, "Not enough memory to allocate " 2079 "index context."); 2080 err = ENOMEM; 2081 goto err; 2082 } 2083 /* 2084 * Get the index entry matching the filename @fn and if not present get 2085 * the position at which the new index entry needs to be inserted. 
2086 */ 2087 err = ntfs_index_lookup(fn, fn_len, &ictx); 2088 if (err != ENOENT) { 2089 if (!err) { 2090 ntfs_debug("Failed (filename already present in " 2091 "directory index)."); 2092 err = EEXIST; 2093 } else 2094 ntfs_error(dir_ni->vol->mp, "Failed to add directory " 2095 "index entry of mft_no 0x%llx to " 2096 "directory mft_no 0x%llx because " 2097 "looking up the filename in the " 2098 "directory index failed (error %d).", 2099 (unsigned long long)MREF_LE(tmp_mref), 2100 (unsigned long long)dir_ni->mft_no, 2101 err); 2102 ntfs_index_ctx_put(ictx); 2103 goto err; 2104 } 2105 /* 2106 * Create a new directory index entry inserting it in front of the 2107 * entry described by the index context. 2108 */ 2109 err = ntfs_index_entry_add(ictx, fn, fn_len, &tmp_mref, 0); 2110 ntfs_index_ctx_put(ictx); 2111 if (!err) { 2112 lck_rw_unlock_exclusive(&ia_ni->lock); 2113 (void)vnode_put(ia_ni->vn); 2114 /* Update the mtime and ctime of the parent directory inode. */ 2115 dir_ni->last_mft_change_time = dir_ni->last_data_change_time = 2116 ntfs_utc_current_time(); 2117 NInoSetDirtyTimes(dir_ni); 2118 ntfs_debug("Done."); 2119 return 0; 2120 } 2121err: 2122 lck_rw_unlock_exclusive(&ia_ni->lock); 2123 (void)vnode_put(ia_ni->vn); 2124 return err; 2125} 2126