1/* 2 * ntfs_mft.c - NTFS kernel mft record operations. 3 * 4 * Copyright (c) 2006-2011 Anton Altaparmakov. All Rights Reserved. 5 * Portions Copyright (c) 2006-2011 Apple Inc. All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 * 30 * ALTERNATIVELY, provided that this notice and licensing terms are retained in 31 * full, this file may be redistributed and/or modified under the terms of the 32 * GNU General Public License (GPL) Version 2, in which case the provisions of 33 * that version of the GPL will apply to you instead of the license terms 34 * above. You can obtain a copy of the GPL Version 2 at 35 * http://developer.apple.com/opensource/licenses/gpl-2.txt. 36 */ 37 38#include <sys/buf.h> 39#include <sys/errno.h> 40#include <sys/param.h> 41#include <sys/stat.h> 42#include <sys/types.h> 43#include <sys/ucred.h> 44#include <sys/ubc.h> 45#include <sys/vnode.h> 46 47#include <string.h> 48 49#include <libkern/libkern.h> 50#include <libkern/OSAtomic.h> 51#include <libkern/OSMalloc.h> 52 53#include <kern/debug.h> 54#include <kern/locks.h> 55 56#include "ntfs.h" 57#include "ntfs_attr.h" 58#include "ntfs_bitmap.h" 59#include "ntfs_debug.h" 60#include "ntfs_dir.h" 61#include "ntfs_endian.h" 62#include "ntfs_hash.h" 63#include "ntfs_inode.h" 64#include "ntfs_layout.h" 65#include "ntfs_lcnalloc.h" 66#include "ntfs_mft.h" 67#include "ntfs_page.h" 68#include "ntfs_secure.h" 69#include "ntfs_time.h" 70#include "ntfs_types.h" 71#include "ntfs_volume.h" 72 73/** 74 * ntfs_mft_record_map_ext - map an mft record 75 * @ni: ntfs inode whose mft record to map 76 * @mrec: destination pointer for the mapped mft record 77 * @mft_is_locked: if true the caller holds the mft lock (@mft_ni->lock) 78 * 79 * The buffer containing the mft record belonging to the ntfs inode @ni is 80 * mapped which on OS X means it is held for exclusive via the BL_BUSY flag in 81 * the buffer. The mapped mft record is returned in *@m. 82 * 83 * If @mft_is_locked is true the caller holds the mft lock (@mft_ni->lock) thus 84 * ntfs_mft_record_map_ext() will not try to take the same lock. It is then 85 * the responsibility of the caller that the mft is consistent and stable for 86 * the duration of the call. 
87 * 88 * Return 0 on success and errno on error. 89 * 90 * Note: Caller must hold an iocount reference on the vnode of the base inode 91 * of @ni. 92 */ 93errno_t ntfs_mft_record_map_ext(ntfs_inode *ni, MFT_RECORD **mrec, 94 const BOOL mft_is_locked) 95{ 96 ntfs_volume *vol; 97 ntfs_inode *mft_ni; 98 buf_t buf; 99 MFT_RECORD *m; 100 errno_t err; 101 102 ntfs_debug("Entering for mft_no 0x%llx (mft is %slocked).", 103 (unsigned long long)ni->mft_no, 104 mft_is_locked ? "" : "not "); 105 if (NInoAttr(ni)) 106 panic("%s(): Called for attribute inode.\n", __FUNCTION__); 107 vol = ni->vol; 108 mft_ni = vol->mft_ni; 109 /* 110 * If the volume is in the process of being unmounted then @vol->mft_ni 111 * may have become NULL in which case we need to bail out. 112 */ 113 if (!mft_ni) { 114 /* 115 * @vol->mp may be NULL now which is ok. ntfs_error() deals 116 * with this case gracefully. 117 */ 118 ntfs_error(vol->mp, "The volume is being unmounted, bailing " 119 "out (you can ignore any errors following " 120 "this one)."); 121 return EINVAL; 122 } 123 /* Get an iocount reference on the $MFT vnode. */ 124 err = vnode_get(mft_ni->vn); 125 if (err) { 126 ntfs_error(vol->mp, "Failed to get vnode for $MFT."); 127 return err; 128 } 129 if (!mft_is_locked) 130 lck_rw_lock_shared(&mft_ni->lock); 131 /* 132 * If the wanted mft record number is out of bounds the mft record does 133 * not exist. 
134 */ 135 lck_spin_lock(&mft_ni->size_lock); 136 if (ni->mft_no > (ino64_t)(mft_ni->data_size >> 137 vol->mft_record_size_shift)) { 138 lck_spin_unlock(&mft_ni->size_lock); 139 ntfs_error(vol->mp, "Attempt to read mft record 0x%llx, which " 140 "is beyond the end of the mft.", 141 (unsigned long long)ni->mft_no); 142 err = ENOENT; 143 goto err; 144 } 145 lck_spin_unlock(&mft_ni->size_lock); 146 /* 147 * We implement access to $MFT/$DATA by mapping the buffer containing 148 * the mft record into memory using buf_meta_bread() which takes care 149 * of reading the buffer in if it is not in memory already and removing 150 * the mst protection fixups. 151 * 152 * In case we ever care, we know whether buf_meta_bread() found the 153 * buffer already in memory or whether it read it in because in the 154 * former case buf_fromcache(buf) will be true and in the latter case 155 * it will be false. 156 * 157 * Similarly we know if the buffer was already dirty or not by checking 158 * buf_flags(buf) & B_DELWRI. 159 */ 160 ntfs_debug("Calling buf_meta_bread()."); 161 err = buf_meta_bread(mft_ni->vn, ni->mft_no, vol->mft_record_size, 162 NOCRED, &buf); 163 ntfs_debug("After buf_meta_bread()."); 164 if (err) { 165 ntfs_error(vol->mp, "Failed to read buffer of mft record " 166 "0x%llx (error %d).", 167 (unsigned long long)ni->mft_no, err); 168 goto buf_err; 169 } 170 err = buf_map(buf, (caddr_t*)&m); 171 if (err) { 172 ntfs_error(vol->mp, "Failed to map buffer of mft record " 173 "0x%llx (error %d).", 174 (unsigned long long)ni->mft_no, err); 175 goto buf_err; 176 } 177 if (!m) 178 panic("%s(): buf_map() returned NULL.\n", __FUNCTION__); 179 if (ni->m_buf || ni->m) 180 panic("%s(): Mft record 0x%llx is already mapped.\n", 181 __FUNCTION__, (unsigned long long)ni->mft_no); 182 /* Catch multi sector transfer fixup errors. 
*/ 183 if (ntfs_is_mft_record(m->magic)) { 184 if (!mft_is_locked) 185 lck_rw_unlock_shared(&mft_ni->lock); 186 ni->mft_ni = mft_ni; 187 ni->m_buf = buf; 188 ni->m = m; 189 *mrec = m; 190 ntfs_debug("Done."); 191 return 0; 192 } 193 ntfs_error(vol->mp, "Mft record 0x%llx is corrupt. Run chkdsk.", 194 (unsigned long long)ni->mft_no); 195 NVolSetErrors(vol); 196 /* Error, release the buffer. */ 197 err = buf_unmap(buf); 198 if (err) 199 ntfs_error(vol->mp, "Failed to unmap buffer of mft record " 200 "0x%llx (error %d).", 201 (unsigned long long)ni->mft_no, err); 202 err = EIO; 203buf_err: 204 buf_brelse(buf); 205err: 206 /* 207 * Release the iocount reference on the $MFT vnode. We can ignore the 208 * return value as it always is zero. 209 */ 210 if (!mft_is_locked) 211 lck_rw_unlock_shared(&mft_ni->lock); 212 (void)vnode_put(mft_ni->vn); 213 return err; 214} 215 216/** 217 * ntfs_mft_record_unmap - release a mapped mft record 218 * @ni: ntfs inode whose mft record to unmap 219 * 220 * Unmap the buffer containing the mft record. 221 */ 222void ntfs_mft_record_unmap(ntfs_inode *ni) 223{ 224 ntfs_inode *mft_ni; 225 buf_t buf; 226 errno_t err; 227 228 ntfs_debug("Entering for mft_no 0x%llx.", 229 (unsigned long long)ni->mft_no); 230 mft_ni = ni->mft_ni; 231 buf = ni->m_buf; 232 if (!mft_ni || !buf || !ni->m) 233 panic("%s(): Mft record 0x%llx is not mapped.\n", __FUNCTION__, 234 (unsigned long long)ni->mft_no); 235 ni->mft_ni = NULL; 236 ni->m_buf = NULL; 237 ni->m = NULL; 238 err = buf_unmap(buf); 239 if (err) 240 ntfs_error(ni->vol->mp, "Failed to unmap buffer of mft record " 241 "0x%llx (error %d).", 242 (unsigned long long)ni->mft_no, err); 243 if (NInoTestClearMrecNeedsDirtying(ni)) { 244 err = buf_bdwrite(buf); 245 if (err) { 246 ntfs_error(ni->vol->mp, "Failed to write buffer of " 247 "mft record 0x%llx (error %d). 
Run " 248 "chkdsk.", 249 (unsigned long long)ni->mft_no, err); 250 NVolSetErrors(ni->vol); 251 } 252 } else 253 buf_brelse(buf); 254 /* 255 * Release the iocount reference on the $MFT vnode. We can ignore the 256 * return value as it always is zero. 257 */ 258 (void)vnode_put(mft_ni->vn); 259 ntfs_debug("Done."); 260} 261 262/** 263 * ntfs_extent_mft_record_map_ext - load an extent inode 264 * @base_ni: base ntfs inode 265 * @mref: mft reference of the extent inode to load 266 * @ext_ni: destination pointer for the loaded ntfs inode 267 * @ext_mrec: destination pointer for the mapped mft record 268 * @mft_is_locked: if true the caller holds the mft lock (@mft_ni->lock) 269 * 270 * Load the extent mft record @mref and attach it to its base inode @base_ni. 271 * 272 * On success *@ext_ni contains a pointer to the ntfs inode structure of the 273 * mapped extent inode and *@ext_mrec contains a pointer to the mft record 274 * structure of the mapped extent inode. 275 * 276 * If @mft_is_locked is true the caller holds the mft lock thus 277 * ntfs_extent_mft_record_map_ext() will not try to take the same lock. It is 278 * then the responsibility of the caller that the mft is consistent and stable 279 * for the duration of the call. 280 * 281 * Return 0 on success and errno on error. 282 * 283 * Note: The caller must hold an iocount reference on the vnode of the base 284 * inode. 
285 */ 286errno_t ntfs_extent_mft_record_map_ext(ntfs_inode *base_ni, MFT_REF mref, 287 ntfs_inode **ext_ni, MFT_RECORD **ext_mrec, 288 const BOOL mft_is_locked) 289{ 290 ino64_t mft_no; 291 ntfs_inode **extent_nis = NULL; 292 ntfs_inode *ni = NULL; 293 MFT_RECORD *m; 294 errno_t err; 295 unsigned seq_no; 296 int i; 297 BOOL need_reclaim; 298 299 mft_no = MREF(mref); 300 seq_no = MSEQNO(mref); 301 ntfs_debug("Mapping extent mft record 0x%llx (base mft record " 302 "0x%llx).", (unsigned long long)mft_no, 303 (unsigned long long)base_ni->mft_no); 304 /* 305 * Check if this extent inode has already been added to the base inode, 306 * in which case just return it. If not found, add it to the base 307 * inode before returning it. 308 */ 309 lck_mtx_lock(&base_ni->extent_lock); 310 if (base_ni->nr_extents > 0) { 311 extent_nis = base_ni->extent_nis; 312 for (i = 0; i < base_ni->nr_extents; i++) { 313 if (mft_no != extent_nis[i]->mft_no) 314 continue; 315 ni = extent_nis[i]; 316 break; 317 } 318 } 319 if (ni) { 320 lck_mtx_unlock(&base_ni->extent_lock); 321 /* We found the record. Map and return it. */ 322 err = ntfs_mft_record_map_ext(ni, &m, mft_is_locked); 323 if (!err) { 324 /* Verify the sequence number if present. */ 325 if (!seq_no || le16_to_cpu(m->sequence_number) == 326 seq_no) { 327 ntfs_debug("Done 1."); 328 *ext_ni = ni; 329 *ext_mrec = m; 330 return err; 331 } 332 ntfs_mft_record_unmap(ni); 333 ntfs_error(base_ni->vol->mp, "Found stale extent mft " 334 "reference! Corrupt file system. " 335 "Run chkdsk."); 336 return EIO; 337 } 338map_err_out: 339 ntfs_error(base_ni->vol->mp, "Failed to map extent mft " 340 "record (error %d).", (int)err); 341 return err; 342 } 343 /* Record was not there. Get a new ntfs inode and initialize it. */ 344 err = ntfs_extent_inode_get(base_ni, mref, &ni); 345 if (err) { 346 lck_mtx_unlock(&base_ni->extent_lock); 347 return err; 348 } 349 /* Now map the extent mft record. 
*/ 350 err = ntfs_mft_record_map_ext(ni, &m, mft_is_locked); 351 if (err) { 352 lck_mtx_unlock(&base_ni->extent_lock); 353 ntfs_inode_reclaim(ni); 354 goto map_err_out; 355 } 356 need_reclaim = FALSE; 357 /* Verify the sequence number if it is present. */ 358 if (seq_no) { 359 if (le16_to_cpu(m->sequence_number) != seq_no) { 360 ntfs_error(base_ni->vol->mp, "Found stale extent mft " 361 "reference! Corrupt file system. " 362 "Run chkdsk."); 363 need_reclaim = TRUE; 364 err = EIO; 365 goto unm_err_out; 366 } 367 } else { 368 /* 369 * No sequence number was specified by the caller thus set the 370 * sequence number in the ntfs inode to the one in the mft 371 * record. 372 */ 373 ni->seq_no = le16_to_cpu(m->sequence_number); 374 } 375 /* Attach extent inode to base inode, reallocating memory if needed. */ 376 if ((base_ni->nr_extents + 1) * sizeof(ntfs_inode *) > 377 base_ni->extent_alloc) { 378 ntfs_inode **tmp; 379 int new_size; 380 381 new_size = base_ni->extent_alloc + 4 * sizeof(ntfs_inode *); 382 tmp = OSMalloc(new_size, ntfs_malloc_tag); 383 if (!tmp) { 384 ntfs_error(base_ni->vol->mp, "Failed to allocate " 385 "internal buffer."); 386 need_reclaim = TRUE; 387 err = ENOMEM; 388 goto unm_err_out; 389 } 390 if (base_ni->extent_alloc) { 391 if (base_ni->nr_extents > 0) 392 memcpy(tmp, base_ni->extent_nis, 393 base_ni->nr_extents * 394 sizeof(ntfs_inode *)); 395 OSFree(base_ni->extent_nis, base_ni->extent_alloc, 396 ntfs_malloc_tag); 397 } 398 base_ni->extent_alloc = new_size; 399 base_ni->extent_nis = tmp; 400 } 401 base_ni->extent_nis[base_ni->nr_extents++] = ni; 402 lck_mtx_unlock(&base_ni->extent_lock); 403 ntfs_debug("Done 2."); 404 *ext_ni = ni; 405 *ext_mrec = m; 406 return err; 407unm_err_out: 408 ntfs_mft_record_unmap(ni); 409 lck_mtx_unlock(&base_ni->extent_lock); 410 /* 411 * If the extent inode was not attached to the base inode we need to 412 * release it or we will leak memory. 
413 */ 414 if (need_reclaim) 415 ntfs_inode_reclaim(ni); 416 return err; 417} 418 419static const char es[] = " Leaving inconsistent metadata. Unmount and run " 420 "chkdsk."; 421 422/** 423 * ntfs_mft_record_sync - synchronize an inode's mft record with that on disk 424 * @ni: ntfs inode whose mft record to synchronize to disk 425 * 426 * If the mft record belonging to the ntfs inode @ni is cached in memory and is 427 * dirty write it out. 428 * 429 * Note this function can only be called for real, base or extent, inodes, i.e. 430 * not for synthetic, attribute or index, inodes. Failure to obey this will 431 * result in a panic. 432 * 433 * Return 0 on success and errno on error. 434 * 435 * Locking: The mft record must not be mapped or a deadlock will occur. 436 */ 437errno_t ntfs_mft_record_sync(ntfs_inode *ni) 438{ 439 ntfs_volume *vol; 440 ntfs_inode *mft_ni; 441 buf_t buf; 442 errno_t err; 443 444 if (NInoAttr(ni)) 445 panic("%s(): Called for attribute inode.\n", __FUNCTION__); 446 ntfs_debug("Entering for mft record of %s inode 0x%llx.", 447 (ni->nr_extents >= 0) ? "base" : "extent", 448 (unsigned long long)ni->mft_no); 449 vol = ni->vol; 450 mft_ni = vol->mft_ni; 451 if (!mft_ni) { 452 ntfs_warning(vol->mp, "$MFT inode is missing from volume."); 453 return ENOTSUP; 454 } 455 /* Get an iocount reference on the $MFT vnode. */ 456 err = vnode_get(mft_ni->vn); 457 if (err) { 458 ntfs_error(vol->mp, "Failed to get vnode for $MFT."); 459 return err; 460 } 461 lck_rw_lock_shared(&mft_ni->lock); 462 /* 463 * Get the buffer if it is cached. If it is not cached then it cannot 464 * be dirty either thus we do not need to write it. 
465 */ 466 buf = buf_getblk(mft_ni->vn, ni->mft_no, vol->mft_record_size, 0, 0, 467 BLK_META | BLK_ONLYVALID); 468 lck_rw_unlock_shared(&mft_ni->lock); 469 (void)vnode_put(mft_ni->vn); 470 if (!buf) { 471 ntfs_debug("Mft record 0x%llx is not in cache, nothing to do.", 472 (unsigned long long)ni->mft_no); 473 return 0; 474 } 475 /* The buffer must be the right size. */ 476 if (buf_size(buf) != vol->mft_record_size) 477 panic("%s(): Buffer containing mft record 0x%llx has wrong " 478 "size (0x%x instead of 0x%x).", __FUNCTION__, 479 (unsigned long long)ni->mft_no, 480 buf_size(buf), vol->mft_record_size); 481 /* If the buffer is clean there is nothing to do. */ 482 if (!(buf_flags(buf) & B_DELWRI)) { 483 ntfs_debug("Mft record 0x%llx is in cache but not dirty, " 484 "nothing to do.", 485 (unsigned long long)ni->mft_no); 486 buf_brelse(buf); 487 return 0; 488 } 489 /* The buffer is dirty, write it now. */ 490 err = buf_bwrite(buf); 491 if (!err) 492 ntfs_debug("Done."); 493 else 494 ntfs_error(vol->mp, "Failed to write mft record 0x%llx (error " 495 "%d).", (unsigned long long)ni->mft_no, err); 496 return err; 497} 498 499/** 500 * ntfs_mft_mirror_sync - synchronize an mft record to the mft mirror 501 * @vol: ntfs volume on which the mft record to synchronize resides 502 * @rec_no: mft record number to synchronize 503 * @m: mapped, mst protected (extent) mft record to synchronize 504 * @sync: if true perform synchronous i/o otherwise use async i/o 505 * 506 * Write the mapped, mst protected (extent) mft record number @rec_no with data 507 * @m to the mft mirror ($MFTMirr) of the ntfs volume @vol. 508 * 509 * On success return 0. On error return errno and set the volume errors flag 510 * in the ntfs volume @vol. 
511 */ 512errno_t ntfs_mft_mirror_sync(ntfs_volume *vol, const s64 rec_no, 513 const MFT_RECORD *m, const BOOL sync) 514{ 515 s64 data_size; 516 ntfs_inode *mirr_ni; 517 vnode_t mirr_vn; 518 buf_t buf; 519 MFT_RECORD *mirr; 520 errno_t err; 521 522 ntfs_debug("Entering for rec_no 0x%llx.", (unsigned long long)rec_no); 523 mirr_ni = vol->mftmirr_ni; 524 if (!mirr_ni) { 525 /* This could happen during umount... */ 526 ntfs_error(vol->mp, "Umount time mft mirror syncing is not " 527 "implemented yet. %s", ntfs_please_email); 528 return ENOTSUP; 529 } 530 mirr_vn = mirr_ni->vn; 531 /* 532 * Protect against changes in initialized_size and thus against 533 * truncation also. 534 */ 535 lck_rw_lock_shared(&mirr_ni->lock); 536 if (rec_no >= vol->mftmirr_size) 537 panic("%s(): rec_no >= vol->mftmirr_size\n", __FUNCTION__); 538 err = vnode_get(mirr_vn); 539 if (err) { 540 ntfs_error(vol->mp, "Failed to get vnode for mft mirror."); 541 goto err; 542 } 543 lck_spin_lock(&mirr_ni->size_lock); 544 data_size = ubc_getsize(mirr_vn); 545 if (data_size > mirr_ni->data_size) 546 data_size = mirr_ni->data_size; 547 /* Byte offset of the mft record. */ 548 if ((rec_no << vol->mft_record_size_shift) + vol->mft_record_size > 549 mirr_ni->initialized_size) { 550 lck_spin_unlock(&mirr_ni->size_lock); 551 ntfs_error(vol->mp, "Write past the initialized size of mft " 552 "mirror."); 553 err = EIO; 554 goto put; 555 } 556 lck_spin_unlock(&mirr_ni->size_lock); 557 /* 558 * Map the buffer containing the mft mirror record. 559 * 560 * Note we use buf_getblk() as we do not care whether the record is 561 * up-to-date in memory or not as we are about to overwrite it. 
562 */ 563 buf = buf_getblk(mirr_vn, rec_no, vol->mft_record_size, 0, 0, BLK_META); 564 if (!buf) 565 panic("%s(): buf_getblk() returned NULL.\n", __FUNCTION__); 566 err = buf_map(buf, (caddr_t*)&mirr); 567 if (err) { 568 ntfs_error(vol->mp, "Failed to map buffer of mft mirror " 569 "record %lld (error %d).", 570 (unsigned long long)rec_no, err); 571 buf_brelse(buf); 572 goto put; 573 } 574 memcpy(mirr, m, vol->mft_record_size); 575 err = buf_unmap(buf); 576 if (err) 577 ntfs_error(vol->mp, "Failed to unmap buffer of mft mirror " 578 "record %lld (error %d).", 579 (unsigned long long)rec_no, err); 580 /* 581 * If the i/o is synchronous use a synchronous write for the mft mirror 582 * as well. If the i/o is asynchronous then do the write 583 * asynchronously. Note we do not use a delayed write because we want 584 * to ensure that the mft mirror will be brought up-to-date as soon as 585 * possible because we are using delayed writes on the mft itself thus 586 * in case of a crash we want to have a valid and up-to-date mft mirror 587 * on disk that we can recover from even when the mft is not valid or 588 * up-to-date. 589 * 590 * FIXME: For maximum performance we could delete the above comment and 591 * change the buf_bawrite() to buf_bdwrite(). 592 */ 593 if (sync) 594 err = buf_bwrite(buf); 595 else 596 err = buf_bawrite(buf); 597 if (err) 598 ntfs_error(vol->mp, "Failed to write buffer of mft mirror " 599 "record %lld (error %d).", 600 (unsigned long long)rec_no, err); 601put: 602 (void)vnode_put(mirr_vn); 603err: 604 lck_rw_unlock_shared(&mirr_ni->lock); 605 if (!err) 606 ntfs_debug("Done."); 607 else { 608 ntfs_error(vol->mp, "Failed to synchronize mft mirror (error " 609 "code %d). Volume will be left marked dirty " 610 "on unmount. 
Run chkdsk.", err); 611 NVolSetErrors(vol); 612 } 613 return err; 614} 615 616/** 617 * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name 618 * @vol: volume on which to search for a free mft record 619 * @base_ni: open base inode if allocating an extent mft record or NULL 620 * @mft_no: destination in which to return the allocated mft record number 621 * 622 * Search for a free mft record in the mft bitmap attribute on the ntfs volume 623 * @vol and return the allocated mft record number in *@mft_no. 624 * 625 * If @base_ni is NULL start the search at the default allocator position. 626 * 627 * If @base_ni is not NULL start the search at the mft record after the base 628 * mft record @base_ni. 629 * 630 * Return 0 on success and errno on error. An error code of ENOSPC means that 631 * there are no free mft records in the currently initialized mft bitmap. 632 * 633 * Locking: - Caller must hold @vol->mftbmp_lock for writing. 634 * - Caller must hold @vol->mftbmp_ni->lock. 635 */ 636static errno_t ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, 637 ntfs_inode *base_ni, s64 *mft_no) 638{ 639 s64 pass_end, ll, data_pos, pass_start, ofs; 640 ntfs_inode *mftbmp_ni; 641 upl_t upl; 642 upl_page_info_array_t pl; 643 u8 *buf, *byte; 644 unsigned page_ofs, size, bit; 645 u8 pass, b; 646 647 ntfs_debug("Searching for free mft record in the currently " 648 "initialized mft bitmap."); 649 mftbmp_ni = vol->mftbmp_ni; 650 if (!mftbmp_ni) 651 panic("%s: !mftbmp_ni\n", __FUNCTION__); 652 /* 653 * Set the end of the pass making sure we do not overflow the mft 654 * bitmap. 
655 */ 656 if (!vol->mft_ni) 657 panic("%s: !mft_ni\n", __FUNCTION__); 658 lck_spin_lock(&vol->mft_ni->size_lock); 659 pass_end = vol->mft_ni->allocated_size >> vol->mft_record_size_shift; 660 lck_spin_unlock(&vol->mft_ni->size_lock); 661 lck_spin_lock(&mftbmp_ni->size_lock); 662 ll = mftbmp_ni->initialized_size << 3; 663 lck_spin_unlock(&mftbmp_ni->size_lock); 664 if (pass_end > ll) 665 pass_end = ll; 666 pass = 1; 667 if (!base_ni) 668 data_pos = vol->mft_data_pos; 669 else 670 data_pos = base_ni->mft_no + 1; 671 if (data_pos < 24) 672 data_pos = 24; 673 if (data_pos >= pass_end) { 674 data_pos = 24; 675 pass = 2; 676 /* This happens on a freshly formatted volume. */ 677 if (data_pos >= pass_end) 678 goto no_space; 679 } 680 pass_start = data_pos; 681 ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, " 682 "pass_end 0x%llx, data_pos 0x%llx.", (unsigned)pass, 683 (unsigned long long)pass_start, 684 (unsigned long long)pass_end, 685 (unsigned long long)data_pos); 686 /* Loop until a free mft record is found. */ 687 do { 688 /* Cap size to pass_end. */ 689 ofs = data_pos >> 3; 690 page_ofs = (unsigned)ofs & PAGE_MASK; 691 size = PAGE_SIZE - page_ofs; 692 ll = ((pass_end + 7) >> 3) - ofs; 693 if (size > ll) 694 size = ll; 695 size <<= 3; 696 /* 697 * If we are still within the active pass, search the next page 698 * for a zero bit. 
699 */ 700 if (size) { 701 errno_t err; 702 703 err = ntfs_page_map(mftbmp_ni, ofs & ~PAGE_MASK_64, 704 &upl, &pl, &buf, TRUE); 705 if (err) { 706 ntfs_error(vol->mp, "Failed to read mft " 707 "bitmap, aborting."); 708 return err; 709 } 710 buf += page_ofs; 711 bit = (unsigned)data_pos & 7; 712 data_pos &= ~7ULL; 713 ntfs_debug("Before inner for loop: size 0x%x, " 714 "data_pos 0x%llx, bit 0x%x", size, 715 (unsigned long long)data_pos, bit); 716 for (; bit < size && data_pos + bit < pass_end; 717 bit &= ~7, bit += 8) { 718 byte = buf + (bit >> 3); 719 if (*byte == 0xff) 720 continue; 721 /* 722 * TODO: There does not appear to be a ffz() 723 * function in the kernel. )-: If/when the 724 * kernel has an ffz() function, switch the 725 * below code to use it. 726 * 727 * So emulate "ffz(x)" using "ffs(~x) - 1" 728 * which gives the same result but incurs extra 729 * CPU overhead. 730 */ 731 b = ffs(~(unsigned long)*byte) - 1; 732 if (b < 8 && b >= (bit & 7)) { 733 ll = data_pos + (bit & ~7) + b; 734 if (ll > (1LL << 32)) { 735 ntfs_page_unmap(mftbmp_ni, 736 upl, pl, FALSE); 737 goto no_space; 738 } 739 *byte |= 1 << b; 740 ntfs_page_unmap(mftbmp_ni, upl, pl, 741 TRUE); 742 ntfs_debug("Done. (Found and " 743 "allocated mft record " 744 "0x%llx.)", 745 (unsigned long long)ll); 746 *mft_no = ll; 747 return 0; 748 } 749 } 750 ntfs_debug("After inner for loop: size 0x%x, " 751 "data_pos 0x%llx, bit 0x%x", size, 752 (unsigned long long)data_pos, bit); 753 data_pos += size; 754 ntfs_page_unmap(mftbmp_ni, upl, pl, FALSE); 755 /* 756 * If the end of the pass has not been reached yet, 757 * continue searching the mft bitmap for a zero bit. 758 */ 759 continue; 760 } 761 /* If we just did the second pass we are done. */ 762 if (pass >= 2) 763 break; 764 /* 765 * Do the second pass, in which we scan the first part of the 766 * zone which we omitted earlier. 
767 */ 768 pass++; 769 pass_end = pass_start; 770 data_pos = pass_start = 24; 771 ntfs_debug("pass %u, pass_start 0x%llx, pass_end 0x%llx.", 772 (unsigned)pass, (unsigned long long)pass_start, 773 (unsigned long long)pass_end); 774 /* 775 * If the end of the pass has not been reached yet, continue 776 * searching the mft bitmap for a zero bit. 777 */ 778 } while (data_pos < pass_end); 779no_space: 780 ntfs_debug("Done. (No free mft records left in currently initialized " 781 "mft bitmap.)"); 782 return ENOSPC; 783} 784 785/** 786 * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster 787 * @vol: volume on which to extend the mft bitmap attribute 788 * 789 * Extend the mft bitmap attribute allocation on the ntfs volume @vol by one 790 * cluster. 791 * 792 * Note: Only changes allocated_size, i.e. does not touch initialized_size or 793 * data_size. 794 * 795 * Return 0 on success and errno on error. 796 * 797 * Locking: - Caller must hold @vol->mftbmp_lock for writing. 798 * - Caller must hold @vol->mftbmp_ni->lock for writing. 799 * - This function takes @vol->mftbmp_ni->rl.lock for writing and 800 * releases it before returning. 801 * - This function takes @vol->lcnbmp_lock for writing and releases it 802 * before returning. 803 */ 804static errno_t ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) 805{ 806 VCN vcn, lowest_vcn = 0; 807 LCN lcn; 808 s64 allocated_size, ll; 809 ntfs_inode *mft_ni, *mftbmp_ni, *lcnbmp_ni; 810 ntfs_rl_element *rl; 811 upl_t upl; 812 upl_page_info_array_t pl; 813 u8 *kaddr, *b; 814 MFT_RECORD *m; 815 ntfs_attr_search_ctx *ctx; 816 ATTR_RECORD *a; 817 unsigned mp_size, attr_len = 0; 818 errno_t err, err2; 819 BOOL mp_rebuilt = FALSE; 820 u8 tb; 821 822 ntfs_debug("Extending mft bitmap allocation."); 823 mft_ni = vol->mft_ni; 824 mftbmp_ni = vol->mftbmp_ni; 825 lcnbmp_ni = vol->lcnbmp_ni; 826 /* 827 * Determine the last lcn of the mft bitmap. 
The allocated size of the 828 * mft bitmap cannot be zero so we are ok to not check for it being 829 * zero first. 830 */ 831 lck_rw_lock_exclusive(&mftbmp_ni->rl.lock); 832 lck_spin_lock(&mftbmp_ni->size_lock); 833 allocated_size = mftbmp_ni->allocated_size; 834 lck_spin_unlock(&mftbmp_ni->size_lock); 835 vcn = (allocated_size - 1) >> vol->cluster_size_shift; 836 err = ntfs_attr_find_vcn_nolock(mftbmp_ni, vcn, &rl, NULL); 837 if (err || !rl || !rl->length || rl->lcn < 0 || rl[1].length || 838 rl[1].vcn != vcn + 1) { 839 lck_rw_unlock_exclusive(&mftbmp_ni->rl.lock); 840 ntfs_error(vol->mp, "Failed to determine last allocated " 841 "cluster of mft bitmap attribute."); 842 if (!err) 843 err = EIO; 844 return err; 845 } 846 lcn = rl->lcn + rl->length; 847 ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.", 848 (unsigned long long)lcn); 849 lck_rw_lock_exclusive(&vol->lcnbmp_lock); 850 err = vnode_get(lcnbmp_ni->vn); 851 if (err) { 852 ntfs_error(vol->mp, "Failed to get vnode for $Bitmap."); 853 lck_rw_unlock_exclusive(&vol->lcnbmp_lock); 854 lck_rw_unlock_exclusive(&mftbmp_ni->rl.lock); 855 return err; 856 } 857 lck_rw_lock_shared(&lcnbmp_ni->lock); 858 /* 859 * Attempt to get the cluster following the last allocated cluster by 860 * hand as it may be in the MFT zone so the allocator would not give it 861 * to us. 862 */ 863 ll = lcn >> 3; 864 err = ntfs_page_map(lcnbmp_ni, ll & ~PAGE_MASK_64, &upl, &pl, &kaddr, 865 TRUE); 866 if (err) { 867 lck_rw_unlock_shared(&lcnbmp_ni->lock); 868 (void)vnode_put(lcnbmp_ni->vn); 869 lck_rw_unlock_exclusive(&vol->lcnbmp_lock); 870 lck_rw_unlock_exclusive(&mftbmp_ni->rl.lock); 871 ntfs_error(vol->mp, "Failed to read from lcn bitmap."); 872 return err; 873 } 874 b = kaddr + ((unsigned)ll & PAGE_MASK); 875 tb = 1 << ((unsigned)lcn & 7); 876 if (*b != 0xff && !(*b & tb)) { 877 /* Next cluster is free, allocate it. 
*/ 878 *b |= tb; 879 vol->nr_free_clusters--; 880 if (vol->nr_free_clusters < 0) 881 vol->nr_free_clusters = 0; 882 ntfs_page_unmap(lcnbmp_ni, upl, pl, TRUE); 883 lck_rw_unlock_shared(&lcnbmp_ni->lock); 884 (void)vnode_put(lcnbmp_ni->vn); 885 lck_rw_unlock_exclusive(&vol->lcnbmp_lock); 886 /* Update the mft bitmap runlist. */ 887 rl->length++; 888 rl[1].vcn++; 889 ntfs_debug("Appending one cluster to mft bitmap."); 890 } else { 891 ntfs_runlist runlist; 892 893 ntfs_page_unmap(lcnbmp_ni, upl, pl, FALSE); 894 lck_rw_unlock_shared(&lcnbmp_ni->lock); 895 (void)vnode_put(lcnbmp_ni->vn); 896 lck_rw_unlock_exclusive(&vol->lcnbmp_lock); 897 /* Allocate a cluster from the DATA_ZONE. */ 898 runlist.rl = NULL; 899 runlist.alloc = runlist.elements = 0; 900 err = ntfs_cluster_alloc(vol, vcn + 1, 1, lcn, DATA_ZONE, 901 TRUE, &runlist); 902 if (err) { 903 lck_rw_unlock_exclusive(&mftbmp_ni->rl.lock); 904 ntfs_error(vol->mp, "Failed to allocate a cluster for " 905 "the mft bitmap."); 906 if (err != ENOMEM && err != ENOSPC) 907 err = EIO; 908 return err; 909 } 910 err = ntfs_rl_merge(&mftbmp_ni->rl, &runlist); 911 if (err) { 912 lck_rw_unlock_exclusive(&mftbmp_ni->rl.lock); 913 ntfs_error(vol->mp, "Failed to merge runlists for mft " 914 "bitmap."); 915 if (err != ENOMEM) 916 err = EIO; 917 err2 = ntfs_cluster_free_from_rl(vol, runlist.rl, 0, 918 -1, NULL); 919 if (err2) { 920 ntfs_error(vol->mp, "Failed to release " 921 "allocated cluster (error " 922 "%d).%s", err2, es); 923 NVolSetErrors(vol); 924 } 925 OSFree(runlist.rl, runlist.alloc, ntfs_malloc_tag); 926 return err; 927 } 928 ntfs_debug("Adding one run to mft bitmap."); 929 } 930 /* Update the attribute record as well. 
*/ 931 err = ntfs_mft_record_map(mft_ni, &m); 932 if (err) { 933 ntfs_error(vol->mp, "Failed to map mft record."); 934 m = NULL; 935 ctx = NULL; 936 goto undo_alloc; 937 } 938 ctx = ntfs_attr_search_ctx_get(mft_ni, m); 939 if (!ctx) { 940 ntfs_error(vol->mp, "Failed to get search context."); 941 err = ENOMEM; 942 goto undo_alloc; 943 } 944 err = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 945 mftbmp_ni->name_len, vcn, NULL, 0, ctx); 946 if (err) { 947 ntfs_error(vol->mp, "Failed to find last attribute extent of " 948 "mft bitmap attribute."); 949 if (err == ENOENT) 950 err = EIO; 951 goto undo_alloc; 952 } 953 m = ctx->m; 954 a = ctx->a; 955 /* Find the runlist element with which the attribute extent starts. */ 956 lowest_vcn = sle64_to_cpu(a->lowest_vcn); 957 rl = ntfs_rl_find_vcn_nolock(mftbmp_ni->rl.rl, lowest_vcn); 958 if (!rl) 959 panic("%s(): !rl\n", __FUNCTION__); 960 if (!rl->length) 961 panic("%s(): !rl->length\n", __FUNCTION__); 962 if (rl->lcn < LCN_HOLE) 963 panic("%s(): rl->lcn < LCN_HOLE\n", __FUNCTION__); 964 /* Get the size for the new mapping pairs array for this extent. */ 965 err = ntfs_get_size_for_mapping_pairs(vol, rl, lowest_vcn, -1, 966 &mp_size); 967 if (err) { 968 ntfs_error(vol->mp, "Get size for mapping pairs failed for " 969 "mft bitmap attribute extent."); 970 goto undo_alloc; 971 } 972 /* Extend the attribute record to fit the bigger mapping pairs array. */ 973 attr_len = le32_to_cpu(a->length); 974 err = ntfs_attr_record_resize(m, a, mp_size + 975 le16_to_cpu(a->mapping_pairs_offset)); 976 if (err) { 977 if (err != ENOSPC) { 978 ntfs_error(vol->mp, "Failed to resize attribute " 979 "record for mft bitmap attribute."); 980 goto undo_alloc; 981 } 982 // TODO: Deal with this by moving this extent to a new mft 983 // record or by starting a new extent in a new mft record or by 984 // moving other attributes out of this mft record. 
985 // Note: It will need to be a special mft record and if none of 986 // those are available it gets rather complicated... 987 ntfs_error(vol->mp, "Not enough space in this mft record to " 988 "accomodate extended mft bitmap attribute " 989 "extent. Cannot handle this yet."); 990 err = ENOTSUP; 991 goto undo_alloc; 992 } 993 mp_rebuilt = TRUE; 994 /* Generate the mapping pairs array directly into the attr record. */ 995 err = ntfs_mapping_pairs_build(vol, (s8*)a + 996 le16_to_cpu(a->mapping_pairs_offset), mp_size, rl, 997 lowest_vcn, -1, NULL); 998 if (err) { 999 ntfs_error(vol->mp, "Failed to build mapping pairs array for " 1000 "mft bitmap attribute (error %d).", err); 1001 err = EIO; 1002 goto dirty_undo_alloc; 1003 } 1004 /* Update the highest_vcn. */ 1005 a->highest_vcn = cpu_to_sle64(vcn + 1); 1006 /* 1007 * We now have extended the mft bitmap allocated_size by one cluster. 1008 * Reflect this in the ntfs_inode structure and the attribute record. 1009 */ 1010 if (a->lowest_vcn) { 1011 /* 1012 * We are not in the first attribute extent, switch to it, but 1013 * first ensure the changes will make it to disk later. 1014 */ 1015 NInoSetMrecNeedsDirtying(ctx->ni); 1016 ntfs_attr_search_ctx_reinit(ctx); 1017 err = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1018 mftbmp_ni->name_len, 0, NULL, 0, ctx); 1019 if (err) 1020 goto restore_undo_alloc; 1021 /* @m is not used any more so no need to set it. */ 1022 a = ctx->a; 1023 } 1024 lck_spin_lock(&mftbmp_ni->size_lock); 1025 mftbmp_ni->allocated_size += vol->cluster_size; 1026 a->allocated_size = cpu_to_sle64(mftbmp_ni->allocated_size); 1027 lck_spin_unlock(&mftbmp_ni->size_lock); 1028 /* Ensure the changes make it to disk. 
*/ 1029 NInoSetMrecNeedsDirtying(ctx->ni); 1030 ntfs_attr_search_ctx_put(ctx); 1031 ntfs_mft_record_unmap(mft_ni); 1032 lck_rw_unlock_exclusive(&mftbmp_ni->rl.lock); 1033 ntfs_debug("Done."); 1034 return 0; 1035restore_undo_alloc: 1036 ntfs_error(vol->mp, "Failed to find first attribute extent of mft " 1037 "bitmap attribute."); 1038 if (err == ENOENT) 1039 err = EIO; 1040 ntfs_attr_search_ctx_reinit(ctx); 1041 err2 = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1042 mftbmp_ni->name_len, vcn, NULL, 0, ctx); 1043 if (err2) { 1044 ntfs_error(vol->mp, "Failed to find last attribute extent of " 1045 "mft bitmap attribute (error %d).%s", err2, es); 1046 lck_spin_lock(&mftbmp_ni->size_lock); 1047 mftbmp_ni->allocated_size += vol->cluster_size; 1048 lck_spin_unlock(&mftbmp_ni->size_lock); 1049 ntfs_attr_search_ctx_put(ctx); 1050 ntfs_mft_record_unmap(mft_ni); 1051 lck_rw_unlock_exclusive(&mftbmp_ni->rl.lock); 1052 /* 1053 * The only thing that is now wrong is the allocated size of the 1054 * base attribute extent which chkdsk should be able to fix. 1055 */ 1056 NVolSetErrors(vol); 1057 return err; 1058 } 1059 ctx->a->highest_vcn = cpu_to_sle64(vcn); 1060dirty_undo_alloc: 1061 /* 1062 * Need to mark the mft record for dirtying because ntfs_cluster_free() 1063 * may drop the mft record on the floor otherwise. 1064 */ 1065 NInoSetMrecNeedsDirtying(ctx->ni); 1066undo_alloc: 1067 err2 = ntfs_cluster_free(mftbmp_ni, vcn + 1, -1, ctx, NULL); 1068 if (err2 || ctx->is_error) { 1069 ntfs_error(vol->mp, "Failed to release allocated cluster in " 1070 "error code path (error %d).%s", 1071 ctx->is_error ? ctx->error : err2, es); 1072 NVolSetErrors(vol); 1073 } 1074 /* 1075 * If the runlist truncation fails and/or the search context is no 1076 * longer valid, we cannot resize the attribute record or build the 1077 * mapping pairs array thus we mark the volume dirty and tell the user 1078 * to run chkdsk. 
1079 */ 1080 err2 = ntfs_rl_truncate_nolock(vol, &mftbmp_ni->rl, vcn + 1); 1081 if (err2) { 1082 ntfs_error(vol->mp, "Failed to truncate attribute runlist s " 1083 "in error code path (error %d).%s", err2, es); 1084 NVolSetErrors(vol); 1085 } else if (mp_rebuilt) { 1086 a = ctx->a; 1087 err2 = ntfs_attr_record_resize(ctx->m, a, attr_len); 1088 if (err2) { 1089 ntfs_error(vol->mp, "Failed to restore attribute " 1090 "record in error code path (error " 1091 "%d).%s", err2, es); 1092 NVolSetErrors(vol); 1093 } else /* if (!err2) */ { 1094 u16 mp_ofs = le16_to_cpu(a->mapping_pairs_offset); 1095 err2 = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs, 1096 attr_len - mp_ofs, mftbmp_ni->rl.rl, 1097 lowest_vcn, -1, NULL); 1098 if (err2) { 1099 ntfs_error(vol->mp, "Failed to restore " 1100 "mapping pairs array in error " 1101 "code path (error %d).%s", 1102 err2, es); 1103 NVolSetErrors(vol); 1104 } 1105 NInoSetMrecNeedsDirtying(ctx->ni); 1106 } 1107 } 1108 if (ctx) 1109 ntfs_attr_search_ctx_put(ctx); 1110 if (m) 1111 ntfs_mft_record_unmap(mft_ni); 1112 lck_rw_unlock_exclusive(&mftbmp_ni->rl.lock); 1113 return err; 1114} 1115 1116/** 1117 * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data 1118 * @vol: volume on which to extend the mft bitmap attribute 1119 * 1120 * Extend the initialized portion of the mft bitmap attribute on the ntfs 1121 * volume @vol by 8 bytes. 1122 * 1123 * Note: Only changes initialized_size and data_size, i.e. requires that 1124 * allocated_size is big enough to fit the new initialized_size. 1125 * 1126 * Return 0 on success and error on error. 1127 * 1128 * Locking: - Caller must hold @vol->mftbmp_lock for writing. 1129 * - Caller must hold @vol->mftbmp_ni->lock for writing. 
1130 */ 1131static errno_t ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) 1132{ 1133 s64 old_data_size, old_initialized_size; 1134 ntfs_inode *mft_ni, *mftbmp_ni; 1135 MFT_RECORD *m; 1136 ntfs_attr_search_ctx *ctx; 1137 ATTR_RECORD *a; 1138 errno_t err, err2; 1139 1140 ntfs_debug("Extending mft bitmap initiailized (and data) size."); 1141 mft_ni = vol->mft_ni; 1142 mftbmp_ni = vol->mftbmp_ni; 1143 /* Get the attribute record. */ 1144 err = ntfs_mft_record_map(mft_ni, &m); 1145 if (err) { 1146 ntfs_error(vol->mp, "Failed to map mft record."); 1147 return err; 1148 } 1149 ctx = ntfs_attr_search_ctx_get(mft_ni, m); 1150 if (!ctx) { 1151 ntfs_error(vol->mp, "Failed to get search context."); 1152 err = ENOMEM; 1153 goto unm_err; 1154 } 1155 err = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1156 mftbmp_ni->name_len, 0, NULL, 0, ctx); 1157 if (err) { 1158 ntfs_error(vol->mp, "Failed to find first attribute extent of " 1159 "mft bitmap attribute."); 1160 if (err == ENOENT) 1161 err = EIO; 1162 goto put_err; 1163 } 1164 a = ctx->a; 1165 lck_spin_lock(&mftbmp_ni->size_lock); 1166 old_data_size = mftbmp_ni->data_size; 1167 old_initialized_size = mftbmp_ni->initialized_size; 1168 /* 1169 * We can simply update the initialized_size before filling the space 1170 * with zeroes because the caller is holding the mft bitmap lock for 1171 * writing which ensures that no one else is trying to access the data. 
1172 */ 1173 mftbmp_ni->initialized_size += 8; 1174 a->initialized_size = cpu_to_sle64(mftbmp_ni->initialized_size); 1175 if (mftbmp_ni->initialized_size > old_data_size) { 1176 const s64 init_size = mftbmp_ni->initialized_size; 1177 mftbmp_ni->data_size = init_size; 1178 a->data_size = cpu_to_sle64(init_size); 1179 lck_spin_unlock(&mftbmp_ni->size_lock); 1180 if (!ubc_setsize(mftbmp_ni->vn, init_size)) 1181 panic("%s(): !ubc_setsize(mftbmp_ni->vn, init_size)\n", 1182 __FUNCTION__); 1183 } else 1184 lck_spin_unlock(&mftbmp_ni->size_lock); 1185 /* Ensure the changes make it to disk. */ 1186 NInoSetMrecNeedsDirtying(ctx->ni); 1187 ntfs_attr_search_ctx_put(ctx); 1188 ntfs_mft_record_unmap(mft_ni); 1189 /* Initialize the mft bitmap attribute value with zeroes. */ 1190 err = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0); 1191 if (!err) { 1192 ntfs_debug("Done. (Wrote eight initialized bytes to mft " 1193 "bitmap."); 1194 return 0; 1195 } 1196 ntfs_error(vol->mp, "Failed to write to mft bitmap."); 1197 /* Try to recover from the error. 
*/ 1198 err2 = ntfs_mft_record_map(mft_ni, &m); 1199 if (err2) { 1200 ntfs_error(vol->mp, "Failed to map mft record in error code " 1201 "path (error %d).%s", err2, es); 1202 NVolSetErrors(vol); 1203 return err; 1204 } 1205 ctx = ntfs_attr_search_ctx_get(mft_ni, m); 1206 if (!ctx) { 1207 ntfs_error(vol->mp, "Failed to get search context.%s", es); 1208 NVolSetErrors(vol); 1209 goto unm_err; 1210 } 1211 err2 = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, 1212 mftbmp_ni->name_len, 0, NULL, 0, ctx); 1213 if (err2) { 1214 ntfs_error(vol->mp, "Failed to find first attribute extent of " 1215 "mft bitmap attribute in error code path " 1216 "(error %d).%s", err2, es); 1217 NVolSetErrors(vol); 1218 goto put_err; 1219 } 1220 a = ctx->a; 1221 lck_spin_lock(&mftbmp_ni->size_lock); 1222 mftbmp_ni->initialized_size = old_initialized_size; 1223 a->initialized_size = cpu_to_sle64(old_initialized_size); 1224 if (ubc_getsize(mftbmp_ni->vn) != old_data_size) { 1225 mftbmp_ni->data_size = old_data_size; 1226 a->data_size = cpu_to_sle64(old_data_size); 1227 lck_spin_unlock(&mftbmp_ni->size_lock); 1228 if (!ubc_setsize(mftbmp_ni->vn, old_data_size)) 1229 ntfs_error(vol->mp, "Failed to restore UBC size. 
" 1230 "Leaving UBC size out of sync with " 1231 "attribute data size."); 1232 } else 1233 lck_spin_unlock(&mftbmp_ni->size_lock); 1234 NInoSetMrecNeedsDirtying(ctx->ni); 1235#ifdef DEBUG 1236 lck_spin_lock(&mftbmp_ni->size_lock); 1237 ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, " 1238 "data_size 0x%llx, initialized_size 0x%llx.", 1239 (unsigned long long)mftbmp_ni->allocated_size, 1240 (unsigned long long)mftbmp_ni->data_size, 1241 (unsigned long long)mftbmp_ni->initialized_size); 1242 lck_spin_unlock(&mftbmp_ni->size_lock); 1243#endif /* DEBUG */ 1244put_err: 1245 ntfs_attr_search_ctx_put(ctx); 1246unm_err: 1247 ntfs_mft_record_unmap(mft_ni); 1248 return err; 1249} 1250 1251/** 1252 * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute 1253 * @vol: volume on which to extend the mft data attribute 1254 * 1255 * Extend the mft data attribute on the ntfs volume @vol by 16 mft records 1256 * worth of clusters or if not enough space for this by one mft record worth 1257 * of clusters. 1258 * 1259 * Note: Only changes allocated_size, i.e. does not touch initialized_size or 1260 * data_size. 1261 * 1262 * Return 0 on success and errno on error. 1263 * 1264 * Locking: - Caller must hold @vol->mftbmp_lock for writing. 1265 * - Caller must hold @vol->mft_ni->lock for writing. 1266 * - This function takes @vol->mft_ni->rl.lock for writing and 1267 * releases it before returning. 1268 * - This function calls functions which take @vol->lcnbmp_lock for 1269 * writing and release it before returning. 
1270 */ 1271static errno_t ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) 1272{ 1273 VCN vcn, lowest_vcn = 0; 1274 LCN lcn; 1275 s64 allocated_size, min_nr, nr; 1276 ntfs_inode *mft_ni; 1277 ntfs_rl_element *rl; 1278 MFT_RECORD *m; 1279 ntfs_attr_search_ctx *ctx; 1280 ATTR_RECORD *a; 1281 unsigned mp_size, attr_len = 0; 1282 errno_t err, err2; 1283 BOOL mp_rebuilt = FALSE; 1284 ntfs_runlist runlist; 1285 1286 ntfs_debug("Extending mft data allocation."); 1287 mft_ni = vol->mft_ni; 1288 lck_spin_lock(&mft_ni->size_lock); 1289 allocated_size = mft_ni->allocated_size; 1290 lck_spin_unlock(&mft_ni->size_lock); 1291 vcn = (allocated_size - 1) >> vol->cluster_size_shift; 1292 /* 1293 * Determine the preferred allocation location, i.e. the last lcn of 1294 * the mft data attribute. 1295 */ 1296 lck_rw_lock_exclusive(&mft_ni->rl.lock); 1297 if (mft_ni->rl.elements > 1) 1298 rl = &mft_ni->rl.rl[mft_ni->rl.elements - 2]; 1299 else 1300 rl = mft_ni->rl.rl; 1301 if (!rl || !rl->length || rl->lcn < 0 || rl[1].length || 1302 rl[1].vcn != vcn + 1) { 1303 ntfs_error(vol->mp, "Failed to determine last allocated " 1304 "cluster of mft data attribute."); 1305 lck_rw_unlock_exclusive(&mft_ni->rl.lock); 1306 return EIO; 1307 } 1308 lcn = rl->lcn + rl->length; 1309 ntfs_debug("Last lcn of mft data attribute is 0x%llx.", 1310 (unsigned long long)lcn); 1311 /* Minimum allocation is one mft record worth of clusters. */ 1312 min_nr = vol->mft_record_size >> vol->cluster_size_shift; 1313 if (!min_nr) 1314 min_nr = 1; 1315 /* Want to allocate 16 mft records worth of clusters. */ 1316 nr = (vol->mft_record_size * 16) / vol->cluster_size; 1317 if (!nr) 1318 nr = min_nr; 1319 /* 1320 * To be in line with what Windows allows we restrict the total number 1321 * of mft records to 2^32. 
1322 */ 1323 if ((allocated_size + (nr << vol->cluster_size_shift)) >> 1324 vol->mft_record_size_shift >= (1LL << 32)) { 1325 nr = min_nr; 1326 if ((allocated_size + (nr << vol->cluster_size_shift)) >> 1327 vol->mft_record_size_shift >= (1LL << 32)) { 1328 ntfs_warning(vol->mp, "Cannot allocate mft record " 1329 "because the maximum number of inodes " 1330 "(2^32) has already been reached."); 1331 lck_rw_unlock_exclusive(&mft_ni->rl.lock); 1332 return ENOSPC; 1333 } 1334 } 1335 ntfs_debug("Trying mft data allocation with %s cluster count %lld.", 1336 nr > min_nr ? "default" : "minimal", (long long)nr); 1337 do { 1338 runlist.rl = NULL; 1339 runlist.alloc = runlist.elements = 0; 1340 /* 1341 * We have taken the mft lock for writing. This is not a 1342 * problem as ntfs_cluster_alloc() only needs to access pages 1343 * from the cluster bitmap (vol->lcnbmp_ni) and we have mapped 1344 * the whole runlist for the cluster bitmap at mount time thus 1345 * ntfs_page_map() will never need to map an mft record and 1346 * hence will never need to take the mft lock. 1347 */ 1348 err = ntfs_cluster_alloc(vol, vcn + 1, nr, lcn, MFT_ZONE, 1349 TRUE, &runlist); 1350 if (!err) 1351 break; 1352 if (err != ENOSPC || nr == min_nr) { 1353 if (err != ENOMEM && err != ENOSPC) 1354 err = EIO; 1355 ntfs_error(vol->mp, "Failed to allocate the minimal " 1356 "number of clusters (%lld) for the " 1357 "mft data attribute.", (long long)nr); 1358 lck_rw_unlock_exclusive(&mft_ni->rl.lock); 1359 return err; 1360 } 1361 /* 1362 * There is not enough space to do the allocation, but there 1363 * might be enough space to do a minimal allocation so try that 1364 * before failing. 1365 */ 1366 nr = min_nr; 1367 ntfs_debug("Retrying mft data allocation with minimal cluster " 1368 "count %lld.", (long long)nr); 1369 } while (1); 1370 /* 1371 * Merge the existing runlist with the new one describing the allocated 1372 * clusters. 
1373 */ 1374 err = ntfs_rl_merge(&mft_ni->rl, &runlist); 1375 if (err) { 1376 lck_rw_unlock_exclusive(&mft_ni->rl.lock); 1377 ntfs_error(vol->mp, "Failed to merge runlists for mft data " 1378 "attribute."); 1379 if (err != ENOMEM) 1380 err = EIO; 1381 err2 = ntfs_cluster_free_from_rl(vol, runlist.rl, 0, -1, NULL); 1382 if (err2) { 1383 ntfs_error(vol->mp, "Failed to release allocated " 1384 "cluster(s) (error %d).%s", err2, es); 1385 NVolSetErrors(vol); 1386 } 1387 OSFree(runlist.rl, runlist.alloc, ntfs_malloc_tag); 1388 return err; 1389 } 1390 ntfs_debug("Allocated %lld clusters.", (long long)nr); 1391 lck_spin_lock(&mft_ni->size_lock); 1392 mft_ni->allocated_size += nr << vol->cluster_size_shift; 1393 lck_spin_unlock(&mft_ni->size_lock); 1394 /* 1395 * We now have to drop the runlist lock again or we can deadlock with 1396 * the below mapping of the mft record belonging to $MFT. 1397 * 1398 * Again as explained above the mft cannot change under us so we leave 1399 * the runlist unlocked. 1400 */ 1401 lck_rw_unlock_exclusive(&mft_ni->rl.lock); 1402 /* 1403 * Update the attribute record as well. 1404 * 1405 * When mapping the mft record for the mft we communicate the fact that 1406 * we hold the lock on the mft inode @mft_ni->lock for writing so it 1407 * does not try to take the lock. 1408 */ 1409 err = ntfs_mft_record_map_ext(mft_ni, &m, TRUE); 1410 if (err) { 1411 ntfs_error(vol->mp, "Failed to map mft record."); 1412 m = NULL; 1413 ctx = NULL; 1414 goto undo_alloc; 1415 } 1416 ctx = ntfs_attr_search_ctx_get(mft_ni, m); 1417 if (!ctx) { 1418 ntfs_error(vol->mp, "Failed to get search context."); 1419 err = ENOMEM; 1420 goto undo_alloc; 1421 } 1422 /* 1423 * We have the mft lock taken for write. Communicate this fact to 1424 * ntfs_attr_lookup() and hence to ntfs_extent_mft_record_map_ext() and 1425 * ntfs_mft_record_map_ext() so that they know not to try to take the 1426 * same lock. 
1427 */ 1428 ctx->is_mft_locked = 1; 1429 err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 1430 vcn, NULL, 0, ctx); 1431 if (err) { 1432 ntfs_error(vol->mp, "Failed to find last attribute extent of " 1433 "mft data attribute."); 1434 if (err == ENOENT) 1435 err = EIO; 1436 goto undo_alloc; 1437 } 1438 m = ctx->m; 1439 a = ctx->a; 1440 /* Find the runlist element with which the attribute extent starts. */ 1441 lowest_vcn = sle64_to_cpu(a->lowest_vcn); 1442 rl = ntfs_rl_find_vcn_nolock(mft_ni->rl.rl, lowest_vcn); 1443 if (!rl) 1444 panic("%s(): !rl\n", __FUNCTION__); 1445 if (!rl->length) 1446 panic("%s(): !rl->length\n", __FUNCTION__); 1447 if (rl->lcn < LCN_HOLE) 1448 panic("%s(): rl->lcn < LCN_HOLE\n", __FUNCTION__); 1449 /* Get the size for the new mapping pairs array for this extent. */ 1450 err = ntfs_get_size_for_mapping_pairs(vol, rl, lowest_vcn, -1, 1451 &mp_size); 1452 if (err) { 1453 ntfs_error(vol->mp, "Get size for mapping pairs failed for " 1454 "mft data attribute extent."); 1455 goto undo_alloc; 1456 } 1457 /* Extend the attribute record to fit the bigger mapping pairs array. */ 1458 attr_len = (int)le32_to_cpu(a->length); 1459 err = ntfs_attr_record_resize(m, a, mp_size + 1460 le16_to_cpu(a->mapping_pairs_offset)); 1461 if (err) { 1462 if (err != ENOSPC) { 1463 ntfs_error(vol->mp, "Failed to resize attribute " 1464 "record for mft data attribute."); 1465 goto undo_alloc; 1466 } 1467 // TODO: Deal with this by moving this extent to a new mft 1468 // record or by starting a new extent in a new mft record or by 1469 // moving other attributes out of this mft record. 1470 // Note: Use the special reserved mft records and ensure that 1471 // this extent is not required to find the mft record in 1472 // question. 
If no free special records left we would need to 1473 // move an existing record away, insert ours in its place, and 1474 // then place the moved record into the newly allocated space 1475 // and we would then need to update all references to this mft 1476 // record appropriately. This is rather complicated... 1477 ntfs_error(vol->mp, "Not enough space in this mft record to " 1478 "accomodate extended mft data attribute " 1479 "extent. Cannot handle this yet."); 1480 err = ENOTSUP; 1481 goto undo_alloc; 1482 } 1483 mp_rebuilt = TRUE; 1484 /* Generate the mapping pairs array directly into the attr record. */ 1485 err = ntfs_mapping_pairs_build(vol, (s8*)a + 1486 le16_to_cpu(a->mapping_pairs_offset), mp_size, rl, 1487 lowest_vcn, -1, NULL); 1488 if (err) { 1489 ntfs_error(vol->mp, "Failed to build mapping pairs array of " 1490 "mft data attribute (error %d).", err); 1491 err = EIO; 1492 goto dirty_undo_alloc; 1493 } 1494 /* Update the highest_vcn. */ 1495 a->highest_vcn = cpu_to_sle64(vcn + nr); 1496 /* 1497 * We now have extended the mft data allocated_size by @nr clusters. 1498 * Reflect this in the ntfs_inode structure and the attribute record. 1499 */ 1500 if (a->lowest_vcn) { 1501 /* 1502 * We are not in the first attribute extent, switch to it, but 1503 * first ensure the changes will make it to disk later. 1504 */ 1505 NInoSetMrecNeedsDirtying(ctx->ni); 1506 /* 1507 * The reinitialization will preserve the is_mft_locked flag in 1508 * the search context thus we do not need to set it again. 1509 */ 1510 ntfs_attr_search_ctx_reinit(ctx); 1511 err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, 1512 mft_ni->name_len, 0, NULL, 0, ctx); 1513 if (err) 1514 goto restore_undo_alloc; 1515 /* @m is not used any more so no need to set it. */ 1516 a = ctx->a; 1517 } 1518 a->allocated_size = cpu_to_sle64(mft_ni->allocated_size); 1519 /* Ensure the changes make it to disk. 
*/ 1520 NInoSetMrecNeedsDirtying(ctx->ni); 1521 ntfs_attr_search_ctx_put(ctx); 1522 ntfs_mft_record_unmap(mft_ni); 1523 /* 1524 * We have modified the size of the base inode, cause the sizes to be 1525 * written to all the directory index entries pointing to the base 1526 * inode when the inode is written to disk. 1527 */ 1528 NInoSetDirtySizes(mft_ni); 1529 ntfs_debug("Done."); 1530 return 0; 1531restore_undo_alloc: 1532 ntfs_error(vol->mp, "Failed to find first attribute extent of mft " 1533 "data attribute."); 1534 if (err == ENOENT) 1535 err = EIO; 1536 /* 1537 * The reinitialization will preserve the is_mft_locked flag in the 1538 * search context thus we do not need to set it again. 1539 */ 1540 ntfs_attr_search_ctx_reinit(ctx); 1541 err2 = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 1542 vcn, NULL, 0, ctx); 1543 if (err2) { 1544 ntfs_error(vol->mp, "Failed to find last attribute extent of " 1545 "mft data attribute (error %d).%s", err2, es); 1546 ntfs_attr_search_ctx_put(ctx); 1547 ntfs_mft_record_unmap(mft_ni); 1548 /* 1549 * The only thing that is now wrong is the allocated size of the 1550 * base attribute extent which chkdsk should be able to fix. 1551 */ 1552 NVolSetErrors(vol); 1553 return err; 1554 } 1555 ctx->a->highest_vcn = cpu_to_sle64(vcn); 1556dirty_undo_alloc: 1557 /* 1558 * Need to mark the mft record for dirtying because ntfs_cluster_free() 1559 * may drop the mft record on the floor otherwise. 1560 */ 1561 NInoSetMrecNeedsDirtying(ctx->ni); 1562undo_alloc: 1563 err2 = ntfs_cluster_free(mft_ni, vcn + 1, -1, ctx, NULL); 1564 if (err2 || ctx->is_error) { 1565 ntfs_error(vol->mp, "Failed to release allocated cluster(s) " 1566 "in error code path (error %d).%s", 1567 ctx->is_error ? 
ctx->error : err2, es); 1568 NVolSetErrors(vol); 1569 } 1570 /* 1571 * If the runlist truncation fails and/or the search context is no 1572 * longer valid, we cannot resize the attribute record or build the 1573 * mapping pairs array thus we mark the volume dirty and tell the user 1574 * to run chkdsk. 1575 * 1576 * As before, we are going to update the runlist now so we need to take 1577 * the runlist lock for writing. 1578 */ 1579 lck_rw_lock_exclusive(&mft_ni->rl.lock); 1580 lck_spin_lock(&mft_ni->size_lock); 1581 mft_ni->allocated_size -= nr << vol->cluster_size_shift; 1582 lck_spin_unlock(&mft_ni->size_lock); 1583 err2 = ntfs_rl_truncate_nolock(vol, &mft_ni->rl, vcn + 1); 1584 lck_rw_unlock_exclusive(&mft_ni->rl.lock); 1585 if (err2) { 1586 ntfs_error(vol->mp, "Failed to truncate attribute runlist s " 1587 "in error code path (error %d).%s", err2, es); 1588 NVolSetErrors(vol); 1589 } else if (mp_rebuilt) { 1590 a = ctx->a; 1591 err2 = ntfs_attr_record_resize(ctx->m, a, attr_len); 1592 if (err2) { 1593 ntfs_error(vol->mp, "Failed to restore attribute " 1594 "record in error code path (error " 1595 "%d).%s", err2, es); 1596 NVolSetErrors(vol); 1597 } else /* if (!err2) */ { 1598 u16 mp_ofs = le16_to_cpu(a->mapping_pairs_offset); 1599 err2 = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs, 1600 attr_len - mp_ofs, mft_ni->rl.rl, 1601 lowest_vcn, -1, NULL); 1602 if (err2) { 1603 ntfs_error(vol->mp, "Failed to restore " 1604 "mapping pairs array in error " 1605 "code path (error %d).%s", 1606 err2, es); 1607 NVolSetErrors(vol); 1608 } 1609 NInoSetMrecNeedsDirtying(ctx->ni); 1610 } 1611 } 1612 if (ctx) 1613 ntfs_attr_search_ctx_put(ctx); 1614 if (m) 1615 ntfs_mft_record_unmap(mft_ni); 1616 return err; 1617} 1618 1619/** 1620 * ntfs_mft_record_lay_out - lay out an mft record into a memory buffer 1621 * @vol: volume to which the mft record will belong 1622 * @mft_no: mft record number of record to lay out 1623 * @m: destination buffer of size >= @vol->mft_record_size 
bytes 1624 * 1625 * Lay out an empty, unused mft record with the mft record number @mft_no into 1626 * the buffer @m. The volume @vol is needed because the mft record structure 1627 * was modified in NTFS 3.1 so we need to know which volume version this mft 1628 * record will be used on and also we need to know the size of an mft record. 1629 * 1630 * Return 0 on success and errno on error. 1631 */ 1632static errno_t ntfs_mft_record_lay_out(const ntfs_volume *vol, 1633 const s64 mft_no, MFT_RECORD *m) 1634{ 1635 ATTR_RECORD *a; 1636 1637 ntfs_debug("Entering for mft record 0x%llx.", 1638 (unsigned long long)mft_no); 1639 if (mft_no >= (1LL << 32)) { 1640 ntfs_error(vol->mp, "Mft record number 0x%llx exceeds " 1641 "maximum of 2^32.", 1642 (unsigned long long)mft_no); 1643 return ERANGE; 1644 } 1645 if (vol->mft_record_size < NTFS_BLOCK_SIZE) 1646 panic("%s(): vol->mft_record_size < NTFS_BLOCK_SIZE\n", 1647 __FUNCTION__); 1648 /* Start by clearing the whole mft record to give us a clean slate. */ 1649 bzero(m, vol->mft_record_size); 1650 /* Aligned to 2-byte boundary. */ 1651 if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver)) 1652 m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1); 1653 else { 1654 m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1); 1655 /* 1656 * Set the NTFS 3.1+ specific fields while we know that the 1657 * volume version is 3.1+. 1658 */ 1659 /* m->reserved = 0; */ 1660 m->mft_record_number = cpu_to_le32((u32)mft_no); 1661 } 1662 m->magic = magic_FILE; 1663 m->usa_count = cpu_to_le16(1 + vol->mft_record_size / NTFS_BLOCK_SIZE); 1664 /* Set the update sequence number to 1. */ 1665 *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1); 1666 /* m->lsn = 0; */ 1667 m->sequence_number = cpu_to_le16(1); 1668 /* m->link_count = 0; */ 1669 /* 1670 * Place the attributes straight after the update sequence array, 1671 * aligned to 8-byte boundary. 
1672 */ 1673 m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) + 1674 (le16_to_cpu(m->usa_count) << 1) + 7) & ~7); 1675 /* m->flags = 0; */ 1676 /* 1677 * Using attrs_offset plus eight bytes (for the termination attribute). 1678 * attrs_offset is already aligned to 8-byte boundary, so no need to 1679 * align again. 1680 */ 1681 m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8); 1682 m->bytes_allocated = cpu_to_le32(vol->mft_record_size); 1683 /* m->base_mft_record = 0; */ 1684 /* m->next_attr_instance = 0; */ 1685 /* Add the termination attribute. */ 1686 a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 1687 a->type = AT_END; 1688 /* a->length = 0; */ 1689 ntfs_debug("Done."); 1690 return 0; 1691} 1692 1693/** 1694 * ntfs_mft_record_format - format an mft record on an ntfs volume 1695 * @vol: volume on which to format the mft record 1696 * @mft_no: mft record number to format 1697 * @new_initialized_size: new initialized size to assign to @vol->mft_ni 1698 * 1699 * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused 1700 * mft record into the appropriate place of the mft data attribute. This is 1701 * used when extending the mft data attribute. 1702 * 1703 * Once the mft record is layed out the initialized size of @vol->mft_ni is 1704 * updated to @new_initalized_size. This must be bigger or equal to the old 1705 * initialized size and smaller or equal to the data size. 1706 * 1707 * Return 0 on success and errno on error. 1708 * 1709 * Locking: Caller must hold @vol->mft_ni->lock. 1710 */ 1711static errno_t ntfs_mft_record_format(ntfs_volume *vol, const s64 mft_no, 1712 const s64 new_initialized_size) 1713{ 1714 ntfs_inode *mft_ni; 1715 buf_t buf; 1716 MFT_RECORD *m; 1717 errno_t err, err2; 1718 1719 ntfs_debug("Entering for mft record 0x%llx.", 1720 (unsigned long long)mft_no); 1721 mft_ni = vol->mft_ni; 1722 /* The maximum valid offset into the VM page cache for $MFT's data. 
*/ 1723 if ((mft_no << vol->mft_record_size_shift) + vol->mft_record_size > 1724 ubc_getsize(mft_ni->vn)) { 1725 ntfs_error(vol->mp, "Tried to format non-existing mft " 1726 "record 0x%llx.", (unsigned long long)mft_no); 1727 return ENOENT; 1728 } 1729 /* Read and map the buffer containing the mft record. */ 1730 err = buf_meta_bread(mft_ni->vn, mft_no, vol->mft_record_size, NOCRED, 1731 &buf); 1732 if (err) { 1733 ntfs_error(vol->mp, "Failed to read buffer of mft record " 1734 "0x%llx (error %d).", 1735 (unsigned long long)mft_no, err); 1736 goto brelse; 1737 } 1738 err = buf_map(buf, (caddr_t*)&m); 1739 if (err) { 1740 ntfs_error(vol->mp, "Failed to map buffer of mft record " 1741 "0x%llx (error %d).", 1742 (unsigned long long)mft_no, err); 1743 goto brelse; 1744 } 1745 err = ntfs_mft_record_lay_out(vol, mft_no, m); 1746 if (err) { 1747 ntfs_error(vol->mp, "Failed to lay out mft record 0x%llx " 1748 "(error %d).", (unsigned long long)mft_no, err); 1749 goto unmap; 1750 } 1751 err = buf_unmap(buf); 1752 if (err) { 1753 ntfs_error(vol->mp, "Failed to unmap buffer of mft record " 1754 "0x%llx (error %d).", 1755 (unsigned long long)mft_no, err); 1756 goto brelse; 1757 } 1758 lck_spin_lock(&mft_ni->size_lock); 1759 if (new_initialized_size < mft_ni->initialized_size || 1760 new_initialized_size > mft_ni->data_size) 1761 panic("%s(): new_initialized_size < mft_ni->initialized_size " 1762 "|| new_initialized_size > mft_ni->data_size\n", 1763 __FUNCTION__); 1764 mft_ni->initialized_size = new_initialized_size; 1765 lck_spin_unlock(&mft_ni->size_lock); 1766 err = buf_bdwrite(buf); 1767 if (!err) { 1768 ntfs_debug("Done."); 1769 return 0; 1770 } 1771 ntfs_error(vol->mp, "Failed to write buffer of mft record 0x%llx " 1772 "(error %d). 
Run chkdsk.", (unsigned long long)mft_no, 1773 err); 1774 NVolSetErrors(vol); 1775 return err; 1776unmap: 1777 err2 = buf_unmap(buf); 1778 if (err2) 1779 ntfs_error(vol->mp, "Failed to unmap buffer of mft record " 1780 "0x%llx in error code path (error %d).", 1781 (unsigned long long)mft_no, err2); 1782brelse: 1783 buf_brelse(buf); 1784 return err; 1785} 1786 1787/** 1788 * ntfs_standard_info_attribute_insert - add the standard information attribute 1789 * @m: mft record in which to insert the attribute 1790 * @a: attribute in front of which to insert the new attribute 1791 * @file_attrs: file attribute flags to set in the attribute 1792 * @security_id: security_id to set in the attribute 1793 * @create_time: time to use for the times in the attribute 1794 * 1795 * Insert the standard information attribute into the mft record @m in front of 1796 * the attribute record @a. 1797 * 1798 * If @security_id is not zero, insert a Win2k+ style standard information 1799 * attribute and if it is zero, insert an NT4 style one. 1800 * 1801 * This function cannot fail. 1802 */ 1803static void ntfs_standard_info_attribute_insert(MFT_RECORD *m, ATTR_RECORD *a, 1804 const FILE_ATTR_FLAGS file_attrs, const le32 security_id, 1805 struct timespec *create_time) 1806{ 1807 STANDARD_INFORMATION *si; 1808 u32 size; 1809 1810 ntfs_debug("Entering."); 1811 size = sizeof(STANDARD_INFORMATION); 1812 if (!security_id) 1813 size = offsetof(STANDARD_INFORMATION, reserved12) + 1814 sizeof(si->reserved12); 1815 /* 1816 * Insert the attribute and initialize the value to zero. This cannot 1817 * fail as we are only called with an empty mft record so there must be 1818 * enough space for the standard information attribute. 1819 */ 1820 if (ntfs_resident_attr_record_insert_internal(m, a, 1821 AT_STANDARD_INFORMATION, NULL, 0, size)) 1822 panic("%s(): Failed to insert standard information " 1823 "attribute.\n", __FUNCTION__); 1824 /* Set up the attribute value. 
*/ 1825 si = (STANDARD_INFORMATION*)((u8*)a + le16_to_cpu(a->value_offset)); 1826 si->last_access_time = si->last_mft_change_time = 1827 si->last_data_change_time = si->creation_time = 1828 utc2ntfs(*create_time); 1829 si->file_attributes = file_attrs; 1830 if (security_id) 1831 si->security_id = security_id; 1832 ntfs_debug("Done (used %s style standard information attribute).", 1833 security_id ? "Win2k+" : "NT4"); 1834} 1835 1836/** 1837 * ntfs_sd_attribute_insert - add the security descriptor attribute 1838 * @vol: volume to which the mft record belongs 1839 * @m: mft record in which to insert the attribute 1840 * @a: attribute in front of which to insert the new attribute 1841 * @va: vnode attributes 1842 * 1843 * Insert the security descriptor attribute into the mft record @m in front of 1844 * the attribute record @a. 1845 * 1846 * @vol is the volume the mft record @m belongs to and is used to determine 1847 * whether an NT4 security descriptor is needed (NTFS 1.x) or a Win2k+ security 1848 * descriptor is needed (NTFS 3.0+). 1849 * 1850 * @va are the vnode attributes to assign to the create inode and allows us to 1851 * distinguish whether we need to insert a directory security descriptor or a 1852 * file one. 1853 * 1854 * This function cannot fail. 1855 */ 1856static void ntfs_sd_attribute_insert(ntfs_volume *vol, MFT_RECORD *m, 1857 ATTR_RECORD *a, const struct vnode_attr *va) 1858{ 1859 SDS_ENTRY *sds; 1860 u32 sd_size; 1861 1862 ntfs_debug("Entering."); 1863 if (vol->major_ver > 1) { 1864 if (va->va_type == VDIR) 1865 sds = ntfs_dir_sds_entry; 1866 else 1867 sds = ntfs_file_sds_entry; 1868 } else { 1869 if (va->va_type == VDIR) 1870 sds = ntfs_dir_sds_entry_old; 1871 else 1872 sds = ntfs_file_sds_entry_old; 1873 } 1874 sd_size = le32_to_cpu(sds->length) - sizeof(SDS_ENTRY_HEADER); 1875 /* 1876 * Insert the attribute. 
This cannot fail as we are only called with 1877 * an empty mft record so there must be enough space for our default 1878 * security descriptor attribute which is tiny. 1879 */ 1880 if (ntfs_resident_attr_record_insert_internal(m, a, 1881 AT_SECURITY_DESCRIPTOR, NULL, 0, sd_size)) 1882 panic("%s(): Failed to insert security descriptor " 1883 "attribute.\n", __FUNCTION__); 1884 /* Copy the chosen security descriptor into place. */ 1885 memcpy((u8*)a + le16_to_cpu(a->value_offset), &sds->sd, sd_size); 1886 ntfs_debug("Done."); 1887} 1888 1889/** 1890 * ntfs_index_root_attribute_insert - add the empty, $I30 index root attribute 1891 * @vol: volume to which the mft record belongs 1892 * @m: mft record in which to insert the attribute 1893 * @a: attribute in front of which to insert the new attribute 1894 * 1895 * Insert the empty, $I30 index root attribute into the mft record @m in front 1896 * of the attribute record @a. 1897 * 1898 * @vol is the volume the mft record @m belongs to and is used to determine the 1899 * the index block size as well as the number of clusters per index block. 1900 * 1901 * This function cannot fail. 1902 */ 1903static void ntfs_index_root_attribute_insert(ntfs_volume *vol, MFT_RECORD *m, 1904 ATTR_RECORD *a) 1905{ 1906 INDEX_ROOT *ir; 1907 INDEX_ENTRY_HEADER *ieh; 1908 1909 ntfs_debug("Entering."); 1910 /* 1911 * Insert the attribute and initialize the value to zero. This cannot 1912 * fail as we are only called with an empty mft record so there must be 1913 * enough space for the empty index root attribute. 1914 */ 1915 if (ntfs_resident_attr_record_insert_internal(m, a, AT_INDEX_ROOT, I30, 1916 4, sizeof(INDEX_ROOT) + sizeof(INDEX_ENTRY_HEADER))) 1917 panic("%s(): Failed to insert index root attribute.\n", 1918 __FUNCTION__); 1919 /* Set up the attribute value. 
*/ 1920 ir = (INDEX_ROOT*)((u8*)a + le16_to_cpu(a->value_offset)); 1921 ir->type = AT_FILENAME; 1922 ir->collation_rule = COLLATION_FILENAME; 1923 ir->index_block_size = cpu_to_le32(vol->index_block_size); 1924 ir->blocks_per_index_block = vol->blocks_per_index_block; 1925 ir->index.entries_offset = const_cpu_to_le32(sizeof(INDEX_HEADER)); 1926 ir->index.allocated_size = ir->index.index_length = const_cpu_to_le32( 1927 sizeof(INDEX_HEADER) + sizeof(INDEX_ENTRY_HEADER)); 1928 /* SMALL_INDEX is zero and the attribute value is already zeroed. */ 1929 /* ir->index.flags = SMALL_INDEX; */ 1930 ieh = (INDEX_ENTRY_HEADER*)((u8*)ir + sizeof(INDEX_ROOT)); 1931 ieh->length = const_cpu_to_le16(sizeof(INDEX_ENTRY_HEADER)); 1932 ieh->flags = INDEX_ENTRY_END; 1933 ntfs_debug("Done."); 1934} 1935 1936/** 1937 * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume 1938 * @vol: [IN] volume on which to allocate the mft record 1939 * @va: [IN/OUT] vnode attributes to assign to the new inode or NULL 1940 * @cn: [IN] name of new inode (@va != NULL) or NULL (@va == NULL) 1941 * @base_ni: [IN] base inode (@va == NULL) or parent directory (@va != NULL) 1942 * @new_ni: [OUT] on success this is the ntfs inode of the created inode 1943 * @new_m: [OUT] on success this is the mapped mft record 1944 * @new_a: [OUT] on success this is the attribute at which to insert 1945 * 1946 * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol and return 1947 * the ntfs inode of the created inode in *@new_ni, its mft record in *@new_m, 1948 * and *@new_a poinst to the attribute record in front of which the filename 1949 * attribute needs be inserted (if @va was not NULL, i.e. we allocated a base 1950 * mft record for a file or directory) or to the position at which the first 1951 * attribute in this mft record needs to be inserted (if @va is NULL, i.e. we 1952 * allocate an extent mft record). 1953 * 1954 * If @va is not NULL make the mft record a base mft record, i.e. 
 * a file or
 * directory inode, and allocate it at the default allocator position.  In this
 * case @va are the vnode attributes as given to us by the caller, @base_ni is
 * the ntfs inode of the parent directory, and @cn is the name of the new
 * inode.
 *
 * When allocating a base mft record the caller needs to do an
 *	ntfs_inode_unlock_alloc(*@new_ni);
 * to make the inode a full member of society by unlocking it and waking up any
 * waiters.  We do not do it here as the caller is likely to want to do more
 * work before unlocking the inode.
 *
 * Note that we only support some of the attributes that can be specified in
 * @va and we update @va to reflect the values we actually end up using.
 *
 * We in particular use @va to distinguish what type of inode is being created
 * (@va->va_type == VREG, VDIR, VLNK, VSOCK, VFIFO, VBLK, or VCHR,
 * respectively).  @va also gives us the creation_time to use
 * (@va->va_create_time) as well as the mode (@va->va_mode) and the file
 * attributes (@va->va_flags).  And for block and character device special file
 * nodes @va->va_rdev specifies the device.
 *
 * If @va is NULL, make the allocated mft record an extent record, allocate it
 * starting at the mft record after the base mft record and attach the
 * allocated and opened ntfs inode to the base inode @base_ni.  @cn is NULL.
 *
 * When allocating a base mft record, add the standard information attribute,
 * the security descriptor attribute (if needed) as well as the empty data
 * attribute (@va->va_type == VREG or VLNK), the empty index root attribute
 * (@va->va_type == VDIR) or the special flags and attributes for special
 * inodes (@va->va_type == VSOCK, VFIFO, VBLK, or VCHR).
 *
 * Return 0 on success and errno on error.  On error *@new_ni, *@new_m, and
 * *@new_a are not defined.
 *
 * Allocation strategy:
 *
 * To find a free mft record, we scan the mft bitmap for a zero bit.  To
 * optimize this we start scanning at the place specified by @base_ni or if
 * @base_ni is NULL we start where we last stopped and we perform wrap around
 * when we reach the end.  Note, we do not try to allocate mft records below
 * number 24 because numbers 0 to 15 are the defined system files anyway and 16
 * to 24 are special in that they are used for storing extension mft records
 * for the $DATA attribute of $MFT.  This is required to avoid the possibility
 * of creating a runlist with a circular dependency which once written to disk
 * can never be read in again.  Windows will only use records 16 to 24 for
 * normal files if the volume is completely out of space.  We never use them
 * which means that when the volume is really out of space we cannot create any
 * more files while Windows can still create up to 8 small files.  We can start
 * doing this at some later time, it does not matter much for now.
 *
 * When scanning the mft bitmap, we only search up to the last allocated mft
 * record.  If there are no free records left in the range 24 to number of
 * allocated mft records, then we extend the $MFT/$DATA attribute in order to
 * create free mft records.  We extend the allocated size of $MFT/$DATA by 16
 * records at a time or one cluster, if cluster size is above 16kiB.  If there
 * is not sufficient space to do this, we try to extend by a single mft record
 * or one cluster, if cluster size is above the mft record size.
 *
 * No matter how many mft records we allocate, we initialize only the first
 * allocated mft record, incrementing mft data size and initialized size
 * accordingly, open an ntfs_inode for it and return it to the caller, unless
 * there are fewer than 24 mft records, in which case we allocate and
 * initialize mft records until we reach record 24 which we consider as the
 * first free mft record for use by normal files.
 *
 * If during any stage we overflow the initialized data in the mft bitmap, we
 * extend the initialized size (and data size) by 8 bytes, allocating another
 * cluster if required.  The bitmap data size has to be at least equal to the
 * number of mft records in the mft, but it can be bigger, in which case the
 * superfluous bits are padded with zeroes.
 *
 * Thus, when we return success (i.e. zero), we will have:
 *	- initialized / extended the mft bitmap if necessary,
 *	- initialized / extended the mft data if necessary,
 *	- set the bit corresponding to the mft record being allocated in the
 *	  mft bitmap,
 *	- opened an ntfs_inode for the allocated mft record, and we will have
 *	- returned the ntfs_inode as well as the allocated and mapped mft
 *	  record.
 *
 * On error, the volume will be left in a consistent state and no record will
 * be allocated.  If rolling back a partial operation fails, we may leave some
 * inconsistent metadata in which case we set NVolErrors() so the volume is
 * left dirty when unmounted.
2039 * 2040 * Note, this function cannot make use of most of the normal functions, like 2041 * for example for attribute resizing, etc, because when the run list overflows 2042 * the base mft record and an attribute list is used, it is very important that 2043 * the extension mft records used to store the $DATA attribute of $MFT can be 2044 * reached without having to read the information contained inside them, as 2045 * this would make it impossible to find them in the first place after the 2046 * volume is unmounted. $MFT/$BITMAP probably does not need to follow this 2047 * rule because the bitmap is not essential for finding the mft records, but on 2048 * the other hand, handling the bitmap in this special way would make life 2049 * easier because otherwise there might be circular invocations of functions 2050 * when reading the bitmap. 2051 */ 2052errno_t ntfs_mft_record_alloc(ntfs_volume *vol, struct vnode_attr *va, 2053 struct componentname *cn, ntfs_inode *base_ni, 2054 ntfs_inode **new_ni, MFT_RECORD **new_m, 2055 ATTR_RECORD **new_a) 2056{ 2057 s64 bit, ll, old_data_initialized, old_data_size, old_mft_data_pos; 2058 s64 nr_mft_records_added; 2059 ntfs_inode *mft_ni, *mftbmp_ni, *ni; 2060 MFT_RECORD *m; 2061 ntfs_attr_search_ctx *ctx; 2062 ATTR_RECORD *a; 2063 buf_t buf; 2064 errno_t err, err2; 2065 le16 seq_no, usn; 2066 BOOL record_formatted, mark_sizes_dirty, dirty_buf; 2067 BOOL mft_ni_write_locked; 2068 2069 ntfs_debug("Entering (allocating a%s mft record, %s 0x%llx).", 2070 va ? " base" : "n extent", 2071 va ? "parent directory" : "base mft record", 2072 (unsigned long long)base_ni->mft_no); 2073 if (!new_ni || !new_m || !new_a) 2074 panic("%s(): !new_ni || !new_m || !new_a\n", __FUNCTION__); 2075 if (!base_ni) 2076 panic("%s(): !base_ni\n", __FUNCTION__); 2077 lck_rw_lock_exclusive(&vol->mftbmp_lock); 2078 /* 2079 * Get an iocount reference on the mft and mftbmp vnodes. 
2080 * 2081 * We do not bother with the iocount reference on the mft if @va is 2082 * NULL, i.e. we are allocating an extent mft record, because in that 2083 * case the base mft record @ni is already mapped thus an iocount 2084 * reference is already held on the mft. 2085 */ 2086 mft_ni = vol->mft_ni; 2087 if (va) { 2088 err = vnode_get(mft_ni->vn); 2089 if (err) { 2090 ntfs_error(vol->mp, "Failed to get vnode for $MFT."); 2091 lck_rw_unlock_exclusive(&vol->mftbmp_lock); 2092 return err; 2093 } 2094 } 2095 mftbmp_ni = vol->mftbmp_ni; 2096 err = vnode_get(mftbmp_ni->vn); 2097 if (err) { 2098 ntfs_error(vol->mp, "Failed to get vnode for $MFT/$Bitmap."); 2099 if (va) 2100 (void)vnode_put(mft_ni->vn); 2101 lck_rw_unlock_exclusive(&vol->mftbmp_lock); 2102 return err; 2103 } 2104retry_mftbmp_alloc: 2105 record_formatted = mark_sizes_dirty = dirty_buf = FALSE; 2106 lck_rw_lock_exclusive(&mftbmp_ni->lock); 2107 err = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, 2108 va ? NULL : base_ni, &bit); 2109 if (!err) { 2110 ntfs_debug("Found and allocated free record (#1), bit 0x%llx.", 2111 (unsigned long long)bit); 2112 goto have_alloc_rec; 2113 } 2114 if (err != ENOSPC) 2115 goto unl_err; 2116 /* 2117 * No free mft records left. If the mft bitmap already covers more 2118 * than the currently used mft records, the next records are all free, 2119 * so we can simply allocate the first unused mft record. 2120 * 2121 * Note: We also have to make sure that the mft bitmap at least covers 2122 * the first 24 mft records as they are special and whilst they may not 2123 * be in use, we do not allocate from them. 
2124 */ 2125 lck_spin_lock(&mft_ni->size_lock); 2126 ll = mft_ni->initialized_size >> vol->mft_record_size_shift; 2127 lck_spin_unlock(&mft_ni->size_lock); 2128 lck_spin_lock(&mftbmp_ni->size_lock); 2129 old_data_initialized = mftbmp_ni->initialized_size; 2130 lck_spin_unlock(&mftbmp_ni->size_lock); 2131 if (old_data_initialized << 3 > ll && old_data_initialized > 3) { 2132 bit = ll; 2133 if (bit < 24) 2134 bit = 24; 2135 /* 2136 * To be in line with what Windows allows we restrict the total 2137 * number of mft records to 2^32. 2138 */ 2139 if (bit >= (1LL << 32)) 2140 goto max_err; 2141 ntfs_debug("Found free record (#2), bit 0x%llx.", 2142 (unsigned long long)bit); 2143 goto found_free_rec; 2144 } 2145 /* 2146 * The mft bitmap needs to be extended until it covers the first unused 2147 * mft record that we can allocate. 2148 * 2149 * Note: The smallest mft record we allocate is mft record 24. 2150 */ 2151 bit = old_data_initialized << 3; 2152 /* 2153 * To be in line with what Windows allows we restrict the total number 2154 * of mft records to 2^32. 2155 */ 2156 if (bit >= (1LL << 32)) 2157 goto max_err; 2158 lck_spin_lock(&mftbmp_ni->size_lock); 2159 old_data_size = mftbmp_ni->allocated_size; 2160 ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, " 2161 "data_size 0x%llx, initialized_size 0x%llx.", 2162 (unsigned long long)old_data_size, 2163 (unsigned long long)mftbmp_ni->data_size, 2164 (unsigned long long)old_data_initialized); 2165 lck_spin_unlock(&mftbmp_ni->size_lock); 2166 if (old_data_initialized + 8 > old_data_size) { 2167 /* Need to extend bitmap by one more cluster. 
*/ 2168 ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); 2169 err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 2170 if (err) 2171 goto unl_err; 2172#ifdef DEBUG 2173 lck_spin_lock(&mftbmp_ni->size_lock); 2174 ntfs_debug("Status of mftbmp after allocation extension: " 2175 "allocated_size 0x%llx, data_size 0x%llx, " 2176 "initialized_size 0x%llx.", 2177 (unsigned long long)mftbmp_ni->allocated_size, 2178 (unsigned long long)mftbmp_ni->data_size, 2179 (unsigned long long) 2180 mftbmp_ni->initialized_size); 2181 lck_spin_unlock(&mftbmp_ni->size_lock); 2182#endif /* DEBUG */ 2183 } 2184 /* 2185 * We now have sufficient allocated space, extend the initialized_size 2186 * as well as the data_size if necessary and fill the new space with 2187 * zeroes. 2188 */ 2189 err = ntfs_mft_bitmap_extend_initialized_nolock(vol); 2190 if (err) 2191 goto unl_err; 2192#ifdef DEBUG 2193 lck_spin_lock(&mftbmp_ni->size_lock); 2194 ntfs_debug("Status of mftbmp after initialized extension: " 2195 "allocated_size 0x%llx, data_size 0x%llx, " 2196 "initialized_size 0x%llx.", 2197 (unsigned long long)mftbmp_ni->allocated_size, 2198 (unsigned long long)mftbmp_ni->data_size, 2199 (unsigned long long)mftbmp_ni->initialized_size); 2200 lck_spin_unlock(&mftbmp_ni->size_lock); 2201#endif /* DEBUG */ 2202 ntfs_debug("Found free record (#3), bit 0x%llx.", 2203 (unsigned long long)bit); 2204found_free_rec: 2205 /* @bit is the found free mft record, allocate it in the mft bitmap. */ 2206 ntfs_debug("At found_free_rec."); 2207 err = ntfs_bitmap_set_bit(mftbmp_ni, bit); 2208 if (err) { 2209 ntfs_error(vol->mp, "Failed to allocate bit in mft bitmap."); 2210 goto unl_err; 2211 } 2212 ntfs_debug("Set bit 0x%llx in mft bitmap.", (unsigned long long)bit); 2213have_alloc_rec: 2214 lck_rw_unlock_exclusive(&mftbmp_ni->lock); 2215 /* 2216 * The mft bitmap is now uptodate. Deal with mft data attribute now. 
2217 * Note, we keep hold of the mft bitmap lock for writing until all 2218 * modifications to the mft data attribute are complete, too, as they 2219 * will impact decisions for mft bitmap and mft record allocation done 2220 * by a parallel allocation and if the lock is not maintained a 2221 * parallel allocation could decide to allocate the same mft record as 2222 * this one. 2223 */ 2224 lck_rw_lock_shared(&mft_ni->lock); 2225 mft_ni_write_locked = FALSE; 2226mft_relocked: 2227 ll = (bit + 1) << vol->mft_record_size_shift; 2228 lck_spin_lock(&mft_ni->size_lock); 2229 old_data_initialized = mft_ni->initialized_size; 2230 lck_spin_unlock(&mft_ni->size_lock); 2231 if (ll <= old_data_initialized) { 2232 ntfs_debug("Allocated mft record already initialized."); 2233 goto mft_rec_already_initialized; 2234 } 2235 if (!mft_ni_write_locked) { 2236 mft_ni_write_locked = TRUE; 2237 if (!lck_rw_lock_shared_to_exclusive(&mft_ni->lock)) { 2238 lck_rw_lock_exclusive(&mft_ni->lock); 2239 goto mft_relocked; 2240 } 2241 } 2242 ntfs_debug("Initializing allocated mft record."); 2243 /* 2244 * The mft record is outside the initialized data. Extend the mft data 2245 * attribute until it covers the allocated record. The loop is only 2246 * actually traversed more than once when a freshly formatted volume is 2247 * first written to so it optimizes away nicely in the common case. 
2248 */ 2249 lck_spin_lock(&mft_ni->size_lock); 2250 ntfs_debug("Status of mft data before extension: " 2251 "allocated_size 0x%llx, data_size 0x%llx, " 2252 "initialized_size 0x%llx.", 2253 (unsigned long long)mft_ni->allocated_size, 2254 (unsigned long long)mft_ni->data_size, 2255 (unsigned long long)mft_ni->initialized_size); 2256 while (ll > mft_ni->allocated_size) { 2257 lck_spin_unlock(&mft_ni->size_lock); 2258 err = ntfs_mft_data_extend_allocation_nolock(vol); 2259 if (err) { 2260 ntfs_error(vol->mp, "Failed to extend mft data " 2261 "allocation."); 2262 lck_rw_unlock_exclusive(&mft_ni->lock); 2263 goto undo_mftbmp_alloc_locked; 2264 } 2265 lck_spin_lock(&mft_ni->size_lock); 2266 ntfs_debug("Status of mft data after allocation extension: " 2267 "allocated_size 0x%llx, data_size 0x%llx, " 2268 "initialized_size 0x%llx.", 2269 (unsigned long long)mft_ni->allocated_size, 2270 (unsigned long long)mft_ni->data_size, 2271 (unsigned long long)mft_ni->initialized_size); 2272 } 2273 lck_spin_unlock(&mft_ni->size_lock); 2274 /* 2275 * Extend mft data initialized size (and data size of course) to reach 2276 * the allocated mft record, formatting the mft records allong the way. 2277 * 2278 * Note: We only modify the ntfs_inode structure as that is all that is 2279 * needed by ntfs_mft_record_format(). We will update the attribute 2280 * record itself in one fell swoop later on. 
2281 */ 2282 lck_spin_lock(&mft_ni->size_lock); 2283 old_data_initialized = mft_ni->initialized_size; 2284 old_data_size = mft_ni->data_size; 2285 nr_mft_records_added = 0; 2286 if (old_data_size != ubc_getsize(mft_ni->vn)) 2287 panic("%s(): old_data_size != ubc_getsize(mft_ni->vn)\n", 2288 __FUNCTION__); 2289 while (ll > mft_ni->initialized_size) { 2290 s64 new_initialized_size, mft_no; 2291 2292 new_initialized_size = mft_ni->initialized_size + 2293 vol->mft_record_size; 2294 mft_no = mft_ni->initialized_size >> vol->mft_record_size_shift; 2295 ntfs_debug("mft_no 0x%llx, new_initialized_size 0x%llx, " 2296 "initialized_size 0x%llx, data_size 0x%llx.", 2297 (unsigned long long)mft_no, 2298 (unsigned long long)new_initialized_size, 2299 (unsigned long long)mft_ni->initialized_size, 2300 (unsigned long long)mft_ni->data_size); 2301 if (new_initialized_size > mft_ni->data_size) { 2302 /* Increment the number of newly added mft records. */ 2303 nr_mft_records_added += (new_initialized_size - 2304 mft_ni->data_size) >> 2305 vol->mft_record_size_shift; 2306 ntfs_debug("Updating data size and ubc size, " 2307 "nr_mft_records_added %lld.", 2308 (long long)nr_mft_records_added); 2309 mft_ni->data_size = new_initialized_size; 2310 lck_spin_unlock(&mft_ni->size_lock); 2311 if (!ubc_setsize(mft_ni->vn, new_initialized_size)) 2312 panic("%s(): ubc_setsize() failed.\n", 2313 __FUNCTION__); 2314 mark_sizes_dirty = TRUE; 2315 } else 2316 lck_spin_unlock(&mft_ni->size_lock); 2317 ntfs_debug("Initializing mft record 0x%llx.", 2318 (unsigned long long)mft_no); 2319 /* 2320 * ntfs_mft_record_format() updates the initialized size in 2321 * @mft_ni. 
2322 */ 2323 err = ntfs_mft_record_format(vol, mft_no, new_initialized_size); 2324 if (err) { 2325 ntfs_error(vol->mp, "Failed to format mft record."); 2326 goto undo_data_init; 2327 } 2328 lck_spin_lock(&mft_ni->size_lock); 2329 } 2330 lck_spin_unlock(&mft_ni->size_lock); 2331 record_formatted = TRUE; 2332 /* 2333 * Update the mft data attribute record to reflect the new sizes. 2334 * 2335 * When mapping the mft record for the mft we communicate the fact that 2336 * we hold the lock on the mft inode @mft_ni->lock for writing so it 2337 * does not try to take the lock. 2338 */ 2339 err = ntfs_mft_record_map_ext(mft_ni, &m, TRUE); 2340 if (err) { 2341 ntfs_error(vol->mp, "Failed to map mft record."); 2342 goto undo_data_init; 2343 } 2344 ctx = ntfs_attr_search_ctx_get(mft_ni, m); 2345 if (!ctx) { 2346 ntfs_error(vol->mp, "Failed to get search context."); 2347 err = ENOMEM; 2348 ntfs_mft_record_unmap(mft_ni); 2349 goto undo_data_init; 2350 } 2351 /* 2352 * We have the mft lock taken for write. Communicate this fact to 2353 * ntfs_attr_lookup() and hence to ntfs_extent_mft_record_map_ext() and 2354 * ntfs_mft_record_map_ext() so that they know not to try to take the 2355 * same lock. 2356 */ 2357 ctx->is_mft_locked = 1; 2358 err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, 2359 0, NULL, 0, ctx); 2360 if (err) { 2361 ntfs_error(vol->mp, "Failed to find first attribute extent of " 2362 "mft data attribute."); 2363 ntfs_attr_search_ctx_put(ctx); 2364 ntfs_mft_record_unmap(mft_ni); 2365 goto undo_data_init; 2366 } 2367 a = ctx->a; 2368 lck_spin_lock(&mft_ni->size_lock); 2369 a->initialized_size = cpu_to_sle64(mft_ni->initialized_size); 2370 a->data_size = cpu_to_sle64(mft_ni->data_size); 2371 /* 2372 * We have created new mft records thus update the cached numbers of 2373 * total and free mft records to reflect this. 
2374 */ 2375 vol->nr_mft_records = mft_ni->data_size >> vol->mft_record_size_shift; 2376 vol->nr_free_mft_records += nr_mft_records_added; 2377 if (vol->nr_free_mft_records >= vol->nr_mft_records) 2378 panic("%s(): vol->nr_free_mft_records > vol->nr_mft_records\n", 2379 __FUNCTION__); 2380 lck_spin_unlock(&mft_ni->size_lock); 2381 /* Ensure the changes make it to disk. */ 2382 NInoSetMrecNeedsDirtying(ctx->ni); 2383 ntfs_attr_search_ctx_put(ctx); 2384 ntfs_mft_record_unmap(mft_ni); 2385 /* 2386 * If we have modified the size of the base inode, cause the sizes to 2387 * be written to all the directory index entries pointing to the base 2388 * inode when the inode is written to disk. 2389 */ 2390 if (mark_sizes_dirty) 2391 NInoSetDirtySizes(mft_ni); 2392 lck_spin_lock(&mft_ni->size_lock); 2393 ntfs_debug("Status of mft data after mft record initialization: " 2394 "allocated_size 0x%llx, data_size 0x%llx, " 2395 "initialized_size 0x%llx.", 2396 (unsigned long long)mft_ni->allocated_size, 2397 (unsigned long long)mft_ni->data_size, 2398 (unsigned long long)mft_ni->initialized_size); 2399 if (mft_ni->data_size != ubc_getsize(mft_ni->vn)) 2400 panic("%s(): mft_ni->data_size != ubc_getsize(mft_ni->vn)\n", 2401 __FUNCTION__); 2402 if (mft_ni->data_size > mft_ni->allocated_size) 2403 panic("%s(): mft_ni->data_size > mft_ni->allocated_size\n", 2404 __FUNCTION__); 2405 if (mft_ni->initialized_size > mft_ni->data_size) 2406 panic("%s(): mft_ni->initialized_size > mft_ni->data_size\n", 2407 __FUNCTION__); 2408 lck_spin_unlock(&mft_ni->size_lock); 2409 lck_rw_lock_exclusive_to_shared(&mft_ni->lock); 2410mft_rec_already_initialized: 2411 /* 2412 * Update the default mft allocation position. We have to do this now 2413 * even if we fail later and deallocate the mft record because we are 2414 * about to drop the mftbmp_lock so we cannot touch vol->mft_data_pos 2415 * later on. We save the old value so we can restore it on error. 
2416 */ 2417 old_mft_data_pos = vol->mft_data_pos; 2418 vol->mft_data_pos = bit + 1; 2419 /* 2420 * We have allocated an mft record thus decrement the cached number of 2421 * free mft records to reflect this. 2422 */ 2423 vol->nr_free_mft_records--; 2424 if (vol->nr_free_mft_records < 0) 2425 vol->nr_free_mft_records = 0; 2426 /* 2427 * We can finally drop the mft bitmap lock as the mft data attribute 2428 * has been fully updated. The only disparity left is that the 2429 * allocated mft record still needs to be marked as in use to match the 2430 * set bit in the mft bitmap but this is actually not a problem since 2431 * this mft record is not referenced from anywhere yet and the fact 2432 * that it is allocated in the mft bitmap means that no-one will try to 2433 * allocate it either. 2434 */ 2435 lck_rw_unlock_exclusive(&vol->mftbmp_lock); 2436 /* 2437 * We now have allocated and initialized the mft record. 2438 * 2439 * Read and map the buffer containing the mft record. 2440 */ 2441 err = buf_meta_bread(mft_ni->vn, bit, vol->mft_record_size, NOCRED, 2442 &buf); 2443 if (err) { 2444 ntfs_error(vol->mp, "Failed to read buffer of mft record " 2445 "0x%llx (error %d).", (unsigned long long)bit, 2446 err); 2447 goto undo_mftbmp_alloc; 2448 } 2449 err = buf_map(buf, (caddr_t*)&m); 2450 if (err) { 2451 ntfs_error(vol->mp, "Failed to map buffer of mft record " 2452 "0x%llx (error %d).", (unsigned long long)bit, 2453 err); 2454 goto undo_mftbmp_alloc; 2455 } 2456 /* If we just formatted the mft record no need to do it again. */ 2457 if (!record_formatted) { 2458 /* 2459 * Sanity check that the mft record is really not in use. If 2460 * it is in use then warn the user about this inconsistency, 2461 * mark the volume as dirty to force chkdsk to run, and try to 2462 * allocate another mft record. As we have already set the mft 2463 * bitmap bit this means we have "repaired" the inconsistency. 
2464 * Of course we may now have an mft record that is marked in 2465 * use correctly but that is not referenced from anywhere at 2466 * all but chkdsk should hopefully fix this case by either 2467 * recovering the mft record by linking it somewhere or by 2468 * properly freeing the mft record. 2469 * 2470 * TODO: Need to test what chkdsk does exactly. For example if 2471 * it only clears the bit in the mft bitmap but leaves the mft 2472 * record marked in use we would detect this here as corruption 2473 * again and set the bitmap bit back to one and thus end up 2474 * with a vicious circle. So we need to figure out what chkdsk 2475 * does and adjust our handling here appropriately. 2476 */ 2477 if (ntfs_is_file_record(m->magic) && 2478 m->flags & MFT_RECORD_IN_USE) { 2479 ntfs_warning(vol->mp, "Mft record 0x%llx was marked " 2480 "free in mft bitmap but is marked " 2481 "used itself. Marking it used in mft " 2482 "bitmap. This indicates a corrupt " 2483 "file system. Unmount and run " 2484 "chkdsk.", (unsigned long long)bit); 2485 err = buf_unmap(buf); 2486 if (err) 2487 ntfs_error(vol->mp, "Failed to unmap buffer " 2488 "of mft record 0x%llx (error " 2489 "%d).", 2490 (unsigned long long)bit, err); 2491 buf_brelse(buf); 2492 lck_rw_unlock_shared(&mft_ni->lock); 2493 lck_rw_lock_exclusive(&vol->mftbmp_lock); 2494 NVolSetErrors(vol); 2495 goto retry_mftbmp_alloc; 2496 } 2497 /* 2498 * We need to (re-)format the mft record, preserving the 2499 * sequence number if it is not zero as well as the update 2500 * sequence number if it is not zero or -1 (0xffff). This 2501 * means we do not need to care whether or not something went 2502 * wrong with the previous mft record. 
2503 */ 2504 seq_no = m->sequence_number; 2505 usn = 0; 2506 if (le16_to_cpu(m->usa_ofs) < NTFS_BLOCK_SIZE - sizeof(u16)) 2507 usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)); 2508 err = ntfs_mft_record_lay_out(vol, bit, m); 2509 if (err) { 2510 ntfs_error(vol->mp, "Failed to lay out allocated mft " 2511 "record 0x%llx.", 2512 (unsigned long long)bit); 2513 goto unmap_undo_mftbmp_alloc; 2514 } 2515 if (seq_no) 2516 m->sequence_number = seq_no; 2517 if (usn && usn != 0xffff) 2518 *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn; 2519 } 2520 /* Set the mft record itself in use. */ 2521 m->flags |= MFT_RECORD_IN_USE; 2522 if (!va) { 2523 /* 2524 * Record the sequence number so we can supply it as part of 2525 * the mft reference when mapping the extent mft record below 2526 * which ensures that we get back the same mft record we 2527 * expected. 2528 */ 2529 seq_no = m->sequence_number; 2530 /* 2531 * Setup the base mft record in the extent mft record. This 2532 * completes initialization of the allocated extent mft record 2533 * and we can simply use it with ntfs_extent_mft_record_map(). 2534 */ 2535 m->base_mft_record = MK_LE_MREF(base_ni->mft_no, 2536 base_ni->seq_no); 2537 /* 2538 * Need to release the page so that we can call 2539 * ntfs_extent_mft_record_map(). We also set the page dirty to 2540 * ensure that it does not get thrown out under VM pressure 2541 * before we get it with the ntfs_extent_mft_record_map() call. 2542 * 2543 * FIXME: This could be optimized by modifying 2544 * ntfs_extent_mft_record_map() to take an optional mft record, 2545 * i.e. @m, and if supplied using this instead of trying to map 2546 * the extent mft record. Alternatively we could unlock the 2547 * page but not release it but this cannot be done in OS X 2548 * (yet). 2549 * 2550 * Allocate an extent inode structure for the new mft record, 2551 * attach it to the base inode @base_ni and map its, i.e. the 2552 * allocated, mft record. 
2553 */ 2554 err = buf_unmap(buf); 2555 if (err) 2556 ntfs_error(vol->mp, "Failed to unmap buffer of mft " 2557 "record 0x%llx (error %d).", 2558 (unsigned long long)bit, err); 2559 err = buf_bdwrite(buf); 2560 if (err) { 2561 ntfs_error(vol->mp, "Failed to write buffer of mft " 2562 "record 0x%llx (error %d). Run " 2563 "chkdsk.", (unsigned long long)bit, 2564 err); 2565 NVolSetErrors(vol); 2566 lck_rw_unlock_shared(&mft_ni->lock); 2567 goto free_undo_mftbmp_alloc; 2568 } 2569 err = ntfs_extent_mft_record_map_ext(base_ni, MK_MREF(bit, 2570 le16_to_cpu(seq_no)), &ni, &m, TRUE); 2571 lck_rw_unlock_shared(&mft_ni->lock); 2572 if (err) { 2573 ntfs_error(vol->mp, "Failed to map allocated mft " 2574 "record 0x%llx (error %d).", 2575 (unsigned long long)bit, err); 2576 goto free_undo_mftbmp_alloc; 2577 } 2578 /* This is where the first attribute needs to be inserted. */ 2579 *new_a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 2580 } else { 2581 FILE_ATTR_FLAGS file_attrs; 2582 le32 security_id; 2583 ntfs_attr na; 2584 2585 /* 2586 * Mirror the file attribute flags we want to inherit from the 2587 * parent directory. 2588 */ 2589 file_attrs = base_ni->file_attributes & (FILE_ATTR_ENCRYPTED | 2590 FILE_ATTR_NOT_CONTENT_INDEXED | 2591 FILE_ATTR_COMPRESSED | FILE_ATTR_SPARSE_FILE); 2592 switch (va->va_type) { 2593 case VDIR: 2594 m->flags |= MFT_RECORD_IS_DIRECTORY; 2595 break; 2596 case VSOCK: 2597 case VFIFO: 2598 case VBLK: 2599 case VCHR: 2600 /* 2601 * We use the same way of implementing special inodes 2602 * as Services For Unix uses on Windows thus we set the 2603 * FILE_ATTR_SYSTEM file attribute. 2604 */ 2605 file_attrs |= FILE_ATTR_SYSTEM; 2606 /* 2607 * It makes no sense for a special inode to be 2608 * encrypted or compressed so clear those flags. 
2609 */ 2610 file_attrs &= ~(FILE_ATTR_ENCRYPTED | 2611 FILE_ATTR_COMPRESSED); 2612 default: 2613 file_attrs |= FILE_ATTR_ARCHIVE; 2614 /* 2615 * FIXME: We do not implement writing to compressed or 2616 * encrypted files yet, so we clear the corresponding 2617 * bits in the file attribute flags for now. 2618 */ 2619 file_attrs &= ~(FILE_ATTR_ENCRYPTED | 2620 FILE_ATTR_COMPRESSED); 2621 } 2622 /* 2623 * Determine whether we need to insert a Win2k+ style standard 2624 * information attribute or an NT4 style one. For NTFS 1.x 2625 * volumes, we always insert NT4 style standard information 2626 * attributes whilst for newer volumes we decide depending on 2627 * the value of NVolUseSDAttr(). If NVolUseSDAttr() is set, we 2628 * are to specify security descriptors by creating security 2629 * descriptor attributes and in this case we have to use the 2630 * NT4 style standard information attribute. If it is clear, 2631 * we are to specify security descriptors by security_id 2632 * reference into $Secure system file and in this case we have 2633 * to use the Win2k+ style standard information attribute. 2634 * 2635 * To make things simpler, if this is an NTFS 1.x volume, 2636 * NVolUseSDAttr() has been set so we only need to test for it. 2637 */ 2638 if (NVolUseSDAttr(vol)) 2639 security_id = 0; 2640 else { 2641 BOOL is_retry = FALSE; 2642retry: 2643 lck_spin_lock(&vol->security_id_lock); 2644 if (va->va_type == VDIR) 2645 security_id = vol->default_dir_security_id; 2646 else 2647 security_id = vol->default_file_security_id; 2648 lck_spin_unlock(&vol->security_id_lock); 2649 /* 2650 * If the default security_id is not initialized, try 2651 * to initialize it now and should the initialization 2652 * fail, use a security descriptor attribute and hence 2653 * an NT4 style standard information attribute. 
2654 */ 2655 if (!security_id && !is_retry) { 2656 if (!ntfs_default_security_id_init(vol, va)) { 2657 is_retry = TRUE; 2658 goto retry; 2659 } 2660 } 2661 } 2662 a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 2663 /* Add the standard information attribute. */ 2664 ntfs_standard_info_attribute_insert(m, a, file_attrs, 2665 security_id, &va->va_create_time); 2666 a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length)); 2667 /* 2668 * If @security_id is zero, add the security descriptor 2669 * attribute. If it is not zero, we have already set the 2670 * security_id in the standard information attribute to 2671 * reference our security descriptor in $Secure. 2672 */ 2673 if (!security_id) { 2674 /* Add the security descriptor attribute. */ 2675 ntfs_sd_attribute_insert(vol, m, a, va); 2676 a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length)); 2677 } 2678 if (va->va_type == VDIR) { 2679 /* Add the empty, $I30 index root attribute. */ 2680 ntfs_index_root_attribute_insert(vol, m, a); 2681 } else { 2682 INTX_FILE *ix; 2683 u32 data_len; 2684 2685 /* 2686 * FIXME: For encrypted files, we need to add an empty, 2687 * non-resident $DATA attribute and we need to add the 2688 * $EFS attribute. For now, we should never get here 2689 * as we clear the encrypted bit above because we do 2690 * not support creating encrypted files. 2691 */ 2692 if (file_attrs & FILE_ATTR_ENCRYPTED) 2693 panic("%s(): file_attrs & " 2694 "FILE_ATTR_ENCRYPTED\n", 2695 __FUNCTION__); 2696 switch (va->va_type) { 2697 case VBLK: 2698 case VCHR: 2699 /* 2700 * In Services for Unix on Windows, a device 2701 * special file is a system file whose $DATA 2702 * attribute contains the INTX_FILE structure. 2703 */ 2704 data_len = offsetof(INTX_FILE, device) + 2705 sizeof(ix->device); 2706 break; 2707 case VSOCK: 2708 /* 2709 * In Services for Unix on Windows, a socket is 2710 * a system file with a $DATA attribute of 2711 * length 1. 
2712 */ 2713 data_len = 1; 2714 break; 2715 case VFIFO: 2716 /* 2717 * On Services for Unix on Windows, a fifo is a 2718 * system file with a zero-length $DATA 2719 * attribute so fall through to the default 2720 * case. 2721 */ 2722 default: 2723 data_len = 0; 2724 break; 2725 } 2726 /* 2727 * Insert the empty, resident $DATA attribute. This 2728 * cannot fail as we are dealing with an empty mft 2729 * record so there must be enough space for an empty 2730 * $DATA attribute. 2731 */ 2732 if (ntfs_resident_attr_record_insert_internal(m, a, 2733 AT_DATA, NULL, 0, data_len)) 2734 panic("%s(): Failed to insert resident data " 2735 "attribute.\n", __FUNCTION__); 2736 /* 2737 * If this is a device special inode then set up the 2738 * INTX_FILE structure inside the created $DATA 2739 * attribute. 2740 */ 2741 if (va->va_type == VBLK || va->va_type == VCHR) { 2742 ix = (INTX_FILE*)((u8*)a + 2743 le16_to_cpu(a->value_offset)); 2744 if (va->va_type == VBLK) 2745 ix->magic = INTX_BLOCK_DEVICE; 2746 else 2747 ix->magic = INTX_CHAR_DEVICE; 2748 ix->device.major = cpu_to_le64( 2749 major(va->va_rdev)); 2750 ix->device.minor = cpu_to_le64( 2751 minor(va->va_rdev)); 2752 } 2753 } 2754 /* Allocate a new ntfs inode and set it up. */ 2755 na = (ntfs_attr) { 2756 .mft_no = bit, 2757 .type = AT_UNUSED, 2758 .raw = FALSE, 2759 }; 2760 ni = ntfs_inode_hash_get(vol, &na); 2761 if (!ni) { 2762 ntfs_error(vol->mp, "Failed to allocate ntfs inode " 2763 "(ENOMEM)."); 2764 err = ENOMEM; 2765 /* Set the mft record itself not in use. */ 2766 m->flags &= ~MFT_RECORD_IN_USE; 2767 dirty_buf = TRUE; 2768 goto unmap_undo_mftbmp_alloc; 2769 } 2770 /* 2771 * This inode cannot still be in the inode cache as we would 2772 * have removed it when it was deleted last time. 2773 */ 2774 if (!NInoAlloc(ni)) 2775 panic("%s(): !NInoAlloc(ni)\n", __FUNCTION__); 2776 ni->seq_no = le16_to_cpu(m->sequence_number); 2777 /* 2778 * Set the appropriate mode, attribute type, and name. 
For 2779 * directories, also set up the index values to the defaults. 2780 */ 2781 ni->mode |= ACCESSPERMS; 2782 if (va->va_type == VDIR) { 2783 ni->mode |= S_IFDIR; 2784 ni->mode &= ~vol->dmask; 2785 NInoSetMstProtected(ni); 2786 ni->type = AT_INDEX_ALLOCATION; 2787 ni->name = I30; 2788 ni->name_len = 4; 2789 ni->vcn_size = 0; 2790 ni->collation_rule = 0; 2791 ni->vcn_size_shift = 0; 2792 } else /* if (va->va_type == VREG || va->va_type == VLNK) */ { 2793 switch (va->va_type) { 2794 case VREG: 2795 ni->mode |= S_IFREG; 2796 break; 2797 case VLNK: 2798 ni->mode |= S_IFLNK; 2799 break; 2800 case VSOCK: 2801 ni->mode |= S_IFSOCK; 2802 break; 2803 case VFIFO: 2804 ni->mode |= S_IFIFO; 2805 break; 2806 case VBLK: 2807 ni->mode |= S_IFBLK; 2808 ni->rdev = va->va_rdev; 2809 break; 2810 case VCHR: 2811 ni->mode |= S_IFCHR; 2812 ni->rdev = va->va_rdev; 2813 break; 2814 default: 2815 panic("%s(): Should never have gotten here " 2816 "for va->va_type 0x%x.\n", 2817 __FUNCTION__, va->va_type); 2818 } 2819 if (!S_ISLNK(ni->mode)) 2820 ni->mode &= ~vol->fmask; 2821 ni->type = AT_DATA; 2822 /* ni->name = NULL; */ 2823 /* ni->name_len = 0; */ 2824 if (file_attrs & FILE_ATTR_COMPRESSED) { 2825 // TODO: Set up all the @ni->compress* fields... 2826 // For now it does not matter as we do not 2827 // allow creation of compressed files. 2828 panic("%s(): file_attrs & " 2829 "FILE_ATTR_COMPRESSED\n", 2830 __FUNCTION__); 2831 } 2832 } 2833 ni->file_attributes = file_attrs; 2834 if (file_attrs & FILE_ATTR_COMPRESSED) 2835 NInoSetCompressed(ni); 2836 if (file_attrs & FILE_ATTR_ENCRYPTED) 2837 NInoSetEncrypted(ni); 2838 if (file_attrs & FILE_ATTR_SPARSE_FILE) 2839 NInoSetSparse(ni); 2840 ni->last_access_time = ni->last_mft_change_time = 2841 ni->last_data_change_time = ni->creation_time = 2842 va->va_create_time; 2843 /* Initialize the backup time and Finder info cache. 
*/ 2844 ntfs_inode_afpinfo_cache(ni, NULL, 0); 2845 /* 2846 * If it is a symbolic link set the Finder info type and 2847 * creator appropriately and mark it dirty. We will create the 2848 * AFP_AfpInfo attribute later when the inode is ready for it. 2849 */ 2850 if (va->va_type == VLNK) { 2851 ni->finder_info.type = FINDER_TYPE_SYMBOLIC_LINK; 2852 ni->finder_info.creator = FINDER_CREATOR_SYMBOLIC_LINK; 2853 NInoSetDirtyFinderInfo(ni); 2854 } 2855 /* Tell the caller what mode and flags we actually used. */ 2856 va->va_mode = ni->mode; 2857 va->va_flags = 0; 2858 if (file_attrs & FILE_ATTR_READONLY) 2859 va->va_flags |= UF_IMMUTABLE; 2860 if (file_attrs & FILE_ATTR_HIDDEN) 2861 va->va_flags |= UF_HIDDEN; 2862 if (!(file_attrs & FILE_ATTR_ARCHIVE)) 2863 va->va_flags |= SF_ARCHIVED; 2864 /* The ntfs inode is now fully setup so we now add the vnode. */ 2865 err = ntfs_inode_add_vnode(ni, FALSE, base_ni->vn, cn); 2866 if (err) { 2867 /* Destroy the allocated ntfs inode. */ 2868 ntfs_inode_reclaim(ni); 2869 /* Set the mft record itself not in use. */ 2870 m->flags &= ~MFT_RECORD_IN_USE; 2871 dirty_buf = TRUE; 2872 goto unmap_undo_mftbmp_alloc; 2873 } 2874 /* 2875 * Need to release the buffer so that we can call 2876 * ntfs_mft_record_map(). 2877 * 2878 * FIXME: This could be optimized by modifying 2879 * ntfs_mft_record_map() to take an optional mft record, i.e. 2880 * @m, and if supplied using this instead of trying to map the 2881 * extent mft record. 2882 */ 2883 err = buf_unmap(buf); 2884 if (err) 2885 ntfs_error(vol->mp, "Failed to unmap buffer of mft " 2886 "record 0x%llx (error %d).", 2887 (unsigned long long)bit, err); 2888 err = buf_bdwrite(buf); 2889 if (err) { 2890 ntfs_error(vol->mp, "Failed to write buffer of mft " 2891 "record 0x%llx (error %d). 
Run " 2892 "chkdsk.", (unsigned long long)bit, 2893 err); 2894 NVolSetErrors(vol); 2895 lck_rw_unlock_shared(&mft_ni->lock); 2896 ntfs_inode_unlock_alloc(ni); 2897 (void)vnode_recycle(ni->vn); 2898 (void)vnode_put(ni->vn); 2899 goto free_undo_mftbmp_alloc; 2900 } 2901 err = ntfs_mft_record_map_ext(ni, &m, TRUE); 2902 lck_rw_unlock_shared(&mft_ni->lock); 2903 if (err) { 2904 ntfs_inode_unlock_alloc(ni); 2905 (void)vnode_recycle(ni->vn); 2906 (void)vnode_put(ni->vn); 2907 goto free_undo_mftbmp_alloc; 2908 } 2909 a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 2910 if (a->type != AT_STANDARD_INFORMATION) 2911 panic("%s(): a->type != AT_STANDARD_INFORMATION\n", 2912 __FUNCTION__); 2913 a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length)); 2914 if (le32_to_cpu(a->type) <= const_le32_to_cpu(AT_FILENAME)) 2915 panic("%s(): a->type <= AT_FILENAME\n", __FUNCTION__); 2916 /* This is where the filename attribute needs to be inserted. */ 2917 *new_a = a; 2918 } 2919 /* Make sure the (extent) inode is written out to disk. */ 2920 NInoSetMrecNeedsDirtying(ni); 2921 /* 2922 * Drop the taken iocount references on the mft and mftbmp vnodes. 2923 * 2924 * Note we still retain an iocount reference on the mft vnode due to 2925 * the above call to ntfs_{,extent_}mft_record_map(). 2926 */ 2927 (void)vnode_put(mftbmp_ni->vn); 2928 if (va) 2929 (void)vnode_put(mft_ni->vn); 2930 /* 2931 * Return the opened, allocated inode of the allocated mft record as 2932 * well as the mapped mft record. 2933 */ 2934 ntfs_debug("Returning allocated %sntfs inode (mft_no 0x%llx).", 2935 va ? 
"" : "extent ", (unsigned long long)bit); 2936 *new_ni = ni; 2937 *new_m = m; 2938 return err; 2939undo_data_init: 2940 lck_spin_lock(&mft_ni->size_lock); 2941 mft_ni->initialized_size = old_data_initialized; 2942 lck_spin_unlock(&mft_ni->size_lock); 2943 if (!ubc_setsize(mft_ni->vn, old_data_size)) 2944 panic("%s(): !ubc_setsize(mft_ni->vn, old_data_size)\n", 2945 __FUNCTION__); 2946 lck_spin_lock(&mft_ni->size_lock); 2947 mft_ni->data_size = old_data_size; 2948 lck_spin_unlock(&mft_ni->size_lock); 2949 lck_rw_unlock_exclusive(&mft_ni->lock); 2950 goto undo_mftbmp_alloc_locked; 2951free_undo_mftbmp_alloc: 2952 lck_rw_lock_shared(&mft_ni->lock); 2953 err2 = buf_meta_bread(mft_ni->vn, bit, vol->mft_record_size, NOCRED, 2954 &buf); 2955 if (err2) { 2956 ntfs_error(vol->mp, "Failed to re-read buffer of mft record " 2957 "0x%llx in error code path (error %d).%s", 2958 (unsigned long long)bit, err2, es); 2959 NVolSetErrors(vol); 2960 goto undo_mftbmp_alloc; 2961 } 2962 err2 = buf_map(buf, (caddr_t*)&m); 2963 if (err2) { 2964 ntfs_error(vol->mp, "Failed to re-map buffer of mft record " 2965 "0x%llx in error code path (error %d).%s", 2966 (unsigned long long)bit, err2, es); 2967 NVolSetErrors(vol); 2968 goto undo_mftbmp_alloc; 2969 } 2970 /* Set the mft record itself not in use. 
*/ 2971 m->flags &= ~MFT_RECORD_IN_USE; 2972 dirty_buf = TRUE; 2973unmap_undo_mftbmp_alloc: 2974 err2 = buf_unmap(buf); 2975 if (err2) 2976 ntfs_error(vol->mp, "Failed to unmap buffer of mft record " 2977 "0x%llx (error %d).", (unsigned long long)bit, 2978 err2); 2979undo_mftbmp_alloc: 2980 if (dirty_buf) { 2981 err2 = buf_bdwrite(buf); 2982 if (err2) 2983 ntfs_error(vol->mp, "Failed to write buffer of mft " 2984 "record 0x%llx in error code path " 2985 "(error %d).", (unsigned long long)bit, 2986 err2); 2987 } else 2988 buf_brelse(buf); 2989 lck_rw_unlock_shared(&mft_ni->lock); 2990 lck_rw_lock_exclusive(&vol->mftbmp_lock); 2991 /* 2992 * We decremented the cached number of free mft records thus we need to 2993 * increment it again here now that we are not allocating the mft 2994 * record after all. 2995 */ 2996 vol->nr_free_mft_records++; 2997 /* 2998 * Restore the previous mft data position but only if no-one else has 2999 * restored it to something even older whilst we had dropped the lock. 3000 */ 3001 if (old_mft_data_pos < vol->mft_data_pos) 3002 vol->mft_data_pos = old_mft_data_pos; 3003undo_mftbmp_alloc_locked: 3004 lck_rw_lock_shared(&mftbmp_ni->lock); 3005 if (ntfs_bitmap_clear_bit(mftbmp_ni, bit)) { 3006 ntfs_error(vol->mp, "Failed to clear bit in mft bitmap.%s", es); 3007 NVolSetErrors(vol); 3008 /* 3009 * We failed to clear the bit thus we are wasting an mft record 3010 * and since its bit is set in the mft bitmap it is effectively 3011 * in use thus it is not free. So decrement the number of free 3012 * mft records again. 
3013 */ 3014 vol->nr_free_mft_records--; 3015 if (vol->nr_free_mft_records < 0) 3016 vol->nr_free_mft_records = 0; 3017 } 3018 lck_rw_unlock_shared(&mftbmp_ni->lock); 3019err: 3020 lck_rw_unlock_exclusive(&vol->mftbmp_lock); 3021 (void)vnode_put(mftbmp_ni->vn); 3022 if (va) 3023 (void)vnode_put(mft_ni->vn); 3024 return err; 3025max_err: 3026 ntfs_warning(vol->mp, "Cannot allocate mft record because the maximum " 3027 "number of inodes (2^32) has already been reached."); 3028 err = ENOSPC; 3029unl_err: 3030 lck_rw_unlock_exclusive(&mftbmp_ni->lock); 3031 goto err; 3032} 3033 3034/** 3035 * ntfs_extent_mft_record_free - free an extent mft record on an ntfs volume 3036 * @base_ni: base ntfs inode to which the extent inode to be freed belongs 3037 * @ni: ntfs inode of the mapped extent mft record to free 3038 * @m: mapped extent mft record of the ntfs inode @ni 3039 * 3040 * Free the mapped extent mft record @m of the extent ntfs inode @ni belonging 3041 * to the base ntfs inode @base_ni. 3042 * 3043 * Note that this function unmaps the mft record and closes and destroys @ni 3044 * internally and hence you cannot use either the inode nor its mft record any 3045 * more after this function returns success. 3046 * 3047 * Return 0 on success and errno on error. In the error case @ni and @m are 3048 * still valid and have not been freed. 3049 * 3050 * For some errors an error message is displayed and the success code 0 is 3051 * returned and the volume is then left dirty on umount. This makes sense in 3052 * case we could not rollback the changes that were already done since the 3053 * caller no longer wants to reference this mft record so it does not matter to 3054 * the caller if something is wrong with it as long as it is properly detached 3055 * from the base inode. 
3056 */ 3057errno_t ntfs_extent_mft_record_free(ntfs_inode *base_ni, ntfs_inode *ni, 3058 MFT_RECORD *m) 3059{ 3060 ino64_t mft_no = ni->mft_no; 3061 ntfs_volume *vol = ni->vol; 3062 ntfs_inode **extent_nis; 3063 int i; 3064 errno_t err; 3065 u16 seq_no; 3066 3067 ntfs_debug("Entering for extent mft_no 0x%llx, base mft_no 0x%llx.\n", 3068 (unsigned long long)mft_no, 3069 (unsigned long long)base_ni->mft_no); 3070 if (NInoAttr(ni)) 3071 panic("%s(): NInoAttr(ni)\n", __FUNCTION__); 3072 if (ni->nr_extents != -1) 3073 panic("%s(): ni->nr_extents != -1\n", __FUNCTION__); 3074 if (base_ni->nr_extents <= 0) 3075 panic("%s(): base_ni->nr_extents <= 0\n", __FUNCTION__); 3076 lck_mtx_lock(&base_ni->extent_lock); 3077 /* Dissociate the ntfs inode from the base inode. */ 3078 extent_nis = base_ni->extent_nis; 3079 err = ENOENT; 3080 for (i = 0; i < base_ni->nr_extents; i++) { 3081 if (ni != extent_nis[i]) 3082 continue; 3083 extent_nis += i; 3084 base_ni->nr_extents--; 3085 if (base_ni->nr_extents > 0) { 3086 /* 3087 * We do not bother reallocating memory for the array 3088 * to shrink it as in the worst case we are wasting a 3089 * bit of memory until the inode is thrown out of the 3090 * cache or until all extent mft records are removed in 3091 * which case we will free the whole array below. 
3092 */ 3093 memmove(extent_nis, extent_nis + 1, 3094 (base_ni->nr_extents - i) * 3095 sizeof(ntfs_inode*)); 3096 } else { 3097 if (base_ni->nr_extents < 0) 3098 panic("%s(): base_ni->nr_extents < 0\n", 3099 __FUNCTION__); 3100 OSFree(base_ni->extent_nis, base_ni->extent_alloc, 3101 ntfs_malloc_tag); 3102 base_ni->extent_alloc = 0; 3103 } 3104 err = 0; 3105 break; 3106 } 3107 lck_mtx_unlock(&base_ni->extent_lock); 3108 if (err) 3109 panic("%s(): Extent mft_no 0x%llx is not attached to " 3110 "its base mft_no 0x%llx.\n", __FUNCTION__, 3111 (unsigned long long)mft_no, 3112 (unsigned long long)base_ni->mft_no); 3113 /* 3114 * The extent inode is no longer attached to the base inode so we can 3115 * proceed to free it as no one can get a reference to it now because 3116 * we still hold the base mft record mapped. 3117 * 3118 * Begin by setting the mft record itself not in use and then increment 3119 * the sequence number, skipping zero, if it is not zero. 3120 */ 3121 m->flags &= ~MFT_RECORD_IN_USE; 3122 seq_no = le16_to_cpu(m->sequence_number); 3123 if (seq_no == 0xffff) 3124 seq_no = 1; 3125 else if (seq_no) 3126 seq_no++; 3127 m->sequence_number = cpu_to_le16(seq_no); 3128 /* Make sure the mft record is written out to disk. */ 3129 NInoSetMrecNeedsDirtying(ni); 3130 /* 3131 * Unmap and throw away the now freed extent inode. The mft record 3132 * will be written out later by the VM due to its page being marked 3133 * dirty. 3134 */ 3135 ntfs_extent_mft_record_unmap(ni); 3136 ntfs_inode_reclaim(ni); 3137 /* 3138 * Clear the bit in the $MFT/$BITMAP corresponding to this record thus 3139 * making it available for someone else to allocate it. 
3140 */ 3141 lck_rw_lock_exclusive(&vol->mftbmp_lock); 3142 err = vnode_get(vol->mftbmp_ni->vn); 3143 if (err) 3144 ntfs_error(vol->mp, "Failed to get vnode for $MFT/$BITMAP."); 3145 else { 3146 lck_rw_lock_shared(&vol->mftbmp_ni->lock); 3147 err = ntfs_bitmap_clear_bit(vol->mftbmp_ni, mft_no); 3148 lck_rw_unlock_shared(&vol->mftbmp_ni->lock); 3149 (void)vnode_put(vol->mftbmp_ni->vn); 3150 if (!err) { 3151 /* 3152 * We cleared a bit in the mft bitmap thus we need to 3153 * reflect this in the cached number of free mft 3154 * records. 3155 */ 3156 vol->nr_free_mft_records++; 3157 if (vol->nr_free_mft_records >= vol->nr_mft_records) 3158 panic("%s(): vol->nr_free_mft_records > " 3159 "vol->nr_mft_records\n", 3160 __FUNCTION__); 3161 } 3162 } 3163 lck_rw_unlock_exclusive(&vol->mftbmp_lock); 3164 if (err) { 3165 /* 3166 * The extent inode is gone but we failed to deallocate it in 3167 * the mft bitmap. Just emit a warning and leave the volume 3168 * dirty on umount. 3169 */ 3170 ntfs_error(vol->mp, "Failed to mark extent mft record as " 3171 "unused in mft bitmap.%s", es); 3172 NVolSetErrors(vol); 3173 } 3174 return 0; 3175} 3176