1/* 2 * ntfs_attr.c - NTFS kernel attribute operations. 3 * 4 * Copyright (c) 2006-2011 Anton Altaparmakov. All Rights Reserved. 5 * Portions Copyright (c) 2006-2011 Apple Inc. All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 * 30 * ALTERNATIVELY, provided that this notice and licensing terms are retained in 31 * full, this file may be redistributed and/or modified under the terms of the 32 * GNU General Public License (GPL) Version 2, in which case the provisions of 33 * that version of the GPL will apply to you instead of the license terms 34 * above. You can obtain a copy of the GPL Version 2 at 35 * http://developer.apple.com/opensource/licenses/gpl-2.txt. 36 */ 37 38#include <sys/errno.h> 39#include <sys/stat.h> 40#include <sys/ucred.h> 41#include <sys/ubc.h> 42 43#include <string.h> 44 45#include <libkern/libkern.h> 46#include <libkern/OSMalloc.h> 47 48#include <kern/debug.h> 49#include <kern/sched_prim.h> 50 51#include "ntfs.h" 52#include "ntfs_attr.h" 53#include "ntfs_attr_list.h" 54#include "ntfs_debug.h" 55#include "ntfs_dir.h" 56#include "ntfs_endian.h" 57#include "ntfs_index.h" 58#include "ntfs_inode.h" 59#include "ntfs_layout.h" 60#include "ntfs_lcnalloc.h" 61#include "ntfs_mft.h" 62#include "ntfs_page.h" 63#include "ntfs_runlist.h" 64#include "ntfs_time.h" 65#include "ntfs_types.h" 66#include "ntfs_unistr.h" 67 68ntfschar AT_UNNAMED[1] = { 0 }; 69 70/** 71 * ntfs_attr_map_runlist - map the whole runlist of an ntfs inode 72 * @ni: ntfs inode for which to map the whole runlist 73 * 74 * Map the whole runlist of the ntfs inode @ni. 75 * 76 * Return 0 on success and errno on error. 77 * 78 * Note this function requires the runlist not to be mapped yet at all. This 79 * limitation is ok because we only use this function at mount time to map the 80 * runlist of some system files thus we are guaranteed that they will not have 81 * any runlist fragments mapped yet. 82 * 83 * Note the runlist can be NULL after this function returns if the attribute 84 * has zero allocated size, i.e. there simply is no runlist. 
 */
errno_t ntfs_attr_map_runlist(ntfs_inode *ni)
{
	VCN vcn, end_vcn;
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	errno_t err = 0;

	ntfs_debug("Entering for mft_no 0x%llx, type 0x%x.",
			(unsigned long long)ni->mft_no,
			(unsigned)le32_to_cpu(ni->type));
	/* If the attribute is resident there is nothing to do. */
	if (!NInoNonResident(ni)) {
		ntfs_debug("Done (resident, nothing to do).");
		return 0;
	}
	lck_rw_lock_exclusive(&ni->rl.lock);
	/* Verify that the runlist is not mapped yet. */
	if (ni->rl.alloc && ni->rl.elements)
		panic("%s(): ni->rl.alloc && ni->rl.elements\n", __FUNCTION__);
	/*
	 * For an attribute inode the attribute extents are described in the
	 * base inode's mft record, so search from the base inode.
	 */
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	err = ntfs_mft_record_map(base_ni, &m);
	if (err)
		goto err;
	ctx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!ctx) {
		err = ENOMEM;
		goto unm_err;
	}
	/*
	 * Walk all attribute extents from vcn 0 up to the end of the
	 * allocation, decompressing each extent's mapping pairs array into
	 * the runlist.
	 */
	vcn = 0;
	end_vcn = ni->allocated_size >> ni->vol->cluster_size_shift;
	do {
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, vcn,
				NULL, 0, ctx);
		if (err) {
			/*
			 * A missing extent for a vcn below the allocated size
			 * means the attribute is corrupt, not merely absent.
			 */
			if (err == ENOENT)
				err = EIO;
			break;
		}
		a = ctx->a;
		if (!a->non_resident) {
corrupt_err:
			ntfs_error(ni->vol->mp, "Inode 0x%llx contains corrupt "
					"attribute extent, run chkdsk.",
					(unsigned long long)base_ni->mft_no);
			NVolSetErrors(ni->vol);
			err = EIO;
			break;
		}
		/*
		 * If we are in the first attribute extent, verify the cached
		 * allocated size is correct.
		 */
		if (!a->lowest_vcn)
			if (sle64_to_cpu(a->allocated_size) !=
					ni->allocated_size)
				panic("%s(): sle64_to_cpu(a->allocated_size) "
						"!= ni->allocated_size\n",
						__FUNCTION__);
		/*
		 * Sanity check the lowest_vcn of the attribute is equal to the
		 * vcn we looked up and that the highest_vcn of the attribute
		 * is above the current vcn.
		 */
		if (sle64_to_cpu(a->lowest_vcn) != vcn || (vcn &&
				sle64_to_cpu(a->highest_vcn) < vcn))
			goto corrupt_err;
		/* Determine the next vcn. */
		vcn = sle64_to_cpu(a->highest_vcn) + 1;
		/*
		 * Finally, map the runlist fragment contained in this
		 * attribute extent.
		 */
		err = ntfs_mapping_pairs_decompress(ni->vol, a, &ni->rl);
	} while (!err && vcn < end_vcn);
unm_err:
	ntfs_attr_search_ctx_put(ctx);
	ntfs_mft_record_unmap(base_ni);
err:
	lck_rw_unlock_exclusive(&ni->rl.lock);
	if (!err)
		ntfs_debug("Done.");
	else
		ntfs_error(ni->vol->mp, "Failed (error %d).", (int)err);
	return err;
}

/**
 * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
 * @ni:		ntfs inode for which to map (part of) a runlist
 * @vcn:	map runlist part containing this vcn
 * @ctx:	active attribute search context if present or NULL if not
 *
 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
 *
 * If @ctx is specified, it is an active search context of @ni and its base mft
 * record.  This is needed when ntfs_map_runlist_nolock() encounters unmapped
 * runlist fragments and allows their mapping.  If you do not have the mft
 * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock()
 * will perform the necessary mapping and unmapping.
 *
 * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and
 * restores it before returning.  Thus, @ctx will be left pointing to the same
 * attribute on return as on entry.  However, the actual pointers in @ctx may
 * point to different memory locations on return, so you must remember to reset
 * any cached pointers from the @ctx, i.e. after the call to
 * ntfs_map_runlist_nolock(), you will probably want to do:
 *	m = ctx->m;
 *	a = ctx->a;
 * Assuming you cache ctx->a in a variable @a of type ATTR_RECORD * and that
 * you cache ctx->m in a variable @m of type MFT_RECORD *.
 *
 * Return 0 on success and errno on error.  There is one special error code
 * which is not an error as such.  This is ENOENT.  It means that @vcn is out
 * of bounds of the runlist.
 *
 * Note the runlist can be NULL after this function returns if @vcn is zero and
 * the attribute has zero allocated size, i.e. there simply is no runlist.
 *
 * WARNING: If @ctx is supplied, regardless of whether success or failure is
 *	    returned, you need to check @ctx->is_error and if 1 the @ctx is no
 *	    longer valid, i.e. you need to either call
 *	    ntfs_attr_search_ctx_reinit() or ntfs_attr_search_ctx_put() on it.
 *	    In that case @ctx->error will give you the error code for why the
 *	    mapping of the old inode failed.
 *	    Also if @ctx is supplied and the current attribute (or the mft
 *	    record it is in) has been modified then the caller must call
 *	    NInoSetMrecNeedsDirtying(ctx->ni); before calling
 *	    ntfs_map_runlist_nolock() or the changes may be lost.
 *
 * Locking: - The runlist described by @ni must be locked for writing on entry
 *	      and is locked on return.  Note the runlist will be modified.
 *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
 *	      entry and it will be left unmapped on return.
 *	    - If @ctx is not NULL, the base mft record must be mapped on entry
 *	      and it will be left mapped on return.
 */
errno_t ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn,
		ntfs_attr_search_ctx *ctx)
{
	VCN end_vcn;
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	errno_t err = 0;
	BOOL ctx_is_temporary, ctx_needs_reset;
	ntfs_attr_search_ctx old_ctx = { { NULL, }, };

	ntfs_debug("Entering for mft_no 0x%llx, vcn 0x%llx.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)vcn);
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	if (!ctx) {
		/*
		 * No search context was supplied: map the base mft record and
		 * get a temporary context which is torn down again below.
		 */
		ctx_is_temporary = ctx_needs_reset = TRUE;
		err = ntfs_mft_record_map(base_ni, &m);
		if (err)
			goto done;
		ctx = ntfs_attr_search_ctx_get(base_ni, m);
		if (!ctx) {
			err = ENOMEM;
			goto err;
		}
	} else {
		VCN allocated_size_vcn;

		if (ctx->is_error)
			panic("%s(): ctx->is_error\n", __FUNCTION__);
		a = ctx->a;
		if (!a->non_resident)
			panic("%s(): !a->non_resident\n", __FUNCTION__);
		ctx_is_temporary = FALSE;
		end_vcn = sle64_to_cpu(a->highest_vcn);
		lck_spin_lock(&ni->size_lock);
		allocated_size_vcn = ni->allocated_size >>
				ni->vol->cluster_size_shift;
		lck_spin_unlock(&ni->size_lock);
		/*
		 * If we already have the attribute extent containing @vcn in
		 * @ctx, no need to look it up again.  We slightly cheat in
		 * that if vcn exceeds the allocated size, we will refuse to
		 * map the runlist below, so there is definitely no need to get
		 * the right attribute extent.
		 */
		if (vcn >= allocated_size_vcn || (a->type == ni->type &&
				a->name_length == ni->name_len &&
				!bcmp((u8*)a + le16_to_cpu(a->name_offset),
				ni->name, ni->name_len) &&
				sle64_to_cpu(a->lowest_vcn) <= vcn &&
				end_vcn >= vcn))
			ctx_needs_reset = FALSE;
		else {
			/* Save the old search context. */
			old_ctx = *ctx;
			/*
			 * Reinitialize the search context so we can lookup the
			 * needed attribute extent.
			 */
			ntfs_attr_search_ctx_reinit(ctx);
			ctx_needs_reset = TRUE;
		}
	}
	if (ctx_needs_reset) {
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, vcn,
				NULL, 0, ctx);
		if (err) {
			/* An absent extent means the attribute is corrupt. */
			if (err == ENOENT)
				err = EIO;
			goto err;
		}
		if (!ctx->a->non_resident)
			panic("%s(): !a->non_resident!\n", __FUNCTION__);
	}
	a = ctx->a;
	/*
	 * Only decompress the mapping pairs if @vcn is inside it.  Otherwise
	 * we get into problems when we try to map an out of bounds vcn because
	 * we then try to map the already mapped runlist fragment and
	 * ntfs_mapping_pairs_decompress() fails.
	 */
	end_vcn = sle64_to_cpu(a->highest_vcn) + 1;
	if (vcn && vcn >= end_vcn) {
		err = ENOENT;
		goto err;
	}
	err = ntfs_mapping_pairs_decompress(ni->vol, a, &ni->rl);
err:
	if (ctx_is_temporary) {
		if (ctx)
			ntfs_attr_search_ctx_put(ctx);
		ntfs_mft_record_unmap(base_ni);
	} else if (ctx_needs_reset) {
		/*
		 * If there is no attribute list, restoring the search context
		 * is accomplished simply by copying the saved context back
		 * over the caller supplied context.  If there is an attribute
		 * list, things are more complicated as we need to deal with
		 * mapping of mft records and resulting potential changes in
		 * pointers.
		 */
		if (NInoAttrList(base_ni)) {
			/*
			 * If the currently mapped (extent) inode is not the
			 * one we had before, we need to unmap it and map the
			 * old one.
			 */
			if (ctx->ni != old_ctx.ni) {
				/*
				 * If the currently mapped inode is not the
				 * base inode, unmap it.
				 */
				if (ctx->base_ni && ctx->ni != ctx->base_ni) {
					ntfs_extent_mft_record_unmap(ctx->ni);
					ctx->m = ctx->base_m;
					if (!ctx->m)
						panic("%s(): !ctx->m\n",
								__FUNCTION__);
				}
				/*
				 * If the old mapped inode is not the base
				 * inode, map it.
				 */
				if (old_ctx.base_ni && old_ctx.ni !=
						old_ctx.base_ni) {
					errno_t err2;
retry_map:
					err2 = ntfs_mft_record_map(old_ctx.ni,
							&ctx->m);
					/*
					 * Something bad has happened.  If out
					 * of memory retry till it succeeds.
					 * Any other errors are fatal and we
					 * return the error code in ctx->m.
					 * Let the caller deal with it...  We
					 * just need to fudge things so the
					 * caller can reinit and/or put the
					 * search context safely.
					 */
					if (err2) {
						if (err2 == ENOMEM) {
							(void)thread_block(
								THREAD_CONTINUE_NULL);
							goto retry_map;
						}
						ctx->is_error = 1;
						ctx->error = err2;
						old_ctx.ni = old_ctx.base_ni;
					}
				}
			}
			if (ctx->is_error) {
				old_ctx.is_error = 1;
				old_ctx.error = ctx->error;
			} else if (ctx->m != old_ctx.m) {
				/*
				 * Update the changed pointers in the saved
				 * context.
				 */
				old_ctx.a = (ATTR_RECORD*)((u8*)ctx->m +
						((u8*)old_ctx.a -
						(u8*)old_ctx.m));
				old_ctx.m = ctx->m;
			}
		}
		/* Restore the search context to the saved one. */
		*ctx = old_ctx;
	}
done:
	ntfs_debug("Done (error %d).", (int)err);
	return err;
}

/**
 * ntfs_attr_vcn_to_lcn_nolock - convert a vcn into a lcn given an ntfs inode
 * @ni:			ntfs inode of the attribute whose runlist to search
 * @vcn:		vcn to convert
 * @write_locked:	true if the runlist is locked for writing
 * @clusters:		optional destination for number of contiguous clusters
 *
 * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
 * described by the ntfs inode @ni and return the corresponding logical cluster
 * number (lcn).
 *
 * If the @vcn is not mapped yet, the attempt is made to map the attribute
 * extent containing the @vcn and the vcn to lcn conversion is retried.
 *
 * If @write_locked is true the caller has locked the runlist for writing and
 * if false for reading.
 *
 * If @clusters is not NULL, on success (i.e. we return >= LCN_HOLE) we return
 * the number of contiguous clusters after the returned lcn in *@clusters.
 *
 * Since lcns must be >= 0, we use negative return codes with special meaning:
 *
 * Return code		Meaning / Description
 * ==========================================
 * LCN_HOLE		Hole / not allocated on disk.
 * LCN_ENOENT		There is no such vcn in the runlist, i.e. @vcn is out of bounds.
 * LCN_ENOMEM		Not enough memory to map runlist.
 * LCN_EIO		Critical error (runlist/file is corrupt, i/o error, etc).
 *
 * Locking: - The runlist must be locked on entry and is left locked on return.
 *	    - If @write_locked is FALSE, i.e. the runlist is locked for reading,
 *	      the lock may be dropped inside the function so you cannot rely on
 *	      the runlist still being the same when this function returns.
 */
LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
		const BOOL write_locked, s64 *clusters)
{
	LCN lcn;
	BOOL need_lock_switch = FALSE;
	BOOL is_retry = FALSE;

	ntfs_debug("Entering for mft_no 0x%llx, vcn 0x%llx, %s_locked.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)vcn,
			write_locked ? "write" : "read");
	if (!NInoNonResident(ni))
		panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
	if (vcn < 0)
		panic("%s(): vcn < 0\n", __FUNCTION__);
retry_remap:
	if (!ni->rl.elements) {
		lck_spin_lock(&ni->size_lock);
		if (!ni->allocated_size) {
			/*
			 * Zero allocated size means there is no runlist at
			 * all, so any vcn is trivially out of bounds.
			 */
			lck_spin_unlock(&ni->size_lock);
			lcn = LCN_ENOENT;
			goto lcn_enoent;
		}
		lck_spin_unlock(&ni->size_lock);
		/*
		 * An empty runlist with a non-zero allocated size is only
		 * acceptable before we have tried to map it.
		 */
		if (!is_retry)
			goto try_to_map;
		lcn = LCN_EIO;
		goto lcn_eio;
	}
	/* Convert vcn to lcn.  If that fails map the runlist and retry once. */
	lcn = ntfs_rl_vcn_to_lcn(ni->rl.rl, vcn, clusters);
	if (lcn >= LCN_HOLE) {
		/* Success: restore the shared lock if we had upgraded it. */
		if (need_lock_switch)
			lck_rw_lock_exclusive_to_shared(&ni->rl.lock);
		ntfs_debug("Done (lcn 0x%llx, clusters 0x%llx).",
				(unsigned long long)lcn,
				clusters ? (unsigned long long)*clusters : 0);
		return lcn;
	}
	if (lcn != LCN_RL_NOT_MAPPED) {
		if (lcn != LCN_ENOENT)
			lcn = LCN_EIO;
	} else if (!is_retry) {
		errno_t err;

try_to_map:
		if (!write_locked && !need_lock_switch) {
			need_lock_switch = TRUE;
			/*
			 * If converting the lock from shared to exclusive
			 * fails, need to take the lock for writing and retry
			 * in case the racing process did the mapping for us.
			 */
			if (!lck_rw_lock_shared_to_exclusive(&ni->rl.lock)) {
				lck_rw_lock_exclusive(&ni->rl.lock);
				goto retry_remap;
			}
		}
		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
		if (!err) {
			is_retry = TRUE;
			goto retry_remap;
		}
		/* Translate the errno into the corresponding LCN_* code. */
		switch (err) {
		case ENOENT:
			lcn = LCN_ENOENT;
			break;
		case ENOMEM:
			lcn = LCN_ENOMEM;
			break;
		default:
			lcn = LCN_EIO;
		}
	}
lcn_eio:
	if (need_lock_switch)
		lck_rw_lock_exclusive_to_shared(&ni->rl.lock);
	if (lcn == LCN_ENOENT) {
lcn_enoent:
		ntfs_debug("Done (LCN_ENOENT).");
	} else
		ntfs_error(ni->vol->mp, "Failed (error %lld).", (long long)lcn);
	return lcn;
}

/**
 * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
 * @ni:		ntfs inode of the attribute whose runlist to search
 * @vcn:	vcn to find
 * @run:	return pointer for the found runlist element
 * @ctx:	active attribute search context if present or NULL if not
 *
 * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
 * described by the ntfs inode @ni and return the address of the runlist
 * element containing the @vcn in *@run.
530 * 531 * If the @vcn is not mapped yet, the attempt is made to map the attribute 532 * extent containing the @vcn and the vcn to lcn conversion is retried. 533 * 534 * If @ctx is specified, it is an active search context of @ni and its base mft 535 * record. This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped 536 * runlist fragments and allows their mapping. If you do not have the mft 537 * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock() 538 * will perform the necessary mapping and unmapping. 539 * 540 * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and 541 * restores it before returning. Thus, @ctx will be left pointing to the same 542 * attribute on return as on entry. However, the actual pointers in @ctx may 543 * point to different memory locations on return, so you must remember to reset 544 * any cached pointers from the @ctx, i.e. after the call to 545 * ntfs_attr_find_vcn_nolock(), you will probably want to do: 546 * m = ctx->m; 547 * a = ctx->a; 548 * Assuming you cache ctx->a in a variable @a of type ATTR_RECORD * and that 549 * you cache ctx->m in a variable @m of type MFT_RECORD *. 550 * Note you need to distinguish between the lcn of the returned runlist element 551 * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on 552 * read and allocate clusters on write. 553 * 554 * Return 0 on success and errno on error. 555 * 556 * The possible error return codes are: 557 * ENOENT - No such vcn in the runlist, i.e. @vcn is out of bounds. 558 * ENOMEM - Not enough memory to map runlist. 559 * EIO - Critical error (runlist/file is corrupt, i/o error, etc). 560 * 561 * WARNING: If @ctx is supplied, regardless of whether success or failure is 562 * returned, you need to check @ctx->is_error and if 1 the @ctx is no 563 * longer valid, i.e. you need to either call 564 * ntfs_attr_search_ctx_reinit() or ntfs_attr_search_ctx_put() on it. 
565 * In that case @ctx->error will give you the error code for why the 566 * mapping of the old inode failed. 567 * Also if @ctx is supplied and the current attribute (or the mft 568 * record it is in) has been modified then the caller must call 569 * NInoSetMrecNeedsDirtying(ctx->ni); before calling 570 * ntfs_map_runlist_nolock() or the changes may be lost. 571 * 572 * Locking: - The runlist described by @ni must be locked for writing on entry 573 * and is locked on return. Note the runlist may be modified when 574 * needed runlist fragments need to be mapped. 575 * - If @ctx is NULL, the base mft record of @ni must not be mapped on 576 * entry and it will be left unmapped on return. 577 * - If @ctx is not NULL, the base mft record must be mapped on entry 578 * and it will be left mapped on return. 579 */ 580errno_t ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, 581 ntfs_rl_element **run, ntfs_attr_search_ctx *ctx) 582{ 583 ntfs_rl_element *rl; 584 errno_t err = 0; 585 BOOL is_retry = FALSE; 586 587 ntfs_debug("Entering for mft_no 0x%llx, vcn 0x%llx, with%s ctx.", 588 (unsigned long long)ni->mft_no, 589 (unsigned long long)vcn, ctx ? 
"" : "out"); 590 if (!NInoNonResident(ni)) 591 panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__); 592 if (vcn < 0) 593 panic("%s(): vcn < 0\n", __FUNCTION__); 594retry_remap: 595 if (!ni->rl.elements) { 596 lck_spin_lock(&ni->size_lock); 597 if (!ni->allocated_size) { 598 lck_spin_unlock(&ni->size_lock); 599 return LCN_ENOENT; 600 } 601 lck_spin_unlock(&ni->size_lock); 602 if (!is_retry) 603 goto try_to_map; 604 err = EIO; 605 goto err; 606 } 607 rl = ni->rl.rl; 608 if (vcn >= rl[0].vcn) { 609 while (rl->length) { 610 if (vcn < rl[1].vcn) { 611 if (rl->lcn >= LCN_HOLE) { 612 ntfs_debug("Done."); 613 *run = rl; 614 return 0; 615 } 616 break; 617 } 618 rl++; 619 } 620 if (rl->lcn != LCN_RL_NOT_MAPPED) { 621 if (rl->lcn == LCN_ENOENT) 622 err = ENOENT; 623 else 624 err = EIO; 625 } 626 } 627 if (!err && !is_retry) { 628 /* 629 * If the search context is invalid we cannot map the unmapped 630 * region. 631 */ 632 if (ctx->is_error) 633 err = ctx->error; 634 else { 635try_to_map: 636 /* 637 * The @vcn is in an unmapped region, map the runlist 638 * and retry. 639 */ 640 err = ntfs_map_runlist_nolock(ni, vcn, ctx); 641 if (!err) { 642 is_retry = TRUE; 643 goto retry_remap; 644 } 645 } 646 if (err == EINVAL) 647 err = EIO; 648 } else if (!err) 649 err = EIO; 650err: 651 if (err != ENOENT) 652 ntfs_error(ni->vol->mp, "Failed (error %d).", err); 653 return err; 654} 655 656/** 657 * ntfs_attr_search_ctx_reinit - reinitialize an attribute search context 658 * @ctx: attribute search context to reinitialize 659 * 660 * Reinitialize the attribute search context @ctx, unmapping an associated 661 * extent mft record if present, and initialize the search context again. 662 * 663 * This is used when a search for a new attribute is being started to reset 664 * the search context to the beginning. 665 * 666 * Note: We preserve the content of @ctx->is_mft_locked so that reinitializing 667 * a search context can also be done when dealing with the mft itself. 
668 */ 669void ntfs_attr_search_ctx_reinit(ntfs_attr_search_ctx *ctx) 670{ 671 const BOOL mft_is_locked = ctx->is_mft_locked; 672 673 if (!ctx->base_ni) { 674 /* No attribute list. */ 675 ctx->is_first = 1; 676 ctx->is_iteration = 0; 677 /* Sanity checks are performed elsewhere. */ 678 ctx->a = (ATTR_RECORD*)((u8*)ctx->m + 679 le16_to_cpu(ctx->m->attrs_offset)); 680 /* 681 * This needs resetting due to 682 * ntfs_attr_find_in_attribute_list() which can leave it set 683 * despite having zeroed ctx->base_ni. 684 */ 685 ctx->al_entry = NULL; 686 return; 687 } 688 /* Attribute list. */ 689 if (ctx->ni != ctx->base_ni) 690 ntfs_extent_mft_record_unmap(ctx->ni); 691 ntfs_attr_search_ctx_init(ctx, ctx->base_ni, ctx->base_m); 692 if (mft_is_locked) 693 ctx->is_mft_locked = 1; 694} 695 696/** 697 * ntfs_attr_search_ctx_get - allocate and init a new attribute search context 698 * @ni: ntfs inode with which to initialize the search context 699 * @m: mft record with which to initialize the search context 700 * 701 * Allocate a new attribute search context, initialize it with @ni and @m, and 702 * return it. Return NULL if allocation failed. 703 */ 704ntfs_attr_search_ctx *ntfs_attr_search_ctx_get(ntfs_inode *ni, MFT_RECORD *m) 705{ 706 ntfs_attr_search_ctx *ctx; 707 708 ctx = OSMalloc(sizeof(ntfs_attr_search_ctx), ntfs_malloc_tag); 709 if (ctx) 710 ntfs_attr_search_ctx_init(ctx, ni, m); 711 return ctx; 712} 713 714/** 715 * ntfs_attr_search_ctx_put - release an attribute search context 716 * @ctx: attribute search context to free 717 * 718 * Release the attribute search context @ctx, unmapping an associated extent 719 * mft record if present. 
720 */ 721void ntfs_attr_search_ctx_put(ntfs_attr_search_ctx *ctx) 722{ 723 if (ctx->base_ni && ctx->ni != ctx->base_ni) 724 ntfs_extent_mft_record_unmap(ctx->ni); 725 OSFree(ctx, sizeof(ntfs_attr_search_ctx), ntfs_malloc_tag); 726} 727 728/** 729 * ntfs_attr_find_in_mft_record - find (next) attribute in mft record 730 * @type: attribute type to find 731 * @name: attribute name to find (optional, i.e. NULL means do not care) 732 * @name_len: attribute name length (only needed if @name present) 733 * @val: attribute value to find (optional, resident attributes only) 734 * @val_len: attribute value length (only needed if @val present) 735 * @ctx: search context with mft record and attribute to search from 736 * 737 * You should not need to call this function directly. Use ntfs_attr_lookup() 738 * instead. 739 * 740 * ntfs_attr_find_in_mft_record() takes a search context @ctx as parameter and 741 * searches the mft record specified by @ctx->m, beginning at @ctx->a, for an 742 * attribute of @type, optionally @name and @val. 743 * 744 * If the attribute is found, ntfs_attr_find_in_mft_record() returns 0 and 745 * @ctx->a is set to point to the found attribute. 746 * 747 * If the attribute is not found, ENOENT is returned and @ctx->a is set to 748 * point to the attribute before which the attribute being searched for would 749 * need to be inserted if such an action were to be desired. 750 * 751 * On actual error, ntfs_attr_find_in_mft_record() returns EIO. In this case 752 * @ctx->a is undefined and in particular do not rely on it not having changed. 753 * 754 * If @ctx->is_first is 1, the search begins with @ctx->a itself. If it is 0, 755 * the search begins after @ctx->a. 756 * 757 * If @ctx->is_iteration is 1 and @type is AT_UNUSED this is not a search but 758 * an iteration in which case each attribute in the mft record is returned in 759 * turn with each call to ntfs_attr_find_in_mft_record(). 
Note all attributes 760 * are returned including the attribute list attribute, unlike when 761 * @ctx->is_iteration is 0 when it is not returned unless it is specifically 762 * looked for. 763 * 764 * Similarly to the above, when @ctx->is_iterations is 1 and @type is not 765 * AT_UNUSED all attributes of type @type are returned one after the other. 766 * 767 * If @name is AT_UNNAMED search for an unnamed attribute. If @name is present 768 * but not AT_UNNAMED search for a named attribute matching @name. Otherwise, 769 * match both named and unnamed attributes. 770 * 771 * Finally, the resident attribute value @val is looked for, if present. If 772 * @val is not present (NULL), @val_len is ignored. 773 * 774 * ntfs_attr_find_in_mft_record() only searches the specified mft record and it 775 * ignores the presence of an attribute list attribute (unless it is the one 776 * being searched for, obviously). If you need to take attribute lists into 777 * consideration, use ntfs_attr_lookup() instead (see below). This also means 778 * that you cannot use ntfs_attr_find_in_mft_record() to search for extent 779 * records of non-resident attributes, as extents with lowest_vcn != 0 are 780 * usually described by the attribute list attribute only. Note that it is 781 * possible that the first extent is only in the attribute list while the last 782 * extent is in the base mft record, so do not rely on being able to find the 783 * first extent in the base mft record. 784 * 785 * Warning: Never use @val when looking for attribute types which can be 786 * non-resident as this most likely will result in a crash! 787 * 788 * Note if the volume is mounted case sensitive we treat attribute names as 789 * being case sensitive and vice versa if the volume is not mounted case 790 * sensitive we treat attribute names as being case insensitive also. 
 */
errno_t ntfs_attr_find_in_mft_record(const ATTR_TYPE type,
		const ntfschar *name, const u32 name_len,
		const void *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
{
	ATTR_RECORD *a;
	ntfs_volume *vol = ctx->ni->vol;
	const ntfschar *upcase = vol->upcase;
	const u32 upcase_len = vol->upcase_len;
	const BOOL case_sensitive = NVolCaseSensitive(vol);
	const BOOL is_iteration = ctx->is_iteration;

	/*
	 * Iterate over attributes in mft record starting at @ctx->a, or the
	 * attribute following that, if @ctx->is_first is true.
	 */
	if (ctx->is_first) {
		a = ctx->a;
		ctx->is_first = 0;
	} else
		a = (ATTR_RECORD*)((u8*)ctx->a + le32_to_cpu(ctx->a->length));
	for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
		/*
		 * Bounds check: the attribute record must lie within the mft
		 * record, otherwise the record is corrupt.
		 */
		if ((u8*)a < (u8*)ctx->m || (u8*)a > (u8*)ctx->m +
				le32_to_cpu(ctx->m->bytes_allocated))
			break;
		ctx->a = a;
		/*
		 * Attributes are ordered by ascending type in the mft record,
		 * so once we pass @type or hit the AT_END marker the search
		 * has failed.  (The type check is skipped when iterating over
		 * all attributes with @type == AT_UNUSED.)
		 */
		if (((!is_iteration || type != AT_UNUSED) &&
				le32_to_cpu(a->type) > le32_to_cpu(type)) ||
				a->type == AT_END)
			return ENOENT;
		/* A zero length attribute record means corruption. */
		if (!a->length)
			break;
		if (is_iteration) {
			if (type == AT_UNUSED || type == a->type)
				return 0;
		}
		if (a->type != type)
			continue;
		/*
		 * If @name is AT_UNNAMED we want an unnamed attribute.
		 * If @name is present, compare the two names.
		 * Otherwise, match any attribute.
		 */
		if (name == AT_UNNAMED) {
			/* The search failed if the found attribute is named. */
			if (a->name_length)
				return ENOENT;
		} else if (name) {
			unsigned len, ofs;

			len = a->name_length;
			ofs = le16_to_cpu(a->name_offset);
			/* The name must fit inside the attribute record. */
			if (ofs + (len * sizeof(ntfschar)) >
					le32_to_cpu(a->length))
				break;
			if (!ntfs_are_names_equal(name, name_len,
					(ntfschar*)((u8*)a + ofs), len,
					case_sensitive, upcase, upcase_len)) {
				int rc;

				rc = ntfs_collate_names(name, name_len,
						(ntfschar*)((u8*)a + ofs), len,
						1, FALSE, upcase, upcase_len);
				/*
				 * If @name collates before a->name, there is
				 * no matching attribute.
				 */
				if (rc == -1)
					return ENOENT;
				/*
				 * If the strings are not equal, continue
				 * searching.
				 */
				if (rc)
					continue;
				/*
				 * Names compared equal ignoring case; collate
				 * again case sensitively to decide between a
				 * definitive miss and continuing the search.
				 */
				rc = ntfs_collate_names(name, name_len,
						(ntfschar*)((u8*)a + ofs), len,
						1, TRUE, upcase, upcase_len);
				if (rc == -1)
					return ENOENT;
				if (rc)
					continue;
			}
		}
		/*
		 * The names match or @name not present and attribute is
		 * unnamed.  If no @val specified, we have found the attribute
		 * and are done.
		 */
		if (!val)
			return 0;
		/* @val is present; compare values. */
		else {
			unsigned len, ofs;
			int rc;

			len = le32_to_cpu(a->value_length);
			ofs = le16_to_cpu(a->value_offset);
			/* The value must fit inside the attribute record. */
			if (ofs + len > le32_to_cpu(a->length))
				break;
			rc = memcmp(val, (u8*)a + ofs,
					len <= val_len ? len : val_len);
			/*
			 * If @val collates before the value of the current
			 * attribute, there is no matching attribute.
			 */
			if (!rc) {
				if (val_len == len)
					return 0;
				if (val_len < len)
					return ENOENT;
			} else if (rc < 0)
				return ENOENT;
		}
	}
	/* Only reached when the mft record is corrupt. */
	ntfs_error(vol->mp, "Inode is corrupt. Run chkdsk.");
	NVolSetErrors(vol);
	return EIO;
}

/**
 * ntfs_attr_find_in_attribute_list - find an attribute in the attribute list
 * @type:	attribute type to find
 * @name:	attribute name to find (optional, i.e.
NULL means do not care) 915 * @name_len: attribute name length (only needed if @name present) 916 * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only) 917 * @val: attribute value to find (optional, resident attributes only) 918 * @val_len: attribute value length (only needed if @val present) 919 * @ctx: search context with mft record and attribute to search from 920 * 921 * You should not need to call this function directly. Use ntfs_attr_lookup() 922 * instead. 923 * 924 * Find an attribute by searching the attribute list for the corresponding 925 * attribute list entry. Having found the entry, map the mft record if the 926 * attribute is in a different mft record/inode, ntfs_attr_find_in_mft_record() 927 * the attribute in there and return it. 928 * 929 * On first search @ctx->ni must be the base mft record and @ctx must have been 930 * obtained from a call to ntfs_attr_search_ctx_get(). On subsequent calls 931 * @ctx->ni can be any extent inode, too (@ctx->base_ni is then the base 932 * inode). 933 * 934 * After finishing with the attribute/mft record you need to call 935 * ntfs_attr_search_ctx_put() to clean up the search context (unmapping any 936 * mapped mft records, etc). 937 * 938 * If the attribute is found, ntfs_attr_find_in_attribute_list() returns 0 and 939 * @ctx->a is set to point to the found attribute. @ctx->m is set to point to 940 * the mft record in which @ctx->a is located and @ctx->al_entry is set to 941 * point to the attribute list entry for the attribute. 942 * 943 * If the attribute is not found, ENOENT is returned and @ctx->a is set to 944 * point to the attribute in the base mft record before which the attribute 945 * being searched for would need to be inserted if such an action were to be 946 * desired. @ctx->m is set to point to the mft record in which @ctx->a is 947 * located, i.e. 
the base mft record, and @ctx->al_entry is set to point to the 948 * attribute list entry of the attribute before which the attribute being 949 * searched for would need to be inserted if such an action were to be desired. 950 * 951 * Thus to insert the not found attribute, one wants to add the attribute to 952 * @ctx->m (the base mft record) and if there is not enough space, the 953 * attribute should be placed in a newly allocated extent mft record. The 954 * attribute list entry for the inserted attribute should be inserted in the 955 * attribute list attribute at @ctx->al_entry. 956 * 957 * On actual error, ntfs_attr_find_in_attribute_list() returns EIO. In this 958 * case @ctx->a is undefined and in particular do not rely on it not having 959 * changed. 960 * 961 * If @ctx->is_first is 1, the search begins with @ctx->a itself. If it is 0, 962 * the search begins after @ctx->a. 963 * 964 * If @name is AT_UNNAMED search for an unnamed attribute. If @name is present 965 * but not AT_UNNAMED search for a named attribute matching @name. Otherwise, 966 * match both named and unnamed attributes. 967 * 968 * Finally, the resident attribute value @val is looked for, if present. If 969 * @val is not present (NULL), @val_len is ignored. 970 * 971 * Warning: Never use @val when looking for attribute types which can be 972 * non-resident as this most likely will result in a crash! 
 */
static errno_t ntfs_attr_find_in_attribute_list(const ATTR_TYPE type,
		const ntfschar *name, const u32 name_len, const VCN lowest_vcn,
		const void *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
{
	ntfs_inode *base_ni, *ni = ctx->ni;
	ntfs_volume *vol = ni->vol;
	ATTR_LIST_ENTRY *al_entry, *next_al_entry;
	u8 *al_start, *al_end;
	ATTR_RECORD *a;
	ntfschar *al_name;
	const ntfschar *upcase = vol->upcase;
	const u32 upcase_len = vol->upcase_len;
	u32 al_name_len;
	errno_t err = 0;
	static const char es[] = " Unmount and run chkdsk.";
	const BOOL case_sensitive = NVolCaseSensitive(vol);

	/* Iteration mode is only supported by the mft record walker. */
	if (ctx->is_iteration)
		panic("%s(): ctx->is_iteration\n", __FUNCTION__);
	base_ni = ctx->base_ni;
	ntfs_debug("Entering for mft_no 0x%llx, type 0x%x.",
			(unsigned long long)ni->mft_no, le32_to_cpu(type));
	if (!base_ni) {
		/* First call happens with the base mft record. */
		base_ni = ctx->base_ni = ctx->ni;
		ctx->base_m = ctx->m;
	}
	if (ni == base_ni)
		ctx->base_a = ctx->a;
	if (type == AT_END)
		goto not_found;
	al_start = base_ni->attr_list;
	al_end = al_start + base_ni->attr_list_size;
	if (!ctx->al_entry)
		ctx->al_entry = (ATTR_LIST_ENTRY*)al_start;
	/*
	 * Iterate over entries in attribute list starting at @ctx->al_entry,
	 * or the entry following that, depending on the value of
	 * @ctx->is_first.
	 */
	if (ctx->is_first) {
		al_entry = ctx->al_entry;
		ctx->is_first = 0;
	} else
		al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
				le16_to_cpu(ctx->al_entry->length));
	for (;; al_entry = next_al_entry) {
		/* Out of bounds check. */
		if ((u8*)al_entry < base_ni->attr_list ||
				(u8*)al_entry > al_end)
			break;	/* Inode is corrupt. */
		ctx->al_entry = al_entry;
		/* Catch the end of the attribute list. */
		if ((u8*)al_entry == al_end)
			goto not_found;
		/* A zero length entry means corruption. */
		if (!al_entry->length)
			break;
		/*
		 * The fixed-size head of the entry (6 bytes up to and
		 * including the length field) and the whole entry must fit
		 * inside the attribute list buffer.
		 */
		if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
				le16_to_cpu(al_entry->length) > al_end)
			break;
		next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
				le16_to_cpu(al_entry->length));
		/*
		 * Entries are sorted by ascending type value, so a larger
		 * type means the wanted attribute does not exist.
		 */
		if (al_entry->type != type) {
			if (le32_to_cpu(al_entry->type) < le32_to_cpu(type))
				continue;
			goto not_found;
		}
		/*
		 * If @name is AT_UNNAMED we want an unnamed attribute.
		 * If @name is present, compare the two names.
		 * Otherwise, match any attribute.
		 */
		al_name_len = al_entry->name_length;
		al_name = (ntfschar*)((u8*)al_entry + al_entry->name_offset);
		if (name == AT_UNNAMED) {
			if (al_name_len)
				goto not_found;
		} else if (name && !ntfs_are_names_equal(al_name, al_name_len,
				name, name_len, case_sensitive, upcase,
				upcase_len)) {
			int rc;

			rc = ntfs_collate_names(name, name_len, al_name,
					al_name_len, 1, FALSE,
					upcase, upcase_len);
			/*
			 * If @name collates before al_name, there is no
			 * matching attribute.
			 */
			if (rc == -1)
				goto not_found;
			/* If the strings are not equal, continue search. */
			if (rc)
				continue;
			/*
			 * FIXME: Reverse engineering showed 0, IGNORE_CASE but
			 * that would be inconsistent with
			 * ntfs_attr_find_in_mft_record().  The subsequent rc
			 * checks were also different.  Perhaps I made a
			 * mistake in one of the two.  Need to recheck which is
			 * correct or at least see what is going on...
			 */
			rc = ntfs_collate_names(name, name_len, al_name,
					al_name_len, 1, TRUE,
					vol->upcase, vol->upcase_len);
			if (rc == -1)
				goto not_found;
			if (rc)
				continue;
		}
		/*
		 * The names match or @name not present and attribute is
		 * unnamed.  Now check @lowest_vcn.  Continue search if the
		 * next attribute list entry still fits @lowest_vcn.  Otherwise
		 * we have reached the right one or the search has failed.
		 */
		if (lowest_vcn && (u8*)next_al_entry >= al_start &&
				(u8*)next_al_entry + 6 < al_end &&
				(u8*)next_al_entry + le16_to_cpu(
				next_al_entry->length) <= al_end &&
				sle64_to_cpu(next_al_entry->lowest_vcn) <=
				lowest_vcn &&
				next_al_entry->type == al_entry->type &&
				next_al_entry->name_length == al_name_len &&
				ntfs_are_names_equal((ntfschar*)((u8*)
				next_al_entry +
				next_al_entry->name_offset),
				next_al_entry->name_length,
				al_name, al_name_len, case_sensitive,
				vol->upcase, vol->upcase_len))
			continue;
		if (MREF_LE(al_entry->mft_reference) == ni->mft_no) {
			/*
			 * The entry points at the currently mapped mft
			 * record; only the sequence number needs validating.
			 */
			if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no) {
				ntfs_error(vol->mp, "Found stale mft "
						"reference in attribute list "
						"of base inode 0x%llx.%s",
						(unsigned long long)
						base_ni->mft_no, es);
				err = EIO;
				break;
			}
		} else { /* Mft references do not match. */
			/* If there is a mapped record unmap it first. */
			if (ni != base_ni)
				ntfs_extent_mft_record_unmap(ni);
			/* Do we want the base record back? */
			if (MREF_LE(al_entry->mft_reference) ==
					base_ni->mft_no) {
				ni = ctx->ni = base_ni;
				ctx->m = ctx->base_m;
			} else {
				/* We want an extent record. */
				err = ntfs_extent_mft_record_map_ext(base_ni,
						le64_to_cpu(
						al_entry->mft_reference), &ni,
						&ctx->m, ctx->is_mft_locked);
				if (err) {
					ntfs_error(vol->mp, "Failed to map "
							"extent mft record "
							"0x%llx of base inode "
							"0x%llx.%s",
							(unsigned long long)
							MREF_LE(al_entry->
							mft_reference),
							(unsigned long long)
							base_ni->mft_no, es);
					if (err == ENOENT)
						err = EIO;
					/* Cause @ctx to be sanitized below. */
					ni = NULL;
					break;
				}
				ctx->ni = ni;
			}
		}
		a = ctx->a = (ATTR_RECORD*)((u8*)ctx->m +
				le16_to_cpu(ctx->m->attrs_offset));
		/*
		 * ctx->ni, ctx->m, and ctx->a now point to the mft record
		 * containing the attribute represented by the current
		 * al_entry.
		 *
		 * We could call into ntfs_attr_find_in_mft_record() to find
		 * the right attribute in this mft record but this would be
		 * less efficient and not quite accurate as it ignores the
		 * attribute instance numbers for example which become
		 * important when one plays with attribute lists.  Also,
		 * because a proper match has been found in the attribute list
		 * entry above, the comparison can now be optimized.  So it is
		 * worth re-implementing a simplified
		 * ntfs_attr_find_in_mft_record() here.
		 *
		 * Use a manual loop so we can still use break and continue
		 * with the same meanings as above.
		 */
do_next_attr_loop:
		if ((u8*)a < (u8*)ctx->m || (u8*)a > (u8*)ctx->m +
				le32_to_cpu(ctx->m->bytes_allocated))
			break;
		if (a->type == AT_END)
			continue;
		if (!a->length)
			break;
		/* Instance numbers must match between entry and record. */
		if (al_entry->instance != a->instance)
			goto do_next_attr;
		/*
		 * If the type and/or the name are mismatched between the
		 * attribute list entry and the attribute record, there is
		 * corruption so we break and return error EIO.
		 */
		if (al_entry->type != a->type)
			break;
		if (!ntfs_are_names_equal((ntfschar*)((u8*)a +
				le16_to_cpu(a->name_offset)), a->name_length,
				al_name, al_name_len, case_sensitive,
				vol->upcase, vol->upcase_len))
			break;
		ctx->a = a;
		/*
		 * If no @val specified or @val specified and it matches, we
		 * have found it!
		 */
		if (!val || (!a->non_resident &&
				le32_to_cpu(a->value_length) == val_len &&
				!bcmp((u8*)a + le16_to_cpu(a->value_offset),
				val, val_len))) {
			ntfs_debug("Done, found.");
			return 0;
		}
do_next_attr:
		/* Proceed to the next attribute in the current mft record. */
		a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
		goto do_next_attr_loop;
	}
	if (!err) {
		ntfs_error(vol->mp, "Base inode 0x%llx contains corrupt "
				"attribute list attribute.%s",
				(unsigned long long)base_ni->mft_no, es);
		err = EIO;
	}
	/*
	 * Sanitize the search context so the caller can safely release or
	 * reinitialize it: point it back at the base mft record.
	 */
	if (ni != base_ni) {
		if (ni)
			ntfs_extent_mft_record_unmap(ni);
		ctx->ni = base_ni;
		ctx->m = ctx->base_m;
		ctx->a = ctx->base_a;
	}
	if (err != ENOMEM)
		NVolSetErrors(vol);
	return err;
not_found:
	/*
	 * If we were looking for AT_END, we reset the search context @ctx and
	 * use ntfs_attr_find_in_mft_record() to seek to the end of the base
	 * mft record.
	 */
	if (type == AT_END) {
		ntfs_attr_search_ctx_reinit(ctx);
		return ntfs_attr_find_in_mft_record(AT_END, NULL, 0, NULL, 0,
				ctx);
	}
	/*
	 * The attribute was not found.  Before we return, we want to ensure
	 * @ctx->m and @ctx->a indicate the position at which the attribute
	 * should be inserted in the base mft record.  Since we also want to
	 * preserve @ctx->al_entry we cannot reinitialize the search context
	 * using ntfs_attr_search_ctx_reinit() as this would set @ctx->al_entry
	 * to NULL.  Thus we do the necessary bits manually (see
	 * ntfs_attr_search_ctx_init() above).  Note, we postpone setting
	 * @base_a until after the call to ntfs_attr_find_in_mft_record() as we
	 * do not know the correct value yet.
	 */
	if (ni != base_ni)
		ntfs_extent_mft_record_unmap(ni);
	ctx->m = ctx->base_m;
	ctx->a = (ATTR_RECORD*)((u8*)ctx->m +
			le16_to_cpu(ctx->m->attrs_offset));
	ctx->is_first = 1;
	ctx->ni = base_ni;
	/*
	 * In case there are multiple matches in the base mft record, need to
	 * keep enumerating until we get an attribute not found response (or
	 * another error), otherwise we would keep returning the same attribute
	 * over and over again and all programs using us for enumeration would
	 * lock up in a tight loop.
	 */
	do {
		err = ntfs_attr_find_in_mft_record(type, name, name_len,
				val, val_len, ctx);
	} while (!err);
	ctx->base_a = ctx->a;
	ntfs_debug("Done, not found.");
	return err;
}

/**
 * ntfs_attr_lookup - find an attribute in an ntfs inode
 * @type:	attribute type to find
 * @name:	attribute name to find (optional, i.e. NULL means do not care)
 * @name_len:	attribute name length (only needed if @name present)
 * @lowest_vcn:	lowest vcn to find (optional, non-resident attributes only)
 * @val:	attribute value to find (optional, resident attributes only)
 * @val_len:	attribute value length (only needed if @val present)
 * @ctx:	search context with mft record and attribute to search from
 *
 * Find an attribute in an ntfs inode.  On first search @ctx->ni must be the
 * base mft record and @ctx must have been obtained from a call to
 * ntfs_attr_search_ctx_get().
 *
 * This function transparently handles attribute lists and @ctx is used to
 * continue searches where they were left off at.
 *
 * After finishing with the attribute/mft record you need to call
 * ntfs_attr_search_ctx_put() to clean up the search context (unmapping any
 * mapped mft records, etc).
 *
 * Return 0 if the search was successful and errno if not.
1291 * 1292 * On success, @ctx->a is the found attribute and it is in mft record @ctx->m. 1293 * If an attribute list attribute is present, @ctx->al_entry is the attribute 1294 * list entry of the found attribute. 1295 * 1296 * On error ENOENT, @ctx->a is the attribute which collates just after the 1297 * attribute being searched for, i.e. if one wants to add the attribute to the 1298 * mft record this is the correct place to insert it into. If an attribute 1299 * list attribute is present, @ctx->al_entry is the attribute list entry which 1300 * collates just after the attribute list entry of the attribute being searched 1301 * for, i.e. if one wants to add the attribute to the mft record this is the 1302 * correct place to insert its attribute list entry into. 1303 * 1304 * When errno != ENOENT, an error occured during the lookup. @ctx->a is then 1305 * undefined and in particular you should not rely on it not having changed. 1306 * 1307 * Warning: Never use @val when looking for attribute types which can be 1308 * non-resident as this most likely will result in a crash! 1309 */ 1310errno_t ntfs_attr_lookup(const ATTR_TYPE type, 1311 const ntfschar *name, const u32 name_len, const VCN lowest_vcn, 1312 const void *val, const u32 val_len, ntfs_attr_search_ctx *ctx) 1313{ 1314 ntfs_inode *base_ni; 1315 1316 ntfs_debug("Entering."); 1317 if (ctx->base_ni) 1318 base_ni = ctx->base_ni; 1319 else 1320 base_ni = ctx->ni; 1321 /* Sanity check, just for debugging really. 
*/ 1322 if (!base_ni) 1323 panic("%s(): !base_ni\n", __FUNCTION__); 1324 if (!NInoAttrList(base_ni) || type == AT_ATTRIBUTE_LIST) 1325 return ntfs_attr_find_in_mft_record(type, name, name_len, 1326 val, val_len, ctx); 1327 if (ctx->is_iteration) 1328 panic("%s(): ctx->is_iteration\n", __FUNCTION__); 1329 return ntfs_attr_find_in_attribute_list(type, name, name_len, 1330 lowest_vcn, val, val_len, ctx); 1331} 1332 1333/** 1334 * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file 1335 * @vol: ntfs volume to which the attribute belongs 1336 * @type: attribute type which to find 1337 * 1338 * Search for the attribute definition record corresponding to the attribute 1339 * @type in the $AttrDef system file. 1340 * 1341 * Return the attribute type definition record if found and NULL if not found. 1342 */ 1343static ATTR_DEF *ntfs_attr_find_in_attrdef(const ntfs_volume *vol, 1344 const ATTR_TYPE type) 1345{ 1346 ATTR_DEF *ad; 1347 1348 if (!vol->attrdef) 1349 panic("%s(): !vol->attrdef\n", __FUNCTION__); 1350 if (!type) 1351 panic("%s(): !type\n", __FUNCTION__); 1352 for (ad = vol->attrdef; (u8*)ad - (u8*)vol->attrdef < 1353 vol->attrdef_size && ad->type; ++ad) { 1354 /* If we have not found it yet, carry on searching. */ 1355 if (le32_to_cpu(type) > le32_to_cpu(ad->type)) 1356 continue; 1357 /* If we have found the attribute, return it. */ 1358 if (type == ad->type) 1359 return ad; 1360 /* We have gone too far already. No point in continuing. */ 1361 break; 1362 } 1363 /* Attribute not found. */ 1364 ntfs_debug("Attribute type 0x%x not found in $AttrDef.", 1365 le32_to_cpu(type)); 1366 return NULL; 1367} 1368 1369/** 1370 * ntfs_attr_size_bounds_check - check a size of an attribute type for validity 1371 * @vol: ntfs volume to which the attribute belongs 1372 * @type: attribute type which to check 1373 * @size: size which to check 1374 * 1375 * Check whether the @size in bytes is valid for an attribute of @type on the 1376 * ntfs volume @vol. 
This information is obtained from $AttrDef system file. 1377 * 1378 * Return 0 if valid, ERANGE if not valid, and ENOENT if the attribute is not 1379 * listed in $AttrDef. 1380 */ 1381errno_t ntfs_attr_size_bounds_check(const ntfs_volume *vol, 1382 const ATTR_TYPE type, const s64 size) 1383{ 1384 ATTR_DEF *ad; 1385 1386 if (size < 0) 1387 panic("%s(): size < 0\n", __FUNCTION__); 1388 /* 1389 * $ATTRIBUTE_LIST has a maximum size of 256kiB, but this is not 1390 * listed in $AttrDef. 1391 */ 1392 if (type == AT_ATTRIBUTE_LIST && size > NTFS_MAX_ATTR_LIST_SIZE) 1393 return ERANGE; 1394 /* Get the $AttrDef entry for the attribute @type. */ 1395 ad = ntfs_attr_find_in_attrdef(vol, type); 1396 if (!ad) 1397 return ENOENT; 1398 /* Do the bounds check. */ 1399 if ((sle64_to_cpu(ad->min_size) > 0 && 1400 size < sle64_to_cpu(ad->min_size)) || 1401 (sle64_to_cpu(ad->max_size) > 0 && 1402 size > sle64_to_cpu(ad->max_size)) || 1403 (u64)size > NTFS_MAX_ATTRIBUTE_SIZE) 1404 return ERANGE; 1405 return 0; 1406} 1407 1408/** 1409 * ntfs_attr_can_be_non_resident - check if an attribute can be non-resident 1410 * @vol: ntfs volume to which the attribute belongs 1411 * @type: attribute type which to check 1412 * 1413 * Check whether the attribute of @type on the ntfs volume @vol is allowed to 1414 * be non-resident. This information is obtained from $AttrDef system file. 1415 * 1416 * Return 0 if the attribute is allowed to be non-resident, EPERM if not, and 1417 * ENOENT if the attribute is not listed in $AttrDef. 1418 */ 1419static errno_t ntfs_attr_can_be_non_resident(const ntfs_volume *vol, 1420 const ATTR_TYPE type) 1421{ 1422 ATTR_DEF *ad; 1423 1424 /* Find the attribute definition record in $AttrDef. */ 1425 ad = ntfs_attr_find_in_attrdef(vol, type); 1426 if (!ad) 1427 return ENOENT; 1428 /* Check the flags and return the result. 
*/ 1429 if (ad->flags & ATTR_DEF_RESIDENT) 1430 return EPERM; 1431 return 0; 1432} 1433 1434/** 1435 * ntfs_attr_can_be_resident - check if an attribute can be resident 1436 * @vol: ntfs volume to which the attribute belongs 1437 * @type: attribute type which to check 1438 * 1439 * Check whether the attribute of @type on the ntfs volume @vol is allowed to 1440 * be resident. This information is derived from our ntfs knowledge and may 1441 * not be completely accurate, especially when user defined attributes are 1442 * present. Basically we allow everything to be resident except for index 1443 * allocation attributes. 1444 * 1445 * Return 0 if the attribute is allowed to be resident and EPERM if not. 1446 * 1447 * Warning: In the system file $MFT the attribute $Bitmap must be non-resident 1448 * otherwise windows will not boot (blue screen of death)! We cannot 1449 * check for this here as we do not know which inode's $Bitmap is 1450 * being asked about so the caller needs to special case this. 1451 */ 1452errno_t ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type) 1453{ 1454 if (type == AT_INDEX_ALLOCATION) 1455 return EPERM; 1456 return 0; 1457} 1458 1459/** 1460 * ntfs_attr_record_is_only_one - check if an attribute is the only one 1461 * @m: the mft record in which the attribute to check resides 1462 * @a: the attribute to check 1463 * 1464 * Check if the attribute @a is the only attribute record in its mft record @m. 1465 * 1466 * Return true if @a is the only attribute record in its mft record @m and 1467 * false if @a is not the only attribute record in its mft record @m. 
1468 */ 1469BOOL ntfs_attr_record_is_only_one(MFT_RECORD *m, ATTR_RECORD *a) 1470{ 1471 ATTR_RECORD *first_a, *next_a; 1472 1473 first_a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 1474 next_a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length)); 1475 return (first_a == a && next_a->type == AT_END); 1476} 1477 1478/** 1479 * ntfs_attr_record_delete_internal - delete attribute record from mft record 1480 * @m: mft record containing attribute record to delete 1481 * @a: attribute record to delete 1482 * 1483 * Delete the attribute record @a, i.e. the resident part of the attribute, 1484 * from the mft record @m. 1485 * 1486 * This function cannot fail. 1487 * 1488 * Note the caller is responsible for marking the mft record dirty after 1489 * calling this function. 1490 */ 1491void ntfs_attr_record_delete_internal(MFT_RECORD *m, ATTR_RECORD *a) 1492{ 1493 const u32 new_muse = le32_to_cpu(m->bytes_in_use) - 1494 le32_to_cpu(a->length); 1495 /* Move attributes following @a into the position of @a. */ 1496 memmove(a, (u8*)a + le32_to_cpu(a->length), 1497 new_muse - ((u8*)a - (u8*)m)); 1498 /* Adjust @m to reflect the change in used space. */ 1499 m->bytes_in_use = cpu_to_le32(new_muse); 1500} 1501 1502/** 1503 * ntfs_attr_record_delete - delete an attribute record from its mft record 1504 * @base_ni: base ntfs inode from which to delete the attribute 1505 * @ctx: attribute search context describing attribute record to delete 1506 * 1507 * Delete the attribute record, i.e. the resident part of the attribute, 1508 * described by @ctx->a from its mft record @ctx->m and mark the mft record 1509 * dirty so it gets written out later. 1510 * 1511 * In an attribute list attribute is present also remove the attribute list 1512 * attribute entry corresponding to the attribute being deleted and update 1513 * the attribute list attribute record accordingly. 
 *
 * If the only attribute in the mft record is the attribute being deleted then
 * instead of deleting the attribute we free the extent mft record altogether
 * taking care to disconnect it from the base ntfs inode in the process.  As
 * above we update the attribute list attribute accordingly.
 *
 * If we end up freeing the extent mft record we go on to check the attribute
 * list attribute and if it no longer references any extent mft records we
 * remove the attribute list attribute altogether and update the base ntfs
 * inode to reflect the changed inode state.
 *
 * Return 0 on success and the error code on error.
 *
 * Note that on success the attribute search record is no longer valid and the
 * caller must either release it by calling ntfs_attr_search_ctx_put() or
 * reinitialize it by calling ntfs_attr_search_ctx_reinit().  Looking at the
 * search context or using it to call other functions would have unpredictable
 * results and could lead to crashes and file system corruption.
 */
errno_t ntfs_attr_record_delete(ntfs_inode *base_ni, ntfs_attr_search_ctx *ctx)
{
	ntfs_inode *ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	ATTR_LIST_ENTRY *al_entry;
	errno_t err;
	unsigned al_ofs;
	BOOL al_needed;

	ni = ctx->ni;
	m = ctx->m;
	a = ctx->a;
	ntfs_debug("Entering for attribute type 0x%x located in %s mft "
			"record 0x%llx. Attribute list attribute is "
			"%spresent.", (unsigned)le32_to_cpu(a->type),
			(base_ni == ni) ? "base" : "extent",
			(unsigned long long)ni->mft_no,
			NInoAttrList(base_ni) ? "" : "not ");
	/*
	 * If there is no attribute list attribute, the mft record must be a
	 * base mft record and thus it cannot be becoming empty as a
	 * consequence of deleting the attribute record.  Thus for inodes
	 * without an attribute list attribute we have a fast path of simply
	 * going ahead and deleting the attribute record and returning.
	 */
	if (!NInoAttrList(base_ni)) {
		ntfs_attr_record_delete_internal(m, a);
		NInoSetMrecNeedsDirtying(base_ni);
		ntfs_debug("Done (no attribute list attribute).");
		return 0;
	}
	/* Deleting the attribute list attribute itself is not handled here. */
	if (a->type == AT_ATTRIBUTE_LIST)
		panic("%s(): a->type == AT_ATTRIBUTE_LIST\n", __FUNCTION__);
	al_entry = ctx->al_entry;
	if (!al_entry)
		panic("%s(): !al_entry\n", __FUNCTION__);
	/*
	 * We have an attribute list attribute.  To begin with check if the
	 * attribute to be deleted is in the base mft record or if it is not
	 * the only attribute in the extent mft record.  In both of these cases
	 * we need to delete the attribute record from its mft record.
	 *
	 * Otherwise the attribute to be deleted is in an extent mft record and
	 * it is the only attribute in the extent mft record thus we need to
	 * free the extent mft record instead of deleting the attribute record.
	 */
	if (base_ni == ni || (u8*)m + le16_to_cpu(m->attrs_offset) != (u8*)a ||
			((ATTR_RECORD*)((u8*)a +
			le32_to_cpu(a->length)))->type != AT_END) {
		ntfs_attr_record_delete_internal(m, a);
		/*
		 * If the attribute was not in the base mft record mark the
		 * extent mft record dirty so it gets written out later.  If
		 * the attribute was in the base mft record it will be marked
		 * dirty later when the attribute list attribute record is
		 * updated which is in the base mft record by definition.
		 *
		 * We also unmap the extent mft record so we get to the same
		 * state as in the above case where we freed the extent mft
		 * record and we set @ctx->ni to equal the base inode @base_ni
		 * so that the search context is initialized from scratch or
		 * simply freed if the caller reinitializes or releases the
		 * search context respectively.
		 */
		if (base_ni != ni) {
			NInoSetMrecNeedsDirtying(ni);
			ntfs_extent_mft_record_unmap(ni);
			ctx->ni = base_ni;
		}
	} else {
		err = ntfs_extent_mft_record_free(base_ni, ni, m);
		if (err) {
			/*
			 * Ignore the error as we just end up with an unused
			 * mft record that is marked in use.
			 */
			ntfs_error(ni->vol->mp, "Failed to free extent mft_no "
					"0x%llx (error %d). Unmount and run "
					"chkdsk to recover the lost inode.",
					(unsigned long long)ni->mft_no, err);
			NVolSetErrors(ni->vol);
			/*
			 * Release the extent mft record after dirtying it thus
			 * simulating the effect of freeing it.
			 */
			NInoSetMrecNeedsDirtying(ni);
			ntfs_extent_mft_record_unmap(ni);
		}
		/*
		 * The attribute search context still points to the no longer
		 * mapped extent inode thus we need to change it to point to
		 * the base inode instead so the context can be reinitialized
		 * or released safely.
		 */
		ctx->ni = base_ni;
		/*
		 * Check the attribute list attribute.  If there are no other
		 * attribute list attribute entries referencing extent mft
		 * records delete the attribute list attribute altogether.
		 *
		 * If this fails it does not matter as we simply retain the
		 * attribute list attribute so we ignore the error and go on to
		 * delete the attribute list attribute entry instead.
		 *
		 * If there are other attribute list attribute entries
		 * referencing extent mft records we still need the attribute
		 * list attribute thus we go on to delete the attribute list
		 * entry corresponding to the attribute record we just deleted
		 * by freeing its extent mft record.
		 */
		err = ntfs_attr_list_is_needed(base_ni, al_entry, &al_needed);
		if (err)
			/*
			 * NOTE(review): "if still needed" in this message
			 * looks like a typo for "is still needed"; left
			 * unchanged as it is a runtime string.
			 */
			ntfs_warning(ni->vol->mp, "Failed to determine if "
					"attribute list attribute of mft_no "
					"0x%llx if still needed (error %d). "
					"Assuming it is still needed and "
					"continuing.",
					(unsigned long long)base_ni->mft_no,
					err);
		else if (!al_needed) {
			/*
			 * No more extent mft records are in use.  Delete the
			 * attribute list attribute.
			 */
			ntfs_attr_search_ctx_reinit(ctx);
			err = ntfs_attr_list_delete(base_ni, ctx);
			if (!err) {
				/*
				 * We deleted the attribute list attribute and
				 * this will have updated the base inode
				 * appropriately thus we are done.
				 */
				ntfs_debug("Done (deleted attribute list "
						"attribute).");
				return 0;
			}
			ntfs_warning(ni->vol->mp, "Failed to delete attribute "
					"list attribute of mft_no 0x%llx "
					"(error %d). Continuing by trying to "
					"delete the attribute list entry of "
					"the deleted attribute instead.",
					(unsigned long long)base_ni->mft_no,
					err);
		}
	}
	/*
	 * Both @ctx and @ni are now invalid and cannot be used any more which
	 * is fine as we have finished dealing with the attribute record.
	 *
	 * We now need to delete the corresponding attribute list attribute
	 * entry.
	 */
	al_ofs = (u8*)al_entry - base_ni->attr_list;
	ntfs_attr_list_entry_delete(base_ni, al_entry);
	ntfs_attr_search_ctx_reinit(ctx);
	err = ntfs_attr_list_sync_shrink(base_ni, al_ofs, ctx);
	if (!err) {
		ntfs_debug("Done (deleted attribute list attribute entry).");
		return 0;
	}
	NInoSetMrecNeedsDirtying(base_ni);
	ntfs_error(ni->vol->mp, "Failed to delete attribute list attribute "
			"entry in base mft_no 0x%llx (error %d). Leaving "
			"inconsistent metadata. Unmount and run chkdsk.",
			(unsigned long long)base_ni->mft_no, err);
	NVolSetErrors(ni->vol);
	return err;
}

/**
 * ntfs_attr_record_make_space - make space for a new attribute record
 * @m:		mft record in which to make space for the new attribute record
 * @a:		attribute record in front of which to make space
 * @size:	byte size of the new attribute record for which to make space
 *
 * Make space for a new attribute record of size @size in the mft record @m, in
 * front of the existing attribute record @a.
 *
 * Return 0 on success and errno on error.  The following error codes are
 * defined:
 *	ENOSPC - Not enough space in the mft record @m.
 *
 * Note: On error, no modifications have been performed whatsoever.
 */
errno_t ntfs_attr_record_make_space(MFT_RECORD *m, ATTR_RECORD *a, u32 size)
{
	u32 new_muse;
	const u32 muse = le32_to_cpu(m->bytes_in_use);
	/* Align to 8 bytes if it is not already done. */
	if (size & 7)
		size = (size + 7) & ~7;
	new_muse = muse + size;
	/* Not enough space in this mft record. */
	if (new_muse > le32_to_cpu(m->bytes_allocated))
		return ENOSPC;
	/* Move attributes starting with @a to make space of @size bytes. */
	memmove((u8*)a + size, a, muse - ((u8*)a - (u8*)m));
	/* Adjust @m to reflect the change in used space. */
	m->bytes_in_use = cpu_to_le32(new_muse);
	/* Clear the created space so we start with a clean slate. */
	bzero(a, size);
	/*
	 * Set the attribute size in the newly created attribute, now at @a.
	 * We do this here so that the caller does not need to worry about
	 * rounding up the size to set the attribute length.
1739 */ 1740 a->length = cpu_to_le32(size); 1741 return 0; 1742} 1743 1744/** 1745 * ntfs_attr_record_resize - resize an attribute record 1746 * @m: mft record containing attribute record 1747 * @a: attribute record to resize 1748 * @new_size: new size in bytes to which to resize the attribute record @a 1749 * 1750 * Resize the attribute record @a, i.e. the resident part of the attribute, in 1751 * the mft record @m to @new_size bytes. 1752 * 1753 * Return 0 on success and errno on error. The following error codes are 1754 * defined: 1755 * ENOSPC - Not enough space in the mft record @m to perform the resize. 1756 * 1757 * Note: On error, no modifications have been performed whatsoever. 1758 * 1759 * Warning: If you make a record smaller without having copied all the data you 1760 * are interested in the data may be overwritten. 1761 */ 1762errno_t ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size) 1763{ 1764 const u32 old_size = le32_to_cpu(a->length); 1765 1766 ntfs_debug("Entering for new_size %u.", new_size); 1767 /* Align to 8 bytes if it is not already done. */ 1768 if (new_size & 7) 1769 new_size = (new_size + 7) & ~7; 1770 /* If the actual attribute length has changed, move things around. */ 1771 if (new_size != old_size) { 1772 const u32 muse = le32_to_cpu(m->bytes_in_use); 1773 const u32 new_muse = muse - old_size + new_size; 1774 /* Not enough space in this mft record. */ 1775 if (new_muse > le32_to_cpu(m->bytes_allocated)) 1776 return ENOSPC; 1777 /* Move attributes following @a to their new location. */ 1778 memmove((u8*)a + new_size, (u8*)a + old_size, 1779 muse - ((u8*)a - (u8*)m) - old_size); 1780 /* Adjust @m to reflect the change in used space. */ 1781 m->bytes_in_use = cpu_to_le32(new_muse); 1782 /* Adjust @a to reflect the new size. 
*/ 1783 if (new_size >= offsetof(ATTR_REC, length) + sizeof(a->length)) 1784 a->length = cpu_to_le32(new_size); 1785 } 1786 return 0; 1787} 1788 1789/** 1790 * ntfs_attr_mapping_pairs_update - update an attribute's mapping pairs array 1791 * @base_ni: base ntfs inode to which the attribute belongs 1792 * @ni: ntfs inode of attribute whose mapping pairs array to update 1793 * @first_vcn: first vcn which to update in the mapping pairs array 1794 * @last_vcn: last vcn which to update in the mapping pairs array 1795 * @ctx: search context describing the attribute to work on or NULL 1796 * 1797 * Create or update the mapping pairs arrays from the locked runlist of the 1798 * attribute @ni, i.e. @ni->rl, starting at vcn @first_vcn and finishing with 1799 * vcn @last_vcn. The update can actually start before @first_vcn and finish 1800 * after @last_vcn but guarantees to at least include the range between 1801 * @first_vcn and @last_vcn, inclusive. 1802 * 1803 * This function is called from a variety of places after clusters have been 1804 * allocated to and/or freed from an attribute. The runlist has already been 1805 * updated to reflect the allocated/freed clusters. This functions takes the 1806 * modified runlist range and syncs it to the attribute record(s) by 1807 * compressing the runlist into mapping pairs array fragments and writing them 1808 * into the attribute record(s) of the attribute. 1809 * 1810 * This function also updates the attribute sizes using the values from the 1811 * ntfs inode @ni and syncs them to the base attribute record and if the 1812 * attribute has become sparse but the attribute record is not marked sparse or 1813 * the attribute is no longer sparse but the attribute record is marked sparse 1814 * the base attribute record is updated to reflect the changed state which 1815 * involves setting/clearing the sparse flag as well as the addition/removal of 1816 * the compressed size to the attribute record. 
When the compressed size is 1817 * added this can lead to a larger portion of the mapping pairs array being 1818 * updated because there may not be enough space in the mft record to extend 1819 * the base attribute record to fit the compressed size. When updating the 1820 * attribute record the compression state of the attribute is also taken into 1821 * consideration as the compressed size is used both with compressed and sparse 1822 * attributes. 1823 * 1824 * The update can involve the allocation/freeing of extent mft records and/or 1825 * extent attribute records. If this happens the attribute list attribute in 1826 * the base ntfs inode @base_ni is updated appropriately both in memory and in 1827 * the attribute list attribute record in the base mft record. 1828 * 1829 * A @last_vcn of -1 means end of runlist and in that case the mapping pairs 1830 * array corresponding to the runlist starting at vcn @first_vcn and finishing 1831 * at the end of the runlist is updated. 1832 * 1833 * If @ctx is NULL, it is assumed that the attribute mft record is not mapped 1834 * and hence a new search context is allocated, the mft record is mapped, and 1835 * the attribute is looked up. On completion the allocated search context is 1836 * released if it was allocated by ntfs_attr_mapping_pairs_update(). 1837 * 1838 * Return 0 on success and errno on error. 1839 * 1840 * Locking: The runlist @ni->rl must be locked for writing, it remains locked 1841 * throughout, and is left locked upon return. 
 */
#if 0
/*
 * NOTE(review): This function is compiled out (#if 0) and unfinished - see
 * the "TODO: I AM HERE" marker near the end.  It also references identifiers
 * (mp_ofs, m) that are not declared in this function; both must be fixed
 * before the code can be enabled.
 */
errno_t ntfs_attr_mapping_pairs_update(ntfs_inode *base_ni, ntfs_inode *ni,
		VCN first_vcn, VCN last_vcn, ntfs_attr_search_ctx *ctx)
{
	VCN lowest_vcn, highest_vcn, stop_vcn;
	ntfs_volume *vol;
	ATTR_RECORD *a;
	errno_t err;
	BOOL mpa_is_valid, was_sparse, is_sparse;
	ntfs_attr_search_ctx attr_ctx;

	ntfs_debug("Entering for base mft_no 0x%llx, attribute type 0x%x, "
			"name len 0x%x, first_vcn 0x%llx, last_vcn 0x%llx, "
			"ctx is %spresent.",
			(unsigned long long)base_ni->mft_no,
			(unsigned)le32_to_cpu(ni->type), ni->name_len,
			(unsigned long long)first_vcn,
			(unsigned long long)last_vcn,
			ctx ? "" : "not ");
	vol = base_ni->vol;
	/*
	 * If no search context was specified use ours, initialize it, and look
	 * up the base attribute record so we can update the sizes, flags, and
	 * add/remove the compressed size if needed.
	 *
	 * We also need to look up the base attribute record if a search
	 * context was specified but it points to an extent attribute record.
	 */
	if (!ctx || ctx->a->lowest_vcn) {
		if (!ctx) {
			MFT_RECORD *base_m;

			err = ntfs_mft_record_map(base_ni, &base_m);
			if (err) {
				ntfs_error(vol->mp, "Failed to map mft_no "
						"0x%llx (error %d).",
						(unsigned long long)
						base_ni->mft_no, err);
				return err;
			}
			ctx = &attr_ctx;
			ntfs_attr_search_ctx_init(ctx, base_ni, base_m);
		}
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0,
				NULL, 0, ctx);
		if (err) {
			if (err == ENOENT)
				err = EIO;
			ntfs_error(vol->mp, "Failed to look up base attribute "
					"record in mft_no 0x%llx (error %d).",
					(unsigned long long)base_ni->mft_no,
					err);
			goto err;
		}
	}
	a = ctx->a;
	/* Only non-resident attributes have a mapping pairs array. */
	if (!NInoNonResident(ni) || !a->non_resident)
		panic("%s(): !NInoNonResident(ni) || !a->non_resident\n",
				__FUNCTION__);
	mpa_is_valid = TRUE;
	/*
	 * If the attribute was sparse and is no longer sparse or it was not
	 * sparse and is now sparse, update the sparse state and add/remove the
	 * compressed size.
	 */
	was_sparse = a->flags & ATTR_IS_SPARSE;
	is_sparse = NInoSparse(ni);
	if (was_sparse == is_sparse)
		goto sparse_done;
	if (is_sparse) {
		a->flags |= ATTR_IS_SPARSE;
		if (NInoCompressed(ni))
			goto sparse_done;
		if (a->flags & ATTR_IS_COMPRESSED)
			panic("%s(): a->flags & ATTR_IS_COMPRESSED\n",
					__FUNCTION__);
		/*
		 * Add the compressed size and set up the relevant fields in
		 * the attribute record.
		 *
		 * If there is enough space in the mft record and we do not
		 * need to rewrite the mapping pairs array in this attribute
		 * record, resize the attribute record and move the mapping
		 * pairs array.
		 *
		 * If there is not enough space to perform the resize then do
		 * not preserve the mapping pairs array in this attribute
		 * record.
		 *
		 * If there still is not enough space to add the compressed
		 * size move the attribute record to an extent mft record (this
		 * cannot be the only attribute record in the current mft
		 * record).  If we do this do not preserve the mapping pairs
		 * array so we can make better use of the extent mft record.
		 *
		 * Note we need to ensure we have already mapped the runlist
		 * fragment described by the current mapping pairs array if we
		 * are not going to preserve it or we would lose the data.
		 */
		a->compression_unit = 0;
		if (vol->major_ver <= 1)
			a->compression_unit = NTFS_COMPRESSION_UNIT;
restart_compressed_size_add:
		if ((first_vcn > sle64_to_cpu(a->highest_vcn) + 1) &&
				!(err = ntfs_attr_record_resize(ctx->m, a,
				le32_to_cpu(a->length) +
				sizeof(a->compressed_size)))) {
			/*
			 * Move everything at the offset of the compressed size
			 * to make space for the compressed size.
			 */
			memmove((u8*)a + offsetof(ATTR_RECORD,
					compressed_size) +
					sizeof(a->compressed_size), (u8*)a +
					offsetof(ATTR_RECORD, compressed_size),
					le32_to_cpu(a->length) - offsetof(
					ATTR_RECORD, compressed_size));
			/*
			 * Update the name offset to match the moved data.  If
			 * there is no name then set the name offset to the
			 * correct position instead of adding to a potentially
			 * incorrect value.
			 */
			if (a->name_length)
				a->name_offset = cpu_to_le16(le16_to_cpu(
						a->name_offset) +
						sizeof(a->compressed_size));
			else
				a->name_offset = const_cpu_to_le16(offsetof(
						ATTR_RECORD, compressed_size) +
						sizeof(a->compressed_size));
			/*
			 * Update the mapping pairs offset.
			 *
			 * NOTE(review): mp_ofs is not declared anywhere in
			 * this function - it will not compile if enabled.
			 */
			mp_ofs = le16_to_cpu(a->mapping_pairs_offset) +
					sizeof(a->compressed_size);
			goto sparse_done;
		}
		/* Ensure this runlist fragment is mapped.
		 */
		if (ni->allocated_size && (!ni->rl.elements ||
				ni->rl.rl->lcn == LCN_RL_NOT_MAPPED)) {
			err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
			if (err) {
				ntfs_error(vol->mp, "Failed to decompress "
						"mapping pairs array (error "
						"%d).", err);
				goto err;
			}
		}
		/*
		 * Check whether the attribute is big enough to have the
		 * compressed size added to it.  We need at the very least
		 * space for the record header, the name, and a zero byte for
		 * an empty mapping pairs array and we need to allow for all
		 * the needed alignment padding.
		 */
		if (((sizeof(ATTR_RECORD) + a->name_length * sizeof(ntfschar) +
				7) & ~7) + 8 <= le32_to_cpu(a->length)) {
add_compressed_size:
			/*
			 * Move the name back to the new end of the attribute
			 * record header thus adding the compressed size.
			 */
			if (a->name_length)
				memmove((u8*)a + sizeof(ATTR_RECORD), (u8*)a +
						le16_to_cpu(a->name_offset),
						a->name_length *
						sizeof(ntfschar));
			/*
			 * Update the name offset and the mapping pairs offset
			 * to match the moved name.
			 */
			a->name_offset = const_cpu_to_le16(sizeof(ATTR_RECORD));
			a->mapping_pairs_offset = cpu_to_le16(
					(sizeof(ATTR_RECORD) + a->name_length *
					sizeof(ntfschar) + 7) & ~7);
			/*
			 * We no longer have a valid mapping pairs array in the
			 * current attribute record.
			 */
			mpa_is_valid = FALSE;
			goto sparse_done;
		}
		/*
		 * The attribute record is not big enough so try to extend it
		 * (in case we did not try to extend it above).
		 */
		err = ntfs_attr_record_resize(ctx->m, a,
				((sizeof(ATTR_RECORD) + a->name_length *
				sizeof(ntfschar) + 7) & ~7) + 8);
		if (!err)
			goto add_compressed_size;
		/*
		 * The attribute record cannot be the only one in the mft
		 * record if it is not large enough to hold an empty attribute
		 * record and there is not enough space to grow it.
		 */
		if (ntfs_attr_record_is_only_one(ctx->m, a))
			panic("%s(): ntfs_attr_is_only_one(ctx->m, a)\n",
					__FUNCTION__);
		/*
		 * This is our last resort.  Move the attribute to an extent
		 * mft record.
		 *
		 * First, add the attribute list attribute if it is not already
		 * present.
		 */
		if (!NInoAttrList(base_ni)) {
			err = ntfs_attr_list_add(base_ni, ctx->m, ctx);
			if (err || ctx->is_error) {
				if (!err)
					err = ctx->error;
				ntfs_error(vol->mp, "Failed to %s mft_no "
						"0x%llx (error %d).",
						ctx->is_error ?
						"remap extent mft record of" :
						"add attribute list attribute "
						"to", (unsigned long long)
						base_ni->mft_no, err);
				goto err;
			}
			/*
			 * The attribute location will have changed so update
			 * it from the search context.
			 */
			a = ctx->a;
			/*
			 * Retry the attribute record resize as we may now have
			 * enough space to add the compressed size.
			 *
			 * This can for example happen when the attribute was
			 * moved out to an extent mft record which has much
			 * more free space than the base mft record had or of
			 * course other attributes may have been moved out to
			 * extent mft records which has created enough space in
			 * the base mft record.
			 *
			 * If the attribute record was moved to an empty extent
			 * mft record this is the same case as if we moved the
			 * attribute record below so treat it the same, i.e. we
			 * do not preserve the mapping pairs array and use the
			 * maximum possible size for the mft record to allow us
			 * to consolidate the mapping pairs arrays.
			 */
			if (ntfs_attr_record_is_only_one(ctx->m, a))
				goto attr_is_only_one;
			goto restart_compressed_size_add;
		}
		/* Move the attribute to an extent mft record. */
		lck_rw_lock_shared(&base_ni->attr_list_rl.lock);
		err = ntfs_attr_record_move(ctx);
		lck_rw_unlock_shared(&base_ni->attr_list_rl.lock);
		if (err) {
			ntfs_error(vol->mp, "Failed to move attribute extent "
					"from mft record 0x%llx to an extent "
					"mft record (error %d).",
					(unsigned long long)ctx->ni->mft_no,
					err);
			/*
			 * We could try to remove the attribute list attribute
			 * if we added it above but this will require
			 * attributes to be moved back into the base mft record
			 * from extent mft records so is a lot of work and
			 * given we are in an error code path and given that it
			 * is ok to just leave the inode with an attribute list
			 * attribute we do not bother and just bail out.
			 */
			goto err;
		}
		/*
		 * The attribute location will have changed so update it from
		 * the search context.
		 */
		a = ctx->a;
attr_is_only_one:
		/*
		 * We now have enough space to add the compressed size so
		 * resize the attribute record.  Note we do not want to
		 * preserve the mapping pairs array as we will have
		 * significantly more space in the extent mft record thus we
		 * want to consolidate the mapping pairs arrays which is why we
		 * resize straight to the maximum possible size for the mft
		 * record.
		 *
		 * NOTE(review): the bare "m" below is not declared in this
		 * function; it should presumably be ctx->m - fix before
		 * enabling this code.
		 */
		err = ntfs_attr_record_resize(ctx->m, a,
				le32_to_cpu(m->bytes_allocated) -
				le32_to_cpu(m->bytes_in_use) +
				le32_to_cpu(a->length));
		if (err)
			panic("%s(): err - resize failed\n", __FUNCTION__);
		if (((sizeof(ATTR_RECORD) + a->name_length * sizeof(ntfschar) +
				7) & ~7) + 8 > le32_to_cpu(a->length))
			panic("%s(): attribute record is still too small\n",
					__FUNCTION__);
		goto add_compressed_size;
	}
	/* The attribute is becoming non-sparse. */
	a->flags &= ~ATTR_IS_SPARSE;
	if (NInoCompressed(ni))
		goto sparse_done;
	if (a->flags & ATTR_IS_COMPRESSED)
		panic("%s(): a->flags & ATTR_IS_COMPRESSED\n", __FUNCTION__);
	/*
	 * Remove the compressed size and set up the relevant fields in the
	 * attribute record.
	 *
	 * If we do not need to rewrite the mapping pairs array in this
	 * attribute record, move the mapping pairs array and then resize the
	 * attribute record.
	 *
	 * Note we need to ensure we have already mapped the runlist fragment
	 * described by the current mapping pairs array if we are not going to
	 * preserve it or we would lose the data.
	 */
	a->compression_unit = 0;
	if (first_vcn > sle64_to_cpu(a->highest_vcn) + 1) {
		/*
		 * Move everything after the compressed size forward to the
		 * offset of the compressed size thus deleting the compressed
		 * size.
		 */
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size),
				(u8*)a + offsetof(ATTR_RECORD,
				compressed_size) + sizeof(a->compressed_size),
				le32_to_cpu(a->length) - (offsetof(ATTR_RECORD,
				compressed_size) + sizeof(a->compressed_size)));
		/*
		 * Update the name offset and the mapping pairs offset to match
		 * the moved data.  If there is no name then set the name
		 * offset to the correct position instead of subtracting from a
		 * potentially incorrect value.
		 */
		if (!a->name_length)
			a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
					compressed_size));
		else
			a->name_offset = cpu_to_le16(
					le16_to_cpu(a->name_offset) -
					sizeof(a->compressed_size));
		a->mapping_pairs_offset = cpu_to_le16(
				le16_to_cpu(a->mapping_pairs_offset) -
				sizeof(a->compressed_size));
		/*
		 * Shrink the attribute record to reflect the removal of the
		 * compressed size.  Note this cannot fail since we are making
		 * the attribute smaller thus by definition there is enough
		 * space to do so.
		 */
		err = ntfs_attr_record_resize(ctx->m, a, le32_to_cpu(
				a->length) - sizeof(a->compressed_size));
		if (err)
			panic("%s(): err\n", __FUNCTION__);
		goto sparse_done;
	}
	/* Ensure this runlist fragment is mapped. */
	if (ni->allocated_size && (!ni->rl.elements ||
			ni->rl.rl->lcn == LCN_RL_NOT_MAPPED)) {
		err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
		if (err) {
			ntfs_error(vol->mp, "Failed to decompress mapping "
					"pairs array (error %d).", err);
			goto err;
		}
	}
	mpa_is_valid = FALSE;
	/*
	 * Move the name forward to the offset of the compressed size thus
	 * deleting the compressed size.
	 */
	if (a->name_length)
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size),
				(u8*)a + le16_to_cpu(a->name_offset),
				a->name_length * sizeof(ntfschar));
	/*
	 * Update the name offset and the mapping pairs offset to match the
	 * moved name.
	 */
	a->name_offset = const_cpu_to_le16(
			offsetof(ATTR_RECORD, compressed_size));
	a->mapping_pairs_offset = cpu_to_le16(
			(offsetof(ATTR_RECORD, compressed_size) +
			(a->name_length * sizeof(ntfschar)) + 7) & ~7);
sparse_done:
	/*
	 * Update the attribute sizes.
	 *
	 * TODO: Need to figure out whether we really need to update the data
	 * and initialized sizes or whether updating just the allocated and
	 * compressed sizes is sufficient in which case we can save a few CPU
	 * cycles by not updating the data and initialized sizes here.
	 */
	lck_spin_lock(&ni->size_lock);
	a->allocated_size = cpu_to_sle64(ni->allocated_size);
	a->data_size = cpu_to_sle64(ni->data_size);
	a->initialized_size = cpu_to_sle64(ni->initialized_size);
	if (a->flags & (ATTR_IS_COMPRESSED | ATTR_IS_SPARSE))
		a->compressed_size = cpu_to_sle64(ni->compressed_size);
	lck_spin_unlock(&ni->size_lock);
	/*
	 * If the current mapping pairs array is valid and the first vcn at
	 * which we need to update the mapping pairs array is not in this
	 * attribute extent, look up the attribute extent containing the first
	 * vcn.
	 */
	if (mpa_is_valid && first_vcn > sle64_to_cpu(a->highest_vcn) + 1) {
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				first_vcn, NULL, 0, ctx);
		if (err) {
			if (err == ENOENT)
				err = EIO;
			ntfs_error(vol->mp, "Failed to look up extent "
					"attribute record containing VCN "
					"0x%llx in mft_no 0x%llx (error %d).",
					(unsigned long long)first_vcn,
					(unsigned long long)base_ni->mft_no,
					err);
			goto err;
		}
		a = ctx->a;
	}
	/*
	 * We need to rebuild the mapping pairs array in this attribute extent.
	 * But first, check if we can grow the attribute extent.  If this is
	 * the base extent and the attribute is not sparse nor compressed and
	 * it is allowed to be sparse then reserve the size of the compressed
	 * size field in the mft record so it is easier to make the attribute
	 * sparse later on.
	 *
	 * FIXME: But we don't want to do that if the attribute extent is in
	 * the base mft record and the attribute is $DATA or $INDEX_ALLOCATION,
	 * etc as we want to keep the first extent of these base attribute
	 * extents in the base mft record thus we have to keep them small to
	 * allow the attribute list attribute to grow over time.
	 *
	 * FIXME: Need to make sure we map any unmapped regions of the runlist
	 * when determining the size of the mapping pairs array.
	 *
	 * FIXME: If we don't impose a last vcn when getting the size it would
	 * just cause the entirety of the mapping pairs array starting with the
	 * current extent to be mapped in, which is not necessarily a bad
	 * thing as it will then be already mapped for all subsequent writes.
	 *
	 * FIXME: We do not want to keep rewriting the entire mapping pairs
	 * array every time we fill a hole so need to be careful when
	 * consolidating the mapping pairs array fragments.  OTOH we do not
	 * want to end up with millions of very short attribute extents so need
	 * to be careful about that, too.
	 */
// TODO: I AM HERE:
	ntfs_error(vol->mp, "FIXME: TODO...");
	return ENOTSUP;
	ntfs_debug("Done.");
	return 0;
err:
	/*
	 * If we mapped the mft record and looked up the attribute, release the
	 * mapped mft record(s) here.
2298 */ 2299 if (ctx == &attr_ctx) { 2300 if (ctx->ni != base_ni) 2301 ntfs_extent_mft_record_unmap(ctx->ni); 2302 ntfs_mft_record_unmap(base_ni); 2303 } 2304 return err; 2305} 2306#endif 2307 2308/** 2309 * ntfs_resident_attr_record_insert_internal - insert a resident attribute 2310 * @m: mft record in which to insert the resident attribute 2311 * @a: attribute in front of which to insert the new attribute 2312 * @type: attribute type of new attribute 2313 * @name: Unicode name of new attribute 2314 * @name_len: Unicode character size of name of new attribute 2315 * @val_len: byte size of attribute value of new attribute 2316 * 2317 * Insert a new resident attribute in the mft record @m, in front of the 2318 * existing attribute record @a. The new attribute is of type @type, and has a 2319 * name of @name which is @name_len Unicode characters long. The new attribute 2320 * value is @val_len bytes and is initialized to zero. 2321 * 2322 * Note: If the inode uses the attribute list attribute the caller is 2323 * responsible for adding an entry for the inserted attribute to the attribute 2324 * list attribute. 2325 * 2326 * Return 0 on success and errno on error. The following error codes are 2327 * defined: 2328 * ENOSPC - Not enough space in the mft record @m. 2329 * 2330 * Note: On error, no modifications have been performed whatsoever. 2331 */ 2332errno_t ntfs_resident_attr_record_insert_internal(MFT_RECORD *m, 2333 ATTR_RECORD *a, const ATTR_TYPE type, const ntfschar *name, 2334 const u8 name_len, const u32 val_len) 2335{ 2336 unsigned name_ofs, val_ofs; 2337 2338 /* 2339 * Calculate the offset into the new attribute at which the attribute 2340 * name begins. The name is placed directly after the resident 2341 * attribute record itself. 2342 */ 2343 name_ofs = offsetof(ATTR_RECORD, reservedR) + sizeof(a->reservedR); 2344 /* 2345 * Calculate the offset into the new attribute at which the attribute 2346 * value begins. 
The attribute value is placed after the name aligned 2347 * to an 8-byte boundary. 2348 */ 2349 val_ofs = name_ofs + (((name_len << NTFSCHAR_SIZE_SHIFT) + 7) & ~7); 2350 /* 2351 * Work out the size for the attribute record. We simply take the 2352 * offset to the attribute value we worked out above and add the size 2353 * of the attribute value in bytes aligned to an 8-byte boundary. Note 2354 * we do not need to do the alignment as ntfs_attr_record_make_space() 2355 * does it anyway. 2356 */ 2357 if (ntfs_attr_record_make_space(m, a, val_ofs + val_len)) 2358 return ENOSPC; 2359 /* 2360 * Now setup the new attribute record. The entire attribute has been 2361 * zeroed and the length of the attribute record has been set up by 2362 * ntfs_attr_record_make_space(). 2363 */ 2364 a->type = type; 2365 a->name_length = name_len; 2366 a->name_offset = cpu_to_le16(name_ofs); 2367 a->instance = m->next_attr_instance; 2368 /* 2369 * Increment the next attribute instance number in the mft record as we 2370 * consumed the old one. 2371 */ 2372 m->next_attr_instance = cpu_to_le16( 2373 (le16_to_cpu(m->next_attr_instance) + 1) & 0xffff); 2374 a->value_length = cpu_to_le32(val_len); 2375 a->value_offset = cpu_to_le16(val_ofs); 2376 if (type == AT_FILENAME) 2377 a->resident_flags = RESIDENT_ATTR_IS_INDEXED; 2378 /* Copy the attribute name into place. 
*/ 2379 if (name_len) 2380 memcpy((u8*)a + name_ofs, name, 2381 name_len << NTFSCHAR_SIZE_SHIFT); 2382 return 0; 2383} 2384 2385/** 2386 * ntfs_resident_attr_record_insert - insert a resident attribute record 2387 * @ni: base ntfs inode to which the attribute is being added 2388 * @ctx: search context describing where to insert the resident attribute 2389 * @type: attribute type of new attribute 2390 * @name: Unicode name of new attribute 2391 * @name_len: Unicode character size of name of new attribute 2392 * @val: attribute value of new attribute (optional, can be NULL) 2393 * @val_len: byte size of attribute value of new attribute 2394 * 2395 * Insert a new resident attribute in the base ntfs inode @ni at the position 2396 * indicated by the attribute search context @ctx and add an attribute list 2397 * attribute entry for it if the inode uses the attribute list attribute. 2398 * 2399 * The new attribute is of type @type, has a name of @name which is @name_len 2400 * Unicode characters long, and has a value of @val with size @val_len bytes. 2401 * If @val is NULL, the value of size @val_len is zeroed. 2402 * 2403 * If @val is NULL, the caller is responsible for marking the extent mft record 2404 * the attribute is in dirty. We do it this way because we assume the caller 2405 * is going to modify the attribute further and will then mark it dirty. 2406 * 2407 * If the attribute is in the base mft record then the caller is always 2408 * responsible for marking the mft record dirty. 2409 * 2410 * Return 0 on success and errno on error. 2411 * 2412 * WARNING: Regardless of whether success or failure is returned, you need to 2413 * check @ctx->is_error and if 1 the @ctx is no longer valid, i.e. you 2414 * need to either call ntfs_attr_search_ctx_reinit() or 2415 * ntfs_attr_search_ctx_put() on it. In that case @ctx->error will 2416 * give you the error code for why the mapping of the inode failed. 
2417 */ 2418errno_t ntfs_resident_attr_record_insert(ntfs_inode *ni, 2419 ntfs_attr_search_ctx *ctx, const ATTR_TYPE type, 2420 const ntfschar *name, const u8 name_len, 2421 const void *val, const u32 val_len) 2422{ 2423 ntfs_volume *vol; 2424 MFT_RECORD *base_m, *m; 2425 ATTR_RECORD *a; 2426 ATTR_LIST_ENTRY *al_entry; 2427 unsigned name_ofs, val_ofs, al_entry_used, al_entry_len, new_al_size; 2428 unsigned new_al_alloc; 2429 errno_t err; 2430 BOOL al_entry_added; 2431 2432 ntfs_debug("Entering for mft_no 0x%llx, attribute type 0x%x, name_len " 2433 "0x%x, val_len 0x%x.", (unsigned long long)ni->mft_no, 2434 (unsigned)le32_to_cpu(type), name_len, val_len); 2435 vol = ni->vol; 2436 /* 2437 * Calculate the offset into the new attribute at which the attribute 2438 * name begins. The name is placed directly after the resident 2439 * attribute record itself. 2440 */ 2441 name_ofs = offsetof(ATTR_RECORD, reservedR) + sizeof(a->reservedR); 2442 /* 2443 * Calculate the offset into the new attribute at which the attribute 2444 * value begins. The attribute value is placed after the name aligned 2445 * to an 8-byte boundary. 2446 */ 2447 val_ofs = name_ofs + (((name_len << NTFSCHAR_SIZE_SHIFT) + 7) & ~7); 2448 /* 2449 * Work out the size for the attribute record. We simply take the 2450 * offset to the attribute value we worked out above and add the size 2451 * of the attribute value in bytes aligned to an 8-byte boundary. Note 2452 * we do not need to do the alignment as ntfs_attr_record_make_space() 2453 * does it anyway. 2454 */ 2455 /* 2456 * The current implementation of ntfs_attr_lookup() will always return 2457 * pointing into the base mft record when an attribute is not found. 
2458 */ 2459 base_m = ctx->m; 2460retry: 2461 if (ni != ctx->ni) 2462 panic("%s(): ni != ctx->ni\n", __FUNCTION__); 2463 m = ctx->m; 2464 a = ctx->a; 2465 err = ntfs_attr_record_make_space(m, a, val_ofs + val_len); 2466 if (err) { 2467 ntfs_inode *eni; 2468 2469 if (err != ENOSPC) 2470 panic("%s(): err != ENOSPC\n", __FUNCTION__); 2471 /* 2472 * There was not enough space in the mft record to insert the 2473 * new attribute record which means we will need to insert it 2474 * into an extent mft record. 2475 * 2476 * To avoid bugs and impossible situations, check that the 2477 * attribute is not already the only attribute in the mft 2478 * record otherwise moving it would not give us anything. 2479 */ 2480 if (ntfs_attr_record_is_only_one(m, a)) 2481 panic("%s(): ntfs_attr_record_is_only_one(m, a)\n", 2482 __FUNCTION__); 2483 /* 2484 * Before we can allocate an extent mft record, we need to 2485 * ensure that the inode has an attribute list attribute. 2486 */ 2487 if (!NInoAttrList(ni)) { 2488 err = ntfs_attr_list_add(ni, m, NULL); 2489 if (err) { 2490 ntfs_error(vol->mp, "Failed to add attribute " 2491 "list attribute to mft_no " 2492 "0x%llx (error %d).", 2493 (unsigned long long)ni->mft_no, 2494 err); 2495 return err; 2496 } 2497 /* 2498 * Adding the attribute list attribute may have 2499 * generated enough space in the base mft record to 2500 * fit the attribute so try again. 2501 */ 2502 ntfs_attr_search_ctx_reinit(ctx); 2503 err = ntfs_attr_lookup(type, name, name_len, 0, val, 2504 val_len, ctx); 2505 if (err == ENOENT) { 2506 /* 2507 * The current implementation of 2508 * ntfs_attr_lookup() will always return 2509 * pointing into the base mft record when an 2510 * attribute is not found. 2511 */ 2512 if (m != ctx->m) 2513 panic("%s(): m != ctx->m\n", 2514 __FUNCTION__); 2515 goto retry; 2516 } 2517 /* 2518 * We cannot have found the attribute as we have 2519 * exclusive access and know that it does not exist 2520 * already. 
2521 */ 2522 if (!err) 2523 panic("%s(): !err\n", __FUNCTION__); 2524 /* 2525 * Something has gone wrong. Note we have to bail out 2526 * as a failing attribute lookup indicates corruption 2527 * and/or disk failure and/or not enough memory all of 2528 * which would prevent us from rolling back the 2529 * attribute list attribute addition. 2530 */ 2531 ntfs_error(vol->mp, "Failed to add attribute type " 2532 "0x%x to mft_no 0x%llx because looking " 2533 "up the attribute failed (error %d).", 2534 (unsigned)le32_to_cpu(type), 2535 (unsigned long long)ni->mft_no, -err); 2536 return err; 2537 } 2538 /* 2539 * We now need to allocate a new extent mft record, attach it 2540 * to the base ntfs inode and set up the search context to 2541 * point to it, then insert the new attribute into it. 2542 */ 2543 err = ntfs_mft_record_alloc(vol, NULL, NULL, ni, &eni, &m, &a); 2544 if (err) { 2545 ntfs_error(vol->mp, "Failed to add attribute type " 2546 "0x%x to mft_no 0x%llx because " 2547 "allocating a new extent mft record " 2548 "failed (error %d).", 2549 (unsigned)le32_to_cpu(type), 2550 (unsigned long long)ni->mft_no, err); 2551 /* 2552 * If we added the attribute list attribute above we 2553 * now remove it again. This may require moving 2554 * attributes back into the base mft record so is not a 2555 * trivial amount of work and in the end it does not 2556 * really matter if we leave an inode with an attribute 2557 * list attribute that does not really need it. So it 2558 * will only be removed if there are no extent mft 2559 * records at all, i.e. if adding the attribute list 2560 * attribute did not cause any attribute records to be 2561 * moved out to extent mft records. 2562 */ 2563 al_entry_added = FALSE; 2564 al_entry = NULL; 2565 goto remove_al; 2566 } 2567 ctx->m = m; 2568 ctx->a = a; 2569 ctx->ni = eni; 2570 /* 2571 * Make space for the new attribute. 
This cannot fail as we 2572 * now have an empty mft record which by definition can hold 2573 * a maximum size resident attribute record. 2574 */ 2575 err = ntfs_attr_record_make_space(m, a, val_ofs + val_len); 2576 if (err) 2577 panic("%s(): err (ntfs_attr_record_make_space())\n", 2578 __FUNCTION__); 2579 } 2580 /* 2581 * Now setup the new attribute record. The entire attribute has been 2582 * zeroed and the length of the attribute record has been set up by 2583 * ntfs_attr_record_make_space(). 2584 */ 2585 a->type = type; 2586 a->name_length = name_len; 2587 a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD, reservedR) + 2588 sizeof(a->reservedR)); 2589 a->instance = m->next_attr_instance; 2590 /* 2591 * Increment the next attribute instance number in the mft record as we 2592 * consumed the old one. 2593 */ 2594 m->next_attr_instance = cpu_to_le16( 2595 (le16_to_cpu(m->next_attr_instance) + 1) & 0xffff); 2596 a->value_length = cpu_to_le32(val_len); 2597 a->value_offset = cpu_to_le16(val_ofs); 2598 if (type == AT_FILENAME) 2599 a->resident_flags = RESIDENT_ATTR_IS_INDEXED; 2600 /* Copy the attribute name into place. */ 2601 if (name_len) 2602 memcpy((u8*)a + name_ofs, name, 2603 name_len << NTFSCHAR_SIZE_SHIFT); 2604 /* If a value is specified, copy it into place. */ 2605 if (val) { 2606 memcpy((u8*)a + le16_to_cpu(a->value_offset), val, val_len); 2607 /* 2608 * Ensure the mft record containing the new filename attribute 2609 * gets written out. 2610 */ 2611 if (ctx->ni != ni) 2612 NInoSetMrecNeedsDirtying(ctx->ni); 2613 } 2614 /* 2615 * If the inode does not use the attribute list attribute we are done. 2616 * 2617 * If the inode uses the attribute list attribute (including the case 2618 * where we just created it), we need to add an attribute list 2619 * attribute entry for the attribute. 2620 */ 2621 if (!NInoAttrList(ni)) 2622 goto done; 2623 /* Add an attribute list attribute entry for the inserted attribute. 
*/ 2624 al_entry = ctx->al_entry; 2625 al_entry_used = offsetof(ATTR_LIST_ENTRY, name) + 2626 (name_len << NTFSCHAR_SIZE_SHIFT); 2627 al_entry_len = (al_entry_used + 7) & ~7; 2628 new_al_size = ni->attr_list_size + al_entry_len; 2629 /* Out of bounds checks. */ 2630 if ((u8*)al_entry < ni->attr_list || 2631 (u8*)al_entry > ni->attr_list + new_al_size || 2632 (u8*)al_entry + al_entry_len > 2633 ni->attr_list + new_al_size) { 2634 /* Inode is corrupt. */ 2635 ntfs_error(vol->mp, "Mft_no 0x%llx is corrupt. Run chkdsk.", 2636 (unsigned long long)ni->mft_no); 2637 err = EIO; 2638 goto undo; 2639 } 2640 err = ntfs_attr_size_bounds_check(vol, AT_ATTRIBUTE_LIST, new_al_size); 2641 if (err) { 2642 if (err == ERANGE) { 2643 ntfs_error(vol->mp, "Cannot insert attribute into " 2644 "mft_no 0x%llx because the attribute " 2645 "list attribute would become too " 2646 "large. You need to defragment your " 2647 "volume and then try again.", 2648 (unsigned long long)ni->mft_no); 2649 err = ENOSPC; 2650 } else { 2651 ntfs_error(vol->mp, "Attribute list attribute is " 2652 "unknown on the volume. The volume " 2653 "is corrupt. Run chkdsk."); 2654 NVolSetErrors(vol); 2655 err = EIO; 2656 } 2657 goto undo; 2658 } 2659 /* 2660 * Reallocate the memory buffer if needed and create space for the new 2661 * entry. 
2662 */ 2663 new_al_alloc = (new_al_size + NTFS_ALLOC_BLOCK - 1) & 2664 ~(NTFS_ALLOC_BLOCK - 1); 2665 if (new_al_alloc > ni->attr_list_alloc) { 2666 u8 *tmp, *al, *al_end; 2667 unsigned al_entry_ofs; 2668 2669 tmp = OSMalloc(new_al_alloc, ntfs_malloc_tag); 2670 if (!tmp) { 2671 ntfs_error(vol->mp, "Not enough memory to extend " 2672 "attribute list attribute of mft_no " 2673 "0x%llx.", 2674 (unsigned long long)ni->mft_no); 2675 err = ENOMEM; 2676 goto undo; 2677 } 2678 al = ni->attr_list; 2679 al_entry_ofs = (u8*)al_entry - al; 2680 al_end = al + ni->attr_list_size; 2681 memcpy(tmp, al, al_entry_ofs); 2682 if ((u8*)al_entry < al_end) 2683 memcpy(tmp + al_entry_ofs + al_entry_len, 2684 al + al_entry_ofs, 2685 ni->attr_list_size - al_entry_ofs); 2686 al_entry = ctx->al_entry = (ATTR_LIST_ENTRY*)(tmp + 2687 al_entry_ofs); 2688 OSFree(ni->attr_list, ni->attr_list_alloc, ntfs_malloc_tag); 2689 ni->attr_list_alloc = new_al_alloc; 2690 ni->attr_list = tmp; 2691 } else if ((u8*)al_entry < ni->attr_list + ni->attr_list_size) 2692 memmove((u8*)al_entry + al_entry_len, al_entry, 2693 ni->attr_list_size - ((u8*)al_entry - 2694 ni->attr_list)); 2695 ni->attr_list_size = new_al_size; 2696 /* Set up the attribute list entry. */ 2697 al_entry->type = type; 2698 al_entry->length = cpu_to_le16(al_entry_len); 2699 al_entry->name_length = name_len; 2700 al_entry->name_offset = offsetof(ATTR_LIST_ENTRY, name); 2701 al_entry->lowest_vcn = 0; 2702 al_entry->mft_reference = MK_LE_MREF(ctx->ni->mft_no, ctx->ni->seq_no); 2703 al_entry->instance = a->instance; 2704 /* Copy the attribute name into place. */ 2705 if (name_len) 2706 memcpy((u8*)&al_entry->name, name, 2707 name_len << NTFSCHAR_SIZE_SHIFT); 2708 /* For tidyness, zero any unused space. 
*/ 2709 if (al_entry_len != al_entry_used) { 2710 if (al_entry_len < al_entry_used) 2711 panic("%s(): al_entry_len < al_entry_used\n", 2712 __FUNCTION__); 2713 memset((u8*)al_entry + al_entry_used, 0, 2714 al_entry_len - al_entry_used); 2715 } 2716 /* 2717 * Extend the attribute list attribute and copy in the modified 2718 * value from the cache. 2719 */ 2720 err = ntfs_attr_list_sync_extend(ni, base_m, 2721 (u8*)al_entry - ni->attr_list, ctx); 2722 if (err) { 2723 ntfs_error(vol->mp, "Failed to extend attribute list " 2724 "attribute of mft_no 0x%llx (error %d).", 2725 (unsigned long long)ni->mft_no, err); 2726 al_entry_added = TRUE; 2727 goto undo_al; 2728 } 2729done: 2730 ntfs_debug("Done."); 2731 return 0; 2732undo: 2733 al_entry_added = FALSE; 2734undo_al: 2735 /* 2736 * Need to remove the attribute again or free the extent mft record if 2737 * there are no attributes remaining in it. 2738 */ 2739 if (m == base_m || !ntfs_attr_record_is_only_one(m, a)) { 2740 ntfs_attr_record_delete_internal(m, a); 2741 /* 2742 * If the attribute was not in the base mft record mark the 2743 * extent mft record dirty so it gets written out later. If 2744 * the attribute was in the base mft record it will be marked 2745 * dirty later. 2746 * 2747 * We also unmap the extent mft record and we set @ctx->ni to 2748 * equal the base inode @ni so that the search context is 2749 * initialized from scratch or simply freed if the caller 2750 * reinitializes or releases the search context respectively. 2751 */ 2752 if (m != base_m) { 2753 NInoSetMrecNeedsDirtying(ctx->ni); 2754 ntfs_extent_mft_record_unmap(ctx->ni); 2755 ctx->ni = ni; 2756 } 2757 } else { 2758 int err2; 2759 BOOL al_needed; 2760 2761 err2 = ntfs_extent_mft_record_free(ni, ctx->ni, m); 2762 if (err2) { 2763 /* 2764 * Ignore the error as we just end up with an unused 2765 * mft record that is marked in use. 2766 */ 2767 ntfs_error(vol->mp, "Failed to free extent mft_no " 2768 "0x%llx (error %d). 
Unmount and run " 2769 "chkdsk to recover the lost inode.", 2770 (unsigned long long)ctx->ni->mft_no, 2771 err2); 2772 NVolSetErrors(vol); 2773 /* 2774 * Relese the extent mft record after dirtying it thus 2775 * simulating the effect of freeing it. 2776 */ 2777 NInoSetMrecNeedsDirtying(ctx->ni); 2778 ntfs_extent_mft_record_unmap(ctx->ni); 2779 } 2780 /* 2781 * The attribute search context still points to the no longer 2782 * mapped extent inode thus we need to change it to point to 2783 * the base inode instead so the context can be reinitialized 2784 * or released safely. 2785 */ 2786 ctx->ni = ni; 2787remove_al: 2788 /* 2789 * Check the attribute list attribute. If there are no other 2790 * attribute list attribute entries referencing extent mft 2791 * records delete the attribute list attribute altogether. 2792 * 2793 * If this fails it does not matter as we simply retain the 2794 * attribute list attribute so we ignore the error and go on to 2795 * delete the attribute list attribute entry instead. 2796 * 2797 * If there are other attribute list attribute entries 2798 * referencing extent mft records we still need the attribute 2799 * list attribute thus we go on to delete the attribute list 2800 * entry corresponding to the attribute record we just deleted 2801 * by freeing its extent mft record. 2802 */ 2803 err2 = ntfs_attr_list_is_needed(ni, 2804 al_entry_added ? al_entry : NULL, &al_needed); 2805 if (err2) 2806 ntfs_warning(vol->mp, "Failed to determine if " 2807 "attribute list attribute of mft_no " 2808 "0x%llx if still needed (error %d). " 2809 "Assuming it is still needed and " 2810 "continuing.", 2811 (unsigned long long)ni->mft_no, err2); 2812 else if (!al_needed) { 2813 /* 2814 * No more extent mft records are in use. Delete the 2815 * attribute list attribute. 
2816 */ 2817 ntfs_attr_search_ctx_reinit(ctx); 2818 err2 = ntfs_attr_list_delete(ni, ctx); 2819 if (!err2) { 2820 /* 2821 * We deleted the attribute list attribute and 2822 * this will have updated the base inode 2823 * appropriately thus we have restored 2824 * everything as it was before. 2825 */ 2826 return err; 2827 } 2828 ntfs_warning(vol->mp, "Failed to delete attribute " 2829 "list attribute of mft_no 0x%llx " 2830 "(error %d). Continuing using " 2831 "alternative error recovery method.", 2832 (unsigned long long)ni->mft_no, err2); 2833 } 2834 } 2835 /* 2836 * Both @ctx and @ni are now invalid and cannot be used any more which 2837 * is fine as we have finished dealing with the attribute record. 2838 * 2839 * We now need to delete the corresponding attribute list attribute 2840 * entry if we created it. 2841 * 2842 * Then we need to rewrite the attribute list attribute again because 2843 * ntfs_attr_list_sync_extend() may have left it in an indeterminate 2844 * state. 2845 */ 2846 if (al_entry_added) { 2847 int err2; 2848 2849 ntfs_attr_list_entry_delete(ni, al_entry); 2850 ntfs_attr_search_ctx_reinit(ctx); 2851 err2 = ntfs_attr_list_sync_shrink(ni, 0, ctx); 2852 if (err2) { 2853 ntfs_error(vol->mp, "Failed to restore attribute list " 2854 "attribute in base mft_no 0x%llx " 2855 "(error %d). Leaving inconsistent " 2856 "metadata. Unmount and run chkdsk.", 2857 (unsigned long long)ni->mft_no, err2); 2858 NVolSetErrors(vol); 2859 } 2860 } 2861 /* Make sure any changes are written out. */ 2862 NInoSetMrecNeedsDirtying(ni); 2863 return err; 2864} 2865 2866/** 2867 * ntfs_resident_attr_value_resize - resize the value of a resident attribute 2868 * @m: mft record containing attribute record 2869 * @a: attribute record whose value to resize 2870 * @new_size: new size in bytes to which to resize the attribute value of @a 2871 * 2872 * Resize the value of the attribute @a in the mft record @m to @new_size 2873 * bytes. 
If the value is made bigger, the newly allocated space is cleared. 2874 * 2875 * Return 0 on success and errno on error. The following error codes are 2876 * defined: 2877 * ENOSPC - Not enough space in the mft record @m to perform the resize. 2878 * 2879 * Note: On error, no modifications have been performed whatsoever. 2880 * 2881 * Warning: If you make a record smaller without having copied all the data you 2882 * are interested in the data may be overwritten. 2883 */ 2884errno_t ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, 2885 const u32 new_size) 2886{ 2887 const u32 old_size = le32_to_cpu(a->value_length); 2888 2889 /* Resize the resident part of the attribute record. */ 2890 if (ntfs_attr_record_resize(m, a, le16_to_cpu(a->value_offset) + 2891 new_size)) 2892 return ENOSPC; 2893 /* 2894 * The resize succeeded! If we made the attribute value bigger, clear 2895 * the area between the old size and @new_size. 2896 */ 2897 if (new_size > old_size) 2898 bzero((u8*)a + le16_to_cpu(a->value_offset) + old_size, 2899 new_size - old_size); 2900 /* Finally update the length of the attribute value. */ 2901 a->value_length = cpu_to_le32(new_size); 2902 return 0; 2903} 2904 2905/** 2906 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute 2907 * @ni: ntfs inode describing the attribute to convert 2908 * 2909 * Convert the resident ntfs attribute described by the ntfs inode @ni to a 2910 * non-resident one. 2911 * 2912 * Return 0 on success and errno on error. The following error return codes 2913 * are defined: 2914 * EPERM - The attribute is not allowed to be non-resident. 2915 * ENOMEM - Not enough memory. 2916 * ENOSPC - Not enough disk space. 2917 * EINVAL - Attribute not defined on the volume. 2918 * EIO - I/o error or other error. 
2919 * 2920 * Note that if an error other than EPERM is returned it is possible that the 2921 * attribute has been made non-resident but for example the attribute list 2922 * attribute failed to be written out thus the base mft record is now corrupt 2923 * and all operations should be aborted by the caller. 2924 * 2925 * Locking: The caller must hold @ni->lock on the inode for writing. 2926 */ 2927errno_t ntfs_attr_make_non_resident(ntfs_inode *ni) 2928{ 2929 leMFT_REF mref; 2930 s64 new_size, data_size; 2931 ntfs_volume *vol = ni->vol; 2932 ntfs_inode *base_ni; 2933 MFT_RECORD *base_m, *m; 2934 ATTR_RECORD *a; 2935 upl_t upl; 2936 upl_page_info_array_t pl; 2937 u8 *kaddr, *al_end; 2938 unsigned mp_size, mp_ofs, name_ofs, arec_size, attr_size, bytes_needed; 2939 unsigned al_ofs = 0; 2940 errno_t err, err2; 2941 le32 type; 2942 u8 old_res_attr_flags; 2943 ntfs_attr_search_ctx ctx, actx; 2944 BOOL al_dirty = FALSE; 2945 2946 /* Check that the attribute is allowed to be non-resident. */ 2947 err = ntfs_attr_can_be_non_resident(vol, ni->type); 2948 if (err) { 2949 if (err == EPERM) 2950 ntfs_debug("Attribute is not allowed to be " 2951 "non-resident."); 2952 else 2953 ntfs_debug("Attribute not defined on the NTFS " 2954 "volume!"); 2955 return err; 2956 } 2957 /* 2958 * FIXME: Compressed and encrypted attributes are not supported when 2959 * writing and we should never have gotten here for them. 2960 */ 2961 if (NInoCompressed(ni)) 2962 panic("%s(): NInoCompressed(ni)\n", __FUNCTION__); 2963 if (NInoEncrypted(ni)) 2964 panic("%s(): NInoEncrypted(ni)\n", __FUNCTION__); 2965 /* 2966 * The size needs to be aligned to a cluster boundary for allocation 2967 * purposes. 
2968 */ 2969 lck_spin_lock(&ni->size_lock); 2970 data_size = ni->data_size; 2971 lck_spin_unlock(&ni->size_lock); 2972 new_size = (data_size + vol->cluster_size_mask) & 2973 ~vol->cluster_size_mask; 2974 lck_rw_lock_exclusive(&ni->rl.lock); 2975 if (ni->rl.elements) 2976 panic("%s(): ni->rl.elements\n", __FUNCTION__); 2977 upl = NULL; 2978 if (new_size > 0) { 2979 /* Start by allocating clusters to hold the attribute value. */ 2980 err = ntfs_cluster_alloc(vol, 0, new_size >> 2981 vol->cluster_size_shift, -1, DATA_ZONE, TRUE, 2982 &ni->rl); 2983 if (err) { 2984 if (err != ENOSPC) 2985 ntfs_error(vol->mp, "Failed to allocate " 2986 "cluster%s, error code %d.", 2987 (new_size >> 2988 vol->cluster_size_shift) > 1 ? 2989 "s" : "", err); 2990 goto unl_err; 2991 } 2992 /* 2993 * Will need the page later and since the page lock nests 2994 * outside all ntfs locks, we need to get the page now. 2995 */ 2996 err = ntfs_page_grab(ni, 0, &upl, &pl, &kaddr, TRUE); 2997 if (err) 2998 goto page_err; 2999 } 3000 /* Determine the size of the mapping pairs array. */ 3001 err = ntfs_get_size_for_mapping_pairs(vol, 3002 ni->rl.elements ? ni->rl.rl : NULL, 0, -1, &mp_size); 3003 if (err) { 3004 ntfs_error(vol->mp, "Failed to get size for mapping pairs " 3005 "array (error %d).", err); 3006 goto rl_err; 3007 } 3008 base_ni = ni; 3009 if (NInoAttr(ni)) 3010 base_ni = ni->base_ni; 3011 err = ntfs_mft_record_map(base_ni, &base_m); 3012 if (err) 3013 goto rl_err; 3014 ntfs_attr_search_ctx_init(&ctx, base_ni, base_m); 3015 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0, 3016 &ctx); 3017 if (err) { 3018 if (err == ENOENT) 3019 err = EIO; 3020 goto unm_err; 3021 } 3022 m = ctx.m; 3023 a = ctx.a; 3024 if (NInoNonResident(ni)) 3025 panic("%s(): NINonResident(ni)\n", __FUNCTION__); 3026 if (a->non_resident) 3027 panic("%s(): a->non_resident\n", __FUNCTION__); 3028 /* Calculate new offsets for the name and the mapping pairs array. 
*/ 3029 name_ofs = offsetof(ATTR_REC, compressed_size); 3030 if (NInoSparse(ni) || NInoCompressed(ni)) 3031 name_ofs += sizeof(a->compressed_size); 3032 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; 3033 /* 3034 * Determine the size of the resident part of the now non-resident 3035 * attribute record. 3036 */ 3037 arec_size = (mp_ofs + mp_size + 7) & ~7; 3038 /* 3039 * If the page is not uptodate bring it uptodate by copying from the 3040 * attribute value. 3041 */ 3042 attr_size = le32_to_cpu(a->value_length); 3043 if (attr_size != data_size) 3044 panic("%s(): attr_size != data_size\n", __FUNCTION__); 3045 if (upl && !upl_valid_page(pl, 0)) { 3046 memcpy(kaddr, (u8*)a + le16_to_cpu(a->value_offset), 3047 attr_size); 3048 bzero(kaddr + attr_size, PAGE_SIZE - attr_size); 3049 } 3050 /* Backup the attribute flags. */ 3051 old_res_attr_flags = a->resident_flags; 3052retry_resize: 3053 /* Resize the resident part of the attribute record. */ 3054 err = ntfs_attr_record_resize(m, a, arec_size); 3055 if (!err) { 3056 al_ofs = 0; 3057 goto do_switch; 3058 } 3059 if (err != ENOSPC) 3060 panic("%s(): err != ENOSPC\n", __FUNCTION__); 3061 /* 3062 * The attribute record size required cannot be larger than the amount 3063 * of space in an mft record. 3064 */ 3065 if (arec_size > le32_to_cpu(m->bytes_allocated) - 3066 le16_to_cpu(m->attrs_offset)) 3067 panic("%s(): arec_size > le32_to_cpu(m->bytes_allocated) - " 3068 "le16_to_cpu(m->attrs_offset)\n", 3069 __FUNCTION__); 3070 /* 3071 * To make space in the mft record we would like to try to make other 3072 * attributes non-resident if that would save space. 3073 * 3074 * FIXME: We cannot do this at present unless the attribute is the 3075 * attribute being resized as there could be an ntfs inode matching 3076 * this attribute in memory and it would become out of date with its 3077 * metadata if we touch its attribute record. 
3078 * 3079 * FIXME: We do not need to do this if this is the attribute being 3080 * resized as we already tried to make the attribute non-resident and 3081 * it did not work or we would never have gotten here in the first 3082 * place. 3083 * 3084 * Thus we have to either move other attributes to extent mft records 3085 * thus making more space in the base mft record or we have to move the 3086 * attribute being resized to an extent mft record thus giving it more 3087 * space. In any case we need to have an attribute list attribute so 3088 * start by adding it if it does not yet exist. 3089 * 3090 * If the addition succeeds but the remapping of the extent mft record 3091 * fails (i.e. the !err && IS_ERR(ctx.m) case below) we bail out 3092 * without trying to remove the attribute list attribute because to do 3093 * so we would have to map the extent mft record in order to move the 3094 * attribute(s) in it back into the base mft record and we know the 3095 * mapping just failed so it is unlikely to succeed now. In any case 3096 * the metadata is consistent we just cannot make further progress. 3097 */ 3098 if (!NInoAttrList(base_ni)) { 3099 err = ntfs_attr_list_add(base_ni, base_m, &ctx); 3100 if (err || ctx.is_error) { 3101 if (!err) 3102 err = ctx.error; 3103 ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx (error " 3104 "%d).", ctx.is_error ? 3105 "remap extent mft record of" : 3106 "add attribute list attribute to", 3107 (unsigned long long)base_ni->mft_no, 3108 err); 3109 goto unm_err; 3110 } 3111 /* 3112 * The attribute location will have changed so update it from 3113 * the search context. 3114 */ 3115 m = ctx.m; 3116 a = ctx.a; 3117 /* 3118 * Check that the logic in ntfs_attr_list_add() has not changed 3119 * without the code here being updated. At present it will 3120 * never make resident attributes non-resident. 3121 */ 3122 if (a->non_resident) 3123 panic("%s(): a->non_resident\n", __FUNCTION__); 3124 /* 3125 * We now have an attribute list attribute. 
This may have 3126 * caused the attribute to be made non-resident to be moved out 3127 * to an extent mft record in which case there would now be 3128 * enough space to resize the attribute record. 3129 * 3130 * Alternatively some other large attribute may have been moved 3131 * out to an extent mft record thus generating enough space in 3132 * the base mft record for the attribute to be made 3133 * non-resident. 3134 * 3135 * In either case we simply want to retry the resize. 3136 */ 3137 goto retry_resize; 3138 } 3139 /* 3140 * We now know we have an attribute list attribute and that we still do 3141 * not have enough space to make the attribute non-resident. 3142 * 3143 * As discussed above we need to start moving attributes out of the 3144 * base mft record to make enough space. 3145 * 3146 * Note that if the attribute to be made non-resident had been moved 3147 * out of the base mft record we would then have had enough space for 3148 * the resize thus we would never have gotten here. We detect this 3149 * case and BUG() in case we change the logic in ntfs_attr_list_add() 3150 * some day to remind us to update the code here to match. 3151 */ 3152 if (ctx.ni != base_ni) 3153 panic("%s(): ctx.ni != base_ni\n", __FUNCTION__); 3154 /* 3155 * If this is the only attribute record in the mft record we cannot 3156 * gain anything by moving it or anything else. This really cannot 3157 * happen as we ensure above that the attribute is in the base mft 3158 * record. 3159 */ 3160 if (ntfs_attr_record_is_only_one(m, a)) 3161 panic("%s(): ntfs_attr_record_is_only_one(m, a)\n", 3162 __FUNCTION__); 3163 /* 3164 * If the attribute to be resized is the standard information, index 3165 * root, or unnamed $DATA attribute try to move other attributes out 3166 * into extent mft records. If none of these then move the attribute 3167 * to be resized out to an extent mft record. 
3168 */ 3169 type = ni->type; 3170 if (type != AT_STANDARD_INFORMATION && type != AT_INDEX_ROOT && 3171 (type != AT_DATA || ni->name_len)) { 3172 lck_rw_lock_shared(&base_ni->attr_list_rl.lock); 3173 err = ntfs_attr_record_move(&ctx); 3174 lck_rw_unlock_shared(&base_ni->attr_list_rl.lock); 3175 if (!err) { 3176 /* The attribute has moved so update our variables. */ 3177 m = ctx.m; 3178 a = ctx.a; 3179 /* The resize will now succeed. */ 3180 goto retry_resize; 3181 } 3182 ntfs_error(vol->mp, "Failed to move attribute type 0x%x out " 3183 "of base mft_no 0x%llx into an extent mft " 3184 "record (error %d).", le32_to_cpu(type), 3185 base_ni->mft_no, err); 3186 goto unm_err; 3187 } 3188 type = AT_UNUSED; 3189 /* 3190 * The number of free bytes needed in the mft record so the resize can 3191 * succeed. 3192 */ 3193 bytes_needed = arec_size - le32_to_cpu(a->length); 3194 /* 3195 * The MFT reference of the mft record in which the attribute to be 3196 * made non-resident is located. 3197 */ 3198 mref = MK_LE_MREF(base_ni->mft_no, base_ni->seq_no); 3199 al_ofs = base_ni->attr_list_size; 3200 al_end = base_ni->attr_list + al_ofs; 3201next_pass: 3202 ntfs_attr_search_ctx_init(&actx, base_ni, base_m); 3203 actx.is_iteration = 1; 3204 do { 3205 ntfschar *a_name; 3206 ATTR_LIST_ENTRY *al_entry; 3207 3208 /* Get the next attribute in the mft record. */ 3209 err = ntfs_attr_find_in_mft_record(type, NULL, 0, NULL, 0, 3210 &actx); 3211 if (err) { 3212 if (err == ENOENT) { 3213 /* 3214 * If we have more passes to go do the next 3215 * pass which will try harder to move things 3216 * out of the way. 3217 */ 3218 if (type == AT_UNUSED) { 3219 type = AT_DATA; 3220 goto next_pass; 3221 } 3222 /* 3223 * TODO: Need to get these cases triggered and 3224 * then need to run chkdsk to check for 3225 * validity of moving these attributes out of 3226 * the base mft record. 
3227 */ 3228 if (type == AT_DATA) { 3229 type = AT_INDEX_ROOT; 3230 goto next_pass; 3231 } 3232 if (type == AT_INDEX_ROOT) { 3233 type = AT_STANDARD_INFORMATION; 3234 goto next_pass; 3235 } 3236 /* 3237 * We can only get here when the attribute to 3238 * be made non-resident is the standard 3239 * information attribute and for some reason it 3240 * does not exist in the mft record. That can 3241 * only happen with some sort of corruption or 3242 * due to a bug. 3243 */ 3244 ntfs_error(vol->mp, "Standard information " 3245 "attribute is missing from " 3246 "mft_no 0x%llx. Run chkdsk.", 3247 (unsigned long long) 3248 base_ni->mft_no); 3249 err = EIO; 3250 NVolSetErrors(vol); 3251 goto unm_err; 3252 } 3253 ntfs_error(vol->mp, "Failed to iterate over attribute " 3254 "records in base mft record 0x%llx " 3255 "(error %d).", 3256 (unsigned long long)base_ni->mft_no, 3257 err); 3258 goto unm_err; 3259 } 3260 a = actx.a; 3261 if (type == AT_UNUSED) { 3262 /* 3263 * Skip the attribute list attribute itself as that is 3264 * not represented inside itself and we cannot move it 3265 * out anyway. 3266 * 3267 * Also, do not touch standard information, index root, 3268 * and unnamed $DATA attributes. They will be moved 3269 * out to extent mft records in later passes if really 3270 * necessary. 3271 */ 3272 if (a->type == AT_ATTRIBUTE_LIST || 3273 a->type == AT_STANDARD_INFORMATION || 3274 a->type == AT_INDEX_ROOT || 3275 (a->type == AT_DATA && 3276 !a->name_length)) 3277 continue; 3278 } 3279 /* 3280 * Move the attribute out to an extent mft record and update 3281 * its attribute list entry. 3282 * 3283 * But first find the attribute list entry matching the 3284 * attribute record so it can be updated. 3285 */ 3286 a_name = (ntfschar*)((u8*)a + le16_to_cpu(a->name_offset)); 3287 al_entry = (ATTR_LIST_ENTRY*)base_ni->attr_list; 3288 do { 3289 /* 3290 * The attribute must be present in the attribute list 3291 * attribute or something is corrupt. 
3292 */ 3293 if ((u8*)al_entry >= al_end || !al_entry->length) { 3294 ntfs_error(vol->mp, "Attribute type 0x%x not " 3295 "found in attribute list " 3296 "attribute of base mft record " 3297 "0x%llx. Run chkdsk.", 3298 (unsigned)le32_to_cpu(a->type), 3299 (unsigned long long) 3300 base_ni->mft_no); 3301 NVolSetErrors(vol); 3302 err = EIO; 3303 goto unm_err; 3304 } 3305 if (al_entry->mft_reference == mref && 3306 al_entry->instance == a->instance) { 3307 /* 3308 * We found the entry, stop looking but first 3309 * perform a quick sanity check that we really 3310 * do have the correct attribute record. 3311 */ 3312 if (al_entry->type == a->type && 3313 ntfs_are_names_equal( 3314 (ntfschar*)((u8*)al_entry + 3315 al_entry->name_offset), 3316 al_entry->name_length, a_name, 3317 a->name_length, TRUE, 3318 vol->upcase, vol->upcase_len)) 3319 break; 3320 ntfs_error(vol->mp, "Found corrupt attribute " 3321 "list attribute when looking " 3322 "for attribute type 0x%x in " 3323 "attribute list attribute of " 3324 "base mft record 0x%llx. Run " 3325 "chkdsk.", 3326 (unsigned)le32_to_cpu(a->type), 3327 (unsigned long long) 3328 base_ni->mft_no); 3329 NVolSetErrors(vol); 3330 err = EIO; 3331 goto unm_err; 3332 } 3333 /* Go to the next attribute list entry. */ 3334 al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + 3335 le16_to_cpu(al_entry->length)); 3336 } while (1); 3337 /* Finally, move the attribute to an extent record. */ 3338 err = ntfs_attr_record_move_for_attr_list_attribute(&actx, 3339 al_entry, &ctx, NULL); 3340 if (err) { 3341 ntfs_error(vol->mp, "Failed to move attribute type " 3342 "0x%x out of base mft record 0x%llx " 3343 "and into an extent mft record (error " 3344 "%d). 
Run chkdsk.", 3345 (unsigned)le32_to_cpu(a->type), 3346 (unsigned long long)base_ni->mft_no, 3347 err); 3348 NVolSetErrors(vol); 3349 goto unm_err; 3350 } 3351 /* 3352 * If the modified attribute list entry is before the current 3353 * start of attribute list modification we need to sync this 3354 * entry as well. For simplicity we just set @al_ofs to the 3355 * new value thus syncing everything starting at that offset. 3356 */ 3357 if ((u8*)al_entry - base_ni->attr_list < (long)al_ofs) { 3358 al_ofs = (u8*)al_entry - base_ni->attr_list; 3359 al_dirty = TRUE; 3360 } 3361 /* 3362 * If we moved the attribute to be made non-resident we will 3363 * now have enough space so retry the resize. 3364 */ 3365 if (ctx.ni != base_ni) { 3366 /* 3367 * @ctx is not in the base mft record, map the extent 3368 * inode it is in and if it is mapped at a different 3369 * address than before update the pointers in @ctx. 3370 */ 3371retry_map: 3372 err = ntfs_mft_record_map(ctx.ni, &m); 3373 if (err) { 3374 /* 3375 * Something bad has happened. If out of 3376 * memory retry till it succeeds. Any other 3377 * errors are fatal and we have to abort. 3378 * 3379 * We do not need to undo anything as the 3380 * metadata is self-consistent except for the 3381 * attribute list attribute which we need to 3382 * write out. 3383 */ 3384 if (err == ENOMEM) { 3385 (void)thread_block( 3386 THREAD_CONTINUE_NULL); 3387 goto retry_map; 3388 } 3389 ctx.ni = base_ni; 3390 goto unm_err; 3391 } 3392 if (ctx.m != m) { 3393 ctx.a = (ATTR_RECORD*)((u8*)m + 3394 ((u8*)ctx.a - (u8*)ctx.m)); 3395 ctx.m = m; 3396 } 3397 a = ctx.a; 3398 goto retry_resize; 3399 } 3400 /* If we now have enough space retry the resize. */ 3401 if (bytes_needed > le32_to_cpu(m->bytes_allocated) - 3402 le32_to_cpu(m->bytes_in_use)) { 3403 a = ctx.a; 3404 goto retry_resize; 3405 } 3406 } while (1); 3407do_switch: 3408 /* 3409 * Convert the resident part of the attribute record to describe a 3410 * non-resident attribute. 
3411 */ 3412 a->non_resident = 1; 3413 /* Move the attribute name if it exists and update the offset. */ 3414 if (a->name_length) 3415 memmove((u8*)a + name_ofs, 3416 (u8*)a + le16_to_cpu(a->name_offset), 3417 a->name_length * sizeof(ntfschar)); 3418 a->name_offset = cpu_to_le16(name_ofs); 3419 /* Setup the fields specific to non-resident attributes. */ 3420 a->lowest_vcn = 0; 3421 a->highest_vcn = cpu_to_sle64((new_size - 1) >> 3422 vol->cluster_size_shift); 3423 a->mapping_pairs_offset = cpu_to_le16(mp_ofs); 3424 bzero(&a->reservedN, sizeof(a->reservedN)); 3425 a->allocated_size = cpu_to_sle64(new_size); 3426 a->data_size = a->initialized_size = cpu_to_sle64(attr_size); 3427 a->compression_unit = 0; 3428 if (NInoSparse(ni) || NInoCompressed(ni)) { 3429 if (NInoCompressed(ni) || vol->major_ver <= 1) 3430 a->compression_unit = NTFS_COMPRESSION_UNIT; 3431 a->compressed_size = a->allocated_size; 3432 } 3433 /* 3434 * Generate the mapping pairs array into the attribute record. 3435 * 3436 * This cannot fail as we have already checked the size we need to 3437 * build the mapping pairs array. 3438 */ 3439 err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs, arec_size - mp_ofs, 3440 ni->rl.elements ? ni->rl.rl : NULL, 0, -1, NULL); 3441 if (err) 3442 panic("%s(): err\n", __FUNCTION__); 3443 /* Setup the in-memory attribute structure to be non-resident. 
*/ 3444 lck_spin_lock(&ni->size_lock); 3445 ni->allocated_size = new_size; 3446 if (NInoSparse(ni) || NInoCompressed(ni)) { 3447 ni->compressed_size = ni->allocated_size; 3448 if (a->compression_unit) { 3449 ni->compression_block_size = 1U << 3450 (a->compression_unit + 3451 vol->cluster_size_shift); 3452 ni->compression_block_size_shift = 3453 ffs(ni->compression_block_size) - 1; 3454 ni->compression_block_clusters = 1U << 3455 a->compression_unit; 3456 } else { 3457 ni->compression_block_size = 0; 3458 ni->compression_block_size_shift = 0; 3459 ni->compression_block_clusters = 0; 3460 } 3461 } 3462 lck_spin_unlock(&ni->size_lock); 3463 /* 3464 * This needs to be last since we are not allowed to fail once we flip 3465 * this switch. 3466 */ 3467 NInoSetNonResident(ni); 3468 /* Mark the mft record dirty, so it gets written back. */ 3469 NInoSetMrecNeedsDirtying(ctx.ni); 3470 if (ctx.ni != base_ni) 3471 ntfs_extent_mft_record_unmap(ctx.ni); 3472 if (al_dirty) { 3473 ntfs_attr_search_ctx_reinit(&actx); 3474 err = ntfs_attr_list_sync(base_ni, al_ofs, &actx); 3475 if (err) { 3476 ntfs_error(vol->mp, "Failed to write attribute list " 3477 "attribute of mft_no 0x%llx (error " 3478 "%d). Leaving corrupt metadata. Run " 3479 "chkdsk.", 3480 (unsigned long long)base_ni->mft_no, 3481 err); 3482 NVolSetErrors(vol); 3483 } 3484 /* Mark the base mft record dirty, so it gets written back. */ 3485 NInoSetMrecNeedsDirtying(base_ni); 3486 } 3487 ntfs_mft_record_unmap(base_ni); 3488 lck_rw_unlock_exclusive(&ni->rl.lock); 3489 /* 3490 * We have modified the allocated size. If the ntfs inode is the base 3491 * inode, cause the sizes to be written to all the directory index 3492 * entries pointing to the base inode when the inode is written to 3493 * disk. Do not do this for directories as they have both sizes set to 3494 * zero in their index entries. 
3495 */ 3496 if (ni == base_ni && !S_ISDIR(ni->mode)) 3497 NInoSetDirtySizes(ni); 3498 if (upl) 3499 ntfs_page_unmap(ni, upl, pl, TRUE); 3500 ntfs_debug("Done."); 3501 return 0; 3502unm_err: 3503 if (ctx.ni != base_ni) { 3504 NInoSetMrecNeedsDirtying(ctx.ni); 3505 ntfs_extent_mft_record_unmap(ctx.ni); 3506 } 3507 if (al_dirty) { 3508 ntfs_attr_search_ctx_reinit(&actx); 3509 err2 = ntfs_attr_list_sync(base_ni, al_ofs, &actx); 3510 if (err2) { 3511 ntfs_error(vol->mp, "Failed to write attribute list " 3512 "attribute in error code path (error " 3513 "%d). Leaving corrupt metadata. Run " 3514 "chkdsk.", err2); 3515 NVolSetErrors(vol); 3516 } 3517 } 3518 NInoSetMrecNeedsDirtying(base_ni); 3519 ntfs_mft_record_unmap(base_ni); 3520rl_err: 3521 if (upl) { 3522 /* 3523 * If the page was valid release it back to the VM. If it was 3524 * not valid throw it away altogether. 3525 * TODO: We could wrap this up in a ntfs_page_unmap_ext() 3526 * function which takes an extra parameter to specify whether 3527 * to keep the page or to dump it if it is invalid... 3528 */ 3529 if (upl_valid_page(pl, 0)) 3530 ntfs_page_unmap(ni, upl, pl, FALSE); 3531 else 3532 ntfs_page_dump(ni, upl, pl); 3533 } 3534page_err: 3535 if (ni->rl.elements > 0) { 3536 err2 = ntfs_cluster_free_from_rl(vol, ni->rl.rl, 0, -1, NULL); 3537 if (err2) { 3538 ntfs_error(vol->mp, "Failed to undo cluster " 3539 "allocation (error %d). 
Run chkdsk "
					"to recover the lost space.", err2);
			NVolSetErrors(vol);
		}
		err2 = ntfs_rl_truncate_nolock(vol, &ni->rl, 0);
		if (err2)
			panic("%s(): err2\n", __FUNCTION__);
	}
unl_err:
	lck_rw_unlock_exclusive(&ni->rl.lock);
	if (err == EINVAL)
		err = EIO;
	return err;
}

/**
 * ntfs_attr_record_move_for_attr_list_attribute - move an attribute record
 * @al_ctx:		search context describing the attribute to move
 * @al_entry:		attribute list entry of the attribute to move
 * @ctx:		search context of attribute being resized or NULL
 * @remap_needed:	[OUT] pointer to remap_needed variable or NULL
 *
 * Move the attribute described by the attribute search context @al_ctx and
 * @al_entry from its mft record to a newly allocated extent mft record and
 * update @ctx to reflect this fact (if @ctx is not NULL, otherwise it is
 * ignored).
 *
 * If @ctx is present and is the attribute moved out then set *@remap_needed to
 * true. If the caller is not interested in this then @remap_needed can be set
 * to NULL in which case it is ignored.
 *
 * Return 0 on success and errno on error. (Note this function returns
 * positive errno values, e.g. the error from ntfs_mft_record_alloc(), in
 * keeping with the rest of this file.)
 */
errno_t ntfs_attr_record_move_for_attr_list_attribute(
		ntfs_attr_search_ctx *al_ctx, ATTR_LIST_ENTRY *al_entry,
		ntfs_attr_search_ctx *ctx, BOOL *remap_needed)
{
	ntfs_inode *base_ni, *ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	unsigned attr_len;
	errno_t err;

	base_ni = al_ctx->ni;
	ntfs_debug("Entering for mft_no 0x%llx, attribute type 0x%x.",
			(unsigned long long)base_ni->mft_no,
			(unsigned)le32_to_cpu(al_entry->type));
	/*
	 * Allocate a new extent mft record, attach it to the base ntfs inode
	 * and set up the search context to point to it.
	 *
	 * FIXME: We should go through all existing extent mft records which
	 * will all be attached to @base_ni->extent_nis and for each of them we
	 * should map the extent mft record, check for free space and if we
	 * find enough free space for the attribute being moved we should move
	 * the attribute there instead of allocating a new extent mft record.
	 */
	err = ntfs_mft_record_alloc(base_ni->vol, NULL, NULL, base_ni, &ni, &m,
			&a);
	if (err) {
		ntfs_error(base_ni->vol->mp, "Failed to move attribute to a "
				"new mft record because allocation of the new "
				"mft record failed (error %d).", err);
		return err;
	}
	attr_len = le32_to_cpu(al_ctx->a->length);
	/* Make space for the attribute extent and copy it into place. */
	err = ntfs_attr_record_make_space(m, a, attr_len);
	/*
	 * This cannot fail as the new mft record must have enough space to
	 * hold the attribute record given it fitted inside the old mft record.
	 */
	if (err)
		panic("%s(): err\n", __FUNCTION__);
	memcpy(a, al_ctx->a, attr_len);
	/* Delete the attribute record from the base mft record. */
	ntfs_attr_record_delete_internal(al_ctx->m, al_ctx->a);
	/*
	 * We moved the attribute out of the mft record thus @al_ctx->a now
	 * points to the next attribute. Since the caller will want to look at
	 * that next attribute we set @al_ctx->is_first so that the next call
	 * to ntfs_attr_find_in_mft_record() will return the currently pointed
	 * at attribute.
	 */
	al_ctx->is_first = 1;
	/*
	 * Change the moved attribute record to reflect the new sequence number
	 * and the current attribute list attribute entry to reflect the new
	 * mft record reference and sequence number.
	 */
	al_entry->mft_reference = MK_LE_MREF(ni->mft_no, ni->seq_no);
	a->instance = al_entry->instance = m->next_attr_instance;
	/*
	 * Increment the next attribute instance number in the mft record as we
	 * consumed the old one. The & 0xffff keeps the 16-bit counter
	 * wrapping round to zero on overflow.
	 */
	m->next_attr_instance = cpu_to_le16(
			(le16_to_cpu(m->next_attr_instance) + 1) & 0xffff);
	/*
	 * Ensure the changes make it to disk later and unmap the mft record as
	 * we do not need it any more right now.
	 */
	NInoSetMrecNeedsDirtying(ni);
	ntfs_extent_mft_record_unmap(ni);
	/*
	 * Update @ctx if the attribute it describes is still in the base mft
	 * record and the attribute that was deleted was either in front of the
	 * attribute described by @ctx (in which case @ctx->a moves back by the
	 * deleted attribute's length) or it was the attribute described by
	 * @ctx (in which case @ctx is switched to describe the new location in
	 * the newly allocated extent mft record).
	 *
	 * FIXME: When we fix the above FIXME and we thus start to place
	 * multiple attributes in each extent mft record we will need to update
	 * @ctx in a more complex fashion here.
	 */
	if (ctx && ctx->ni == base_ni) {
		if ((u8*)al_ctx->a < (u8*)ctx->a)
			ctx->a = (ATTR_RECORD*)((u8*)ctx->a - attr_len);
		else if (al_ctx->a == ctx->a) {
			ctx->m = m;
			ctx->a = a;
			ctx->ni = ni;
			if (remap_needed)
				*remap_needed = TRUE;
		}
	}
	ntfs_debug("Done.");
	return 0;
}

/**
 * ntfs_attr_record_move - move an attribute record to another mft record
 * @ctx: attribute search context describing the attribute to move
 *
 * Move the attribute described by the attribute search context @ctx from its
 * mft record to a newly allocated extent mft record. On successful return
 * @ctx is setup to point to the moved attribute.
 *
 * Return 0 on success and errno on error. On error, the
 * attribute search context is invalid and must be either reinitialized or
 * released.
 *
 * NOTE: This function expects that an attribute list attribute is already
 * present.
 *
 * Locking: Caller must hold lock on attribute list attribute runlist, i.e.
 *	    @ctx->base_ni->attr_list_rl.lock.
 */
errno_t ntfs_attr_record_move(ntfs_attr_search_ctx *ctx)
{
	ntfs_inode *base_ni, *ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	u8 *a_copy;
	unsigned attr_len;
	errno_t err, err2;
	ntfs_attr_search_ctx al_ctx;
	static const char es[] = " Leaving inconsistent metadata. Unmount "
			"and run chkdsk.";

	base_ni = ctx->base_ni;
	if (!base_ni || !NInoAttrList(base_ni))
		panic("%s(): !base_ni || !NInoAttrList(base_ni)\n",
				__FUNCTION__);
	ni = ctx->ni;
	m = ctx->m;
	a = ctx->a;
	ntfs_debug("Entering for base mft_no 0x%llx, extent mft_no 0x%llx, "
			"attribute type 0x%x.",
			(unsigned long long)base_ni->mft_no,
			(unsigned long long)ni->mft_no,
			(unsigned)le32_to_cpu(a->type));
	attr_len = le32_to_cpu(a->length);
	/* Allocate a temporary buffer to hold the attribute to be moved. */
	a_copy = OSMalloc(attr_len, ntfs_malloc_tag);
	if (!a_copy) {
		ntfs_error(ni->vol->mp, "Not enough memory to allocate "
				"temporary attribute buffer.");
		return ENOMEM;
	}
	/*
	 * Copy the attribute to the temporary buffer and delete it from its
	 * original mft record. Should anything go wrong from here on, the
	 * undo_delete code path below restores the attribute from @a_copy.
	 */
	memcpy(a_copy, a, attr_len);
	ntfs_attr_record_delete_internal(m, a);
	/*
	 * This function will never be called if the attribute is the only
	 * attribute in the mft record as this would not gain anything thus
	 * report a bug in this case.
	 */
	if (((ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)))->type ==
			AT_END)
		panic("%s(): Is only attribute in mft record!\n", __FUNCTION__);
	/* Ensure the changes make it to disk later. */
	NInoSetMrecNeedsDirtying(ni);
	/*
	 * We have finished with this mft record thus if it is an extent mft
	 * record we release it. We do this by hand as we want to keep the
	 * current attribute list attribute entry.
	 */
	if (ni != base_ni)
		ntfs_extent_mft_record_unmap(ni);
	/*
	 * Find the attribute list attribute in the base mft record. Doing
	 * this now hugely simplifies error handling.
	 */
	ntfs_attr_search_ctx_init(&al_ctx, base_ni, ctx->base_m);
	err = ntfs_attr_find_in_mft_record(AT_ATTRIBUTE_LIST, AT_UNNAMED, 0,
			NULL, 0, &al_ctx);
	if (err) {
		ntfs_error(base_ni->vol->mp, "Failed to move attribute to a "
				"new mft record because looking up the "
				"attribute list attribute in the base inode "
				"failed (error %d).", err);
		goto undo_delete;
	}
	/*
	 * Allocate a new extent mft record, attach it to the base ntfs inode
	 * and set up the search context to point to it.
	 */
	err = ntfs_mft_record_alloc(base_ni->vol, NULL, NULL, base_ni, &ni, &m,
			&a);
	if (err) {
		ntfs_error(base_ni->vol->mp, "Failed to move attribute to a "
				"new mft record because allocation of the new "
				"mft record failed (error %d).", err);
		goto undo_delete;
	}
	ctx->ni = ni;
	ctx->m = m;
	ctx->a = a;
	/* Make space for the attribute extent and copy it into place. */
	err = ntfs_attr_record_make_space(m, a, attr_len);
	/*
	 * This cannot fail as the new mft record must have enough space to
	 * hold the attribute record given it fitted inside the old mft record.
	 */
	if (err)
		panic("%s(): err (ntfs_attr_record_make_space())\n",
				__FUNCTION__);
	memcpy(a, a_copy, attr_len);
	/* We do not need the temporary buffer any more. */
	OSFree(a_copy, attr_len, ntfs_malloc_tag);
	/*
	 * Change the moved attribute record to reflect the new sequence number
	 * and the current attribute list attribute entry to reflect the new
	 * mft record reference and sequence number.
	 */
	ctx->al_entry->mft_reference = MK_LE_MREF(ni->mft_no, ni->seq_no);
	a->instance = ctx->al_entry->instance = m->next_attr_instance;
	/*
	 * Increment the next attribute instance number in the mft record as we
	 * consumed the old one.
	 */
	m->next_attr_instance = cpu_to_le16(
			(le16_to_cpu(m->next_attr_instance) + 1) & 0xffff);
	/* Ensure the changes make it to disk later. */
	NInoSetMrecNeedsDirtying(ni);
	/*
	 * Finally, sync the modified attribute list attribute from its in-
	 * memory buffer to the on-disk metadata. How to do this depends on
	 * whether the attribute list attribute itself is resident or not.
	 */
	a = al_ctx.a;
	if (a->non_resident) {
		unsigned ofs;

		/*
		 * Non-resident: write the modified attribute list entry
		 * through the attribute list runlist to disk.
		 */
		ofs = (u8*)ctx->al_entry - base_ni->attr_list;
		err = ntfs_rl_write(base_ni->vol, base_ni->attr_list,
				base_ni->attr_list_size,
				&base_ni->attr_list_rl, ofs,
				le16_to_cpu(ctx->al_entry->length));
		if (err) {
			ntfs_error(base_ni->vol->mp, "Failed to update "
					"on-disk attribute list attribute of "
					"mft_no 0x%llx (error %d).%s",
					(unsigned long long)base_ni->mft_no,
					err, es);
			return err;
		}
	} else {
		ATTR_LIST_ENTRY *al_entry;

		/*
		 * Resident: update the entry inside the attribute list
		 * attribute value in the base mft record directly.
		 */
		al_entry = (ATTR_LIST_ENTRY*)((u8*)a +
				le16_to_cpu(a->value_offset) +
				((u8*)ctx->al_entry - base_ni->attr_list));
		al_entry->mft_reference = ctx->al_entry->mft_reference;
		al_entry->instance = ctx->al_entry->instance;
		/* Ensure the changes make it to disk later. */
		NInoSetMrecNeedsDirtying(base_ni);
	}
	ntfs_debug("Done.");
	return 0;
undo_delete:
	/*
	 * Map the old mft record again (if we unmapped it) and re-insert the
	 * deleted attribute record in its old place.
	 */
	ni = ctx->ni;
	if (ni != base_ni) {
		err2 = ntfs_mft_record_map(ni, &m);
		if (err2) {
			/*
			 * Make it safe to release the attribute search
			 * context.
			 */
			ctx->ni = base_ni;
			ntfs_error(base_ni->vol->mp, "Failed to restore "
					"attribute in mft_no 0x%llx after "
					"allocation failure (error %d).%s",
					(unsigned long long)base_ni->mft_no,
					err2, es);
			NVolSetErrors(base_ni->vol);
			goto err;
		}
		/*
		 * If the extent mft record was mapped into a different
		 * address, adjust the mft record and attribute record pointers
		 * in the search context.
		 */
		if (m != ctx->m) {
			ctx->a = (ATTR_RECORD*)((u8*)m + ((u8*)ctx->a -
					(u8*)ctx->m));
			ctx->m = m;
		}
	}
	/*
	 * Creating space for the attribute in its old mft record cannot fail
	 * because we only just deleted the attribute from the mft record thus
	 * there must be enough space in it.
	 */
	err2 = ntfs_attr_record_make_space(ctx->m, ctx->a, attr_len);
	if (err2)
		panic("%s(): err2\n", __FUNCTION__);
	memcpy(ctx->a, a_copy, attr_len);
	/* Ensure the changes make it to disk later. */
	NInoSetMrecNeedsDirtying(ni);
err:
	OSFree(a_copy, attr_len, ntfs_malloc_tag);
	return err;
}

/**
 * ntfs_attr_set_initialized_size - extend the initialized size of an attribute
 * @ni:			ntfs inode whose sizes to extend
 * @new_init_size:	the new initialized size to set @ni to or -1
 *
 * If @new_init_size is >= 0, set the initialized size in the ntfs inode @ni
 * to @new_init_size. Otherwise ignore @new_init_size and do not change the
 * initialized size in @ni.
 *
 * If the new initialized size is bigger than the data size of the ntfs inode,
 * update the data size to equal the initialized size. In this case also set
 * the size in the ubc.
 *
 * Then, set the data and initialized sizes in the attribute record of the
 * attribute specified by the ntfs inode @ni to the values in the ntfs inode
 * @ni.
 *
 * Thus, if @new_init_size is >= 0, both @ni and its underlying attribute have
 * their initialized size set to @new_init_size and if @new_init_size is < 0,
 * the underlying attribute initialized size is set to the initialized size of
 * the ntfs inode @ni.
 *
 * Note the caller is responsible for any zeroing that needs to happen between
 * the old initialized size and @new_init_size.
 *
 * Note when this function is called for resident attributes it requires that
 * the initialized size equals the data size as anything else does not make
 * sense for resident attributes. Further, @new_init_size must be >= 0, i.e. a
 * specific value must be provided as the call would otherwise be pointless as
 * there is no such thing as an initialized size for resident attributes.
 *
 * Return 0 on success and errno on error.
 *
 * Locking: The caller must hold @ni->lock on the inode for writing.
 */
errno_t ntfs_attr_set_initialized_size(ntfs_inode *ni, s64 new_init_size)
{
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	errno_t err;
	BOOL data_size_updated = FALSE;

#ifdef DEBUG
	/* The size_lock is only taken here to get a consistent debug trace. */
	lck_spin_lock(&ni->size_lock);
	ntfs_debug("Entering for mft_no 0x%llx, attribute type 0x%x, old data "
			"size 0x%llx, old initialized size 0x%llx, new "
			"initialized size 0x%llx.",
			(unsigned long long)ni->mft_no,
			(unsigned)le32_to_cpu(ni->type),
			(unsigned long long)ni->data_size,
			(unsigned long long)ni->initialized_size,
			(unsigned long long)new_init_size);
	lck_spin_unlock(&ni->size_lock);
#endif /* DEBUG */
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	/* Map, pin, and lock the mft record. */
	err = ntfs_mft_record_map(base_ni, &m);
	if (err)
		goto err;
	ctx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!ctx) {
		err = ENOMEM;
		goto unm_err;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			ctx);
	if (err) {
		/*
		 * The attribute described by @ni must exist thus a failed
		 * lookup implies corrupt metadata; report it as an i/o error.
		 */
		if (err == ENOENT)
			err = EIO;
		goto put_err;
	}
	a = ctx->a;
	lck_spin_lock(&ni->size_lock);
	if (new_init_size >= 0) {
		/* The initialized size may never shrink. */
		if (new_init_size < ni->initialized_size)
			panic("%s(): new_init_size < ni->initialized_size\n",
					__FUNCTION__);
		/*
		 * If the new initialized size exceeds the data size extend the
		 * data size to cover the new initialized size.
		 */
		if (new_init_size > ni->data_size) {
			ni->data_size = new_init_size;
			if (a->non_resident)
				a->data_size = cpu_to_sle64(new_init_size);
			else {
				/*
				 * Resident attribute: the data size is the
				 * attribute value length and must fit in the
				 * space available in the attribute record.
				 */
				if (NInoNonResident(ni))
					panic("%s(): NInoNonResident(ni)\n",
							__FUNCTION__);
				if (new_init_size >> 32)
					panic("%s(): new_init_size >> 32\n",
							__FUNCTION__);
				if (new_init_size > le32_to_cpu(a->length) -
						le16_to_cpu(a->value_offset))
					panic("%s(): new_init_size > "
							"le32_to_cpu("
							"a->length) - "
							"le16_to_cpu("
							"a->value_offset)\n",
							__FUNCTION__);
				a->value_length = cpu_to_le32(new_init_size);
			}
			data_size_updated = TRUE;
			if (ni == base_ni && !S_ISDIR(ni->mode))
				NInoSetDirtySizes(ni);
		}
		ni->initialized_size = new_init_size;
	} else {
		/*
		 * @new_init_size < 0: propagate the initialized size already
		 * recorded in the ntfs inode to the attribute record. This
		 * only makes sense for non-resident attributes.
		 */
		if (!a->non_resident)
			panic("%s(): !a->non_resident\n", __FUNCTION__);
		if (ni->initialized_size > ni->data_size)
			panic("%s(): ni->initialized_size > ni->data_size\n",
					__FUNCTION__);
		new_init_size = ni->initialized_size;
	}
	if (a->non_resident) {
		if (!NInoNonResident(ni))
			panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
		a->initialized_size = cpu_to_sle64(new_init_size);
	}
	lck_spin_unlock(&ni->size_lock);
	/*
	 * If this is a directory B+tree index allocation attribute also update
	 * the sizes in the base inode.
	 */
	if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) {
		lck_spin_lock(&base_ni->size_lock);
		if (data_size_updated)
			base_ni->data_size = new_init_size;
		base_ni->initialized_size = new_init_size;
		lck_spin_unlock(&base_ni->size_lock);
	}
	/* Mark the mft record dirty to ensure it gets written out. */
	NInoSetMrecNeedsDirtying(ctx->ni);
put_err:
	ntfs_attr_search_ctx_put(ctx);
unm_err:
	ntfs_mft_record_unmap(base_ni);
	/*
	 * Update the ubc size after releasing the mft record. ubc_setsize()
	 * returns non-zero on success; growing a file cannot fail, hence the
	 * panic on failure.
	 */
	if (data_size_updated) {
		if (!ubc_setsize(ni->vn, new_init_size))
			panic("%s(): ubc_setsize() failed.\n", __FUNCTION__);
	}
	if (!err)
		ntfs_debug("Done.");
	else {
		/* NOTE: The err label deliberately sits inside this else. */
err:
		ntfs_error(ni->vol->mp, "Failed (error %d).", err);
	}
	return err;
}

/**
 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
 * @ni:			ntfs inode of the attribute to extend
 * @new_init_size:	requested new initialized size in bytes
 *
 * Extend the initialized size of an attribute described by the ntfs inode @ni
 * to @new_init_size bytes. This involves zeroing any non-sparse space between
 * the old initialized size and @new_init_size both in the VM page cache and on
 * disk (if relevant complete pages are already uptodate in the VM page cache
 * then these are simply marked dirty).
 *
 * As a side-effect, the data size as well as the ubc size may be incremented
 * as, in the resident attribute case, it is tied to the initialized size and,
 * in the non-resident attribute case, it may not fall below the initialized
 * size.
 *
 * Note that if the attribute is resident, we do not need to touch the VM page
 * cache at all.
This is because if the VM page is not uptodate we bring it 4053 * uptodate later, when doing the write to the mft record since we then already 4054 * have the page mapped. And if the page is uptodate, the non-initialized 4055 * region will already have been zeroed when the page was brought uptodate and 4056 * the region may in fact already have been overwritten with new data via 4057 * mmap() based writes, so we cannot just zero it. And since POSIX specifies 4058 * that the behaviour of resizing a file whilst it is mmap()ped is unspecified, 4059 * we choose not to do zeroing and thus we do not need to touch the VM page at 4060 * all. 4061 * 4062 * Return 0 on success and errno on error. In the case that an error is 4063 * encountered it is possible that the initialized size and/or the data size 4064 * will already have been incremented some way towards @new_init_size but it is 4065 * guaranteed that if this is the case, the necessary zeroing will also have 4066 * happened and that all metadata is self-consistent. 4067 * 4068 * Locking: - Caller must hold @ni->lock on the inode for writing. 4069 * - The runlist @ni must be unlocked as it is taken for writing. 
4070 */ 4071errno_t ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size) 4072{ 4073 VCN vcn, end_vcn; 4074 s64 size, old_init_size, ofs; 4075 ntfs_volume *vol; 4076 ntfs_inode *base_ni; 4077 MFT_RECORD *m; 4078 ATTR_RECORD *a; 4079 ntfs_attr_search_ctx *ctx; 4080 u8 *kattr; 4081 ntfs_rl_element *rl = NULL; 4082 errno_t err; 4083 unsigned attr_len; 4084 BOOL locked, write_locked, is_sparse, mark_sizes_dirty; 4085 4086 lck_spin_lock(&ni->size_lock); 4087 if (new_init_size > ni->allocated_size) 4088 panic("%s(): new_init_size > ni->allocated_size\n", 4089 __FUNCTION__); 4090 size = ni->data_size; 4091 old_init_size = ni->initialized_size; 4092 lck_spin_unlock(&ni->size_lock); 4093 if (new_init_size <= old_init_size) 4094 panic("%s(): new_init_size <= old_init_size\n", 4095 __FUNCTION__); 4096 mark_sizes_dirty = write_locked = FALSE; 4097 vol = ni->vol; 4098 ntfs_debug("Entering for mft_no 0x%llx, old initialized size 0x%llx, " 4099 "new initialized size 0x%llx, old data size 0x%llx.", 4100 (unsigned long long)ni->mft_no, 4101 (unsigned long long)old_init_size, 4102 (unsigned long long)new_init_size, 4103 (unsigned long long)size); 4104 base_ni = ni; 4105 if (NInoAttr(ni)) 4106 base_ni = ni->base_ni; 4107 /* Use goto to reduce indentation and we need the label below anyway. */ 4108 if (NInoNonResident(ni)) 4109 goto do_non_resident_extend; 4110 if (old_init_size != size) 4111 panic("%s(): old_init_size != size\n", __FUNCTION__); 4112 /* Map, pin, and lock the mft record. 
*/ 4113 err = ntfs_mft_record_map(base_ni, &m); 4114 if (err) 4115 goto err; 4116 ctx = ntfs_attr_search_ctx_get(base_ni, m); 4117 if (!ctx) { 4118 err = ENOMEM; 4119 goto unm_err; 4120 } 4121 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0, 4122 ctx); 4123 if (err) { 4124 if (err == ENOENT) 4125 err = EIO; 4126 goto put_err; 4127 } 4128 a = ctx->a; 4129 if (a->non_resident) 4130 panic("%s(): a->non_resident\n", __FUNCTION__); 4131 /* The total length of the attribute value. */ 4132 attr_len = le32_to_cpu(a->value_length); 4133 if (size != attr_len) 4134 panic("%s(): size != attr_len\n", __FUNCTION__); 4135 /* 4136 * Do the zeroing in the mft record and update the attribute size in 4137 * the mft record. 4138 */ 4139 kattr = (u8*)a + le16_to_cpu(a->value_offset); 4140 bzero(kattr + attr_len, new_init_size - attr_len); 4141 a->value_length = cpu_to_le32((u32)new_init_size); 4142 /* Update the sizes in the ntfs inode as well as the ubc size. */ 4143 lck_spin_lock(&ni->size_lock); 4144 ni->initialized_size = ni->data_size = size = new_init_size; 4145 lck_spin_unlock(&ni->size_lock); 4146 /* Mark the mft record dirty to ensure it gets written out. */ 4147 NInoSetMrecNeedsDirtying(ctx->ni); 4148 ntfs_attr_search_ctx_put(ctx); 4149 ntfs_mft_record_unmap(base_ni); 4150 ubc_setsize(ni->vn, new_init_size); 4151 mark_sizes_dirty = TRUE; 4152 goto done; 4153do_non_resident_extend: 4154 /* 4155 * If the new initialized size @new_init_size exceeds the current data 4156 * size we need to extend the file size to the new initialized size. 4157 */ 4158 if (new_init_size > size) { 4159 /* Map, pin, and lock the mft record. 
*/ 4160 err = ntfs_mft_record_map(base_ni, &m); 4161 if (err) 4162 goto err; 4163 ctx = ntfs_attr_search_ctx_get(base_ni, m); 4164 if (!ctx) { 4165 err = ENOMEM; 4166 goto unm_err; 4167 } 4168 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, 4169 NULL, 0, ctx); 4170 if (err) { 4171 if (err == ENOENT) 4172 err = EIO; 4173 goto put_err; 4174 } 4175 a = ctx->a; 4176 if (!a->non_resident) 4177 panic("%s(): !a->non_resident\n", __FUNCTION__); 4178 if (size != sle64_to_cpu(a->data_size)) 4179 panic("%s(): size != sle64_to_cpu(a->data_size)\n", 4180 __FUNCTION__); 4181 size = new_init_size; 4182 lck_spin_lock(&ni->size_lock); 4183 ni->data_size = new_init_size; 4184 lck_spin_unlock(&ni->size_lock); 4185 a->data_size = cpu_to_sle64(new_init_size); 4186 /* Mark the mft record dirty to ensure it gets written out. */ 4187 NInoSetMrecNeedsDirtying(ctx->ni); 4188 ntfs_attr_search_ctx_put(ctx); 4189 ntfs_mft_record_unmap(base_ni); 4190 mark_sizes_dirty = TRUE; 4191 ubc_setsize(ni->vn, new_init_size); 4192 } 4193 /* 4194 * If the attribute is not sparse we can simply map each page between 4195 * the old initialized size and the new initialized size which takes 4196 * care of any needed zeroing and then unmap the page again marking it 4197 * dirty so the VM later causes it to be written out. 4198 * 4199 * If the file is sparse on the other hand things are more complicated 4200 * because we want to skip any sparse regions because mapping a sparse 4201 * page and then unmapping it again and marking it dirty would cause 4202 * the hole to be filled when the page is written out. 4203 * 4204 * Thus for sparse files we walk the runlist before we start doing 4205 * anything and check whether there are any sparse regions between the 4206 * old initialized size and the new initialized size. If there are no 4207 * sparse regions we can simply proceed as if this attribute was not 4208 * sparse. 
4209 * 4210 * If there are sparse regions then we ensure that all runlist 4211 * fragments between the old initialized size and new initialized size 4212 * are mapped and then we hold the runlist lock shared and walk the 4213 * runlist and only for non-sparse regions do we do the page mapping, 4214 * unmapping and dirtying. 4215 */ 4216 ofs = old_init_size & ~PAGE_MASK_64; 4217 write_locked = locked = FALSE; 4218 is_sparse = (NInoSparse(ni)); 4219 if (is_sparse) { 4220 BOOL have_holes = FALSE; 4221 4222 locked = TRUE; 4223 lck_rw_lock_shared(&ni->rl.lock); 4224 vcn = ofs >> vol->cluster_size_shift; 4225 end_vcn = (new_init_size + vol->cluster_size_mask) >> 4226 vol->cluster_size_shift; 4227retry_remap: 4228 rl = ni->rl.rl; 4229 if (!ni->rl.elements || vcn < rl->vcn || !rl->length) { 4230map_vcn: 4231 if (!write_locked) { 4232 write_locked = TRUE; 4233 if (!lck_rw_lock_shared_to_exclusive( 4234 &ni->rl.lock)) { 4235 lck_rw_lock_exclusive(&ni->rl.lock); 4236 goto retry_remap; 4237 } 4238 } 4239 /* Need to map the runlist fragment containing @vcn. */ 4240 err = ntfs_map_runlist_nolock(ni, vcn, NULL); 4241 if (err) { 4242 ntfs_error(vol->mp, "Failed to map runlist " 4243 "fragment (error %d).", err); 4244 if (err == EINVAL) 4245 err = EIO; 4246 goto unl_err; 4247 } 4248 rl = ni->rl.rl; 4249 if (!ni->rl.elements || vcn < rl->vcn || !rl->length) 4250 panic("%s(): !ni->rl.elements || " 4251 "vcn < rl[0].vcn || " 4252 "!rl->length\n", __FUNCTION__); 4253 } 4254 /* Seek to the runlist element containing @vcn. */ 4255 while (rl->length && vcn >= rl[1].vcn) 4256 rl++; 4257 do { 4258 /* 4259 * If this run is not mapped map it now and start again 4260 * as the runlist will have been updated. 4261 */ 4262 if (rl->lcn == LCN_RL_NOT_MAPPED) { 4263 vcn = rl->vcn; 4264 goto map_vcn; 4265 } 4266 /* If this run is not valid abort with an error. 
*/ 4267 if (!rl->length || rl->lcn < LCN_HOLE) 4268 goto rl_err; 4269 if (rl->lcn == LCN_HOLE) { 4270 have_holes = TRUE; 4271 /* 4272 * If the current initialized size is inside 4273 * the current run we can move the initialized 4274 * size forward to the end of this run taking 4275 * care not to go beyond the new initialized 4276 * size. 4277 * 4278 * Note we also have to take care not to move 4279 * the initialized size backwards thus we only 4280 * have to update the initialized size if the 4281 * current offset is above the old initialized 4282 * size. 4283 */ 4284 if (ofs >> vol->cluster_size_shift >= rl->vcn) { 4285 ofs = rl[1].vcn << 4286 vol->cluster_size_shift; 4287 if (ofs > old_init_size) { 4288 if (ofs > new_init_size) 4289 ofs = new_init_size; 4290 lck_spin_lock(&ni->size_lock); 4291 ni->initialized_size = ofs; 4292 lck_spin_unlock(&ni->size_lock); 4293 if (ofs == new_init_size) 4294 goto update_done; 4295 } 4296 } 4297 } 4298 /* Proceed to the next run. */ 4299 rl++; 4300 } while (rl->vcn < end_vcn); 4301 /* 4302 * If we encountered sparse regions in the runlist then we need 4303 * to keep the runlist lock shared. 4304 * 4305 * If there were no sparse regions we do not need the runlist 4306 * lock at all any more so we release it and we pretend this 4307 * attribute is not sparse. 4308 */ 4309 if (have_holes) { 4310 if (write_locked) { 4311 lck_rw_lock_exclusive_to_shared(&ni->rl.lock); 4312 write_locked = FALSE; 4313 } 4314 /* 4315 * We may have moved @ofs forward in which case it will 4316 * be cluster aligned instead of page aligned and the 4317 * two are not equal when the cluster size is less than 4318 * the page size so we need to align at @ofs to the 4319 * page size again. 
4320 */ 4321 ofs &= ~PAGE_MASK_64; 4322 rl = ni->rl.rl; 4323 } else { 4324 if (write_locked) 4325 lck_rw_unlock_exclusive(&ni->rl.lock); 4326 else 4327 lck_rw_unlock_shared(&ni->rl.lock); 4328 locked = FALSE; 4329 is_sparse = FALSE; 4330 } 4331 } 4332 do { 4333 /* 4334 * If the file is sparse, check if the current page is 4335 * completely sparse and if so skip it. 4336 * 4337 * Otherwise take care of zeroing the uninitialized region. 4338 */ 4339 if (is_sparse) { 4340 /* We need to update @vcn to the current offset @ofs. */ 4341 vcn = ofs >> vol->cluster_size_shift; 4342 /* Determine the first VCN outside the current page. */ 4343 end_vcn = (ofs + PAGE_SIZE + vol->cluster_size_mask) >> 4344 vol->cluster_size_shift; 4345 /* Seek to the runlist element containing @vcn. */ 4346 while (rl->length && vcn >= rl[1].vcn) 4347 rl++; 4348 /* If this run is not valid abort with an error. */ 4349 if (!rl->length || rl->lcn < LCN_HOLE) 4350 goto rl_err; 4351 /* 4352 * @rl is the runlist element containing @ofs, the 4353 * current initialized size, and the current @vcn. 4354 * 4355 * Check whether the current page is completely sparse. 4356 * This is complicated slightly by the fact that a page 4357 * can span multiple clusters when the cluster size is 4358 * less than the page size. 4359 * 4360 * As an optimization when a sparse run spans more than 4361 * one page we forward both @ofs and the initialized 4362 * size to the end of the run (ensuring it is page 4363 * aligned). 4364 */ 4365 do { 4366 if (rl->lcn >= 0) { 4367 /* This page is not entirely sparse. */ 4368 goto on_disk_page; 4369 } 4370 /* Proceed to the next run. */ 4371 rl++; 4372 vcn = rl->vcn; 4373 } while (vcn < end_vcn && rl->length); 4374 /* 4375 * The page is entirely sparse. 4376 * 4377 * Check how many pages are entirely sparse and move 4378 * the initialized size up to the end of the sparse 4379 * region ensuring we maintain page alignment. 
4380 */ 4381 while (rl->lcn == LCN_HOLE && rl->length) 4382 rl++; 4383 ofs = (rl->vcn << vol->cluster_size_shift) & 4384 ~PAGE_MASK_64; 4385 /* 4386 * Update the initialized size in the ntfs inode. This 4387 * is enough to make ntfs_vnop_pageout() work. We 4388 * could postpone this until we actually are going to 4389 * unmap a page or we have reached the end of the 4390 * region to be initialized but we do it now to 4391 * minimize our impact on processes that are performing 4392 * concurrent mmap() based writes to this attribute. 4393 * 4394 * FIXME: This is not actually true as the caller is 4395 * holding the ntfs inode lock for writing thus no 4396 * pageouts on this inode can occur at all. We 4397 * probably need to fix this so we cannot bring the 4398 * system out of memory. 4399 */ 4400 if (ofs > new_init_size) 4401 ofs = new_init_size; 4402 lck_spin_lock(&ni->size_lock); 4403 ni->initialized_size = ofs; 4404 lck_spin_unlock(&ni->size_lock); 4405 } else /* if (!is_sparse) */ { 4406 upl_t upl; 4407 upl_page_info_array_t pl; 4408 4409on_disk_page: 4410 /* 4411 * Read the page. If the page is not present, 4412 * ntfs_page_map() will zero the uninitialized/sparse 4413 * regions for us. 4414 * 4415 * TODO: An optimization would be to do things by hand 4416 * taking advantage of dealing with multiple pages at 4417 * once instead of working one page at a time. 4418 * 4419 * FIXME: We are potentially creating a lot of dirty 4420 * pages here and since the caller is holding the ntfs 4421 * inode lock for writing no pageouts on this inode can 4422 * occur at all. We probably need to fix this so we 4423 * cannot bring the system out of memory. 4424 */ 4425// TODO: This should never happen. Just adding it so we can detect if we were 4426// going to deadlock. If it triggers need to fix it in the code so it does 4427// not. Or perhaps just remove the warning and use this as the solution. 
4428 if (locked && write_locked) { 4429 write_locked = FALSE; 4430 lck_rw_lock_exclusive_to_shared(&ni->rl.lock); 4431 ntfs_warning(vol->mp, "Switching runlist lock " 4432 "to shared to avoid " 4433 "deadlock."); 4434 } 4435 err = ntfs_page_map(ni, ofs, &upl, &pl, &kattr, TRUE); 4436 if (err) 4437 goto unl_err; 4438 /* 4439 * Update the initialized size in the ntfs inode. This 4440 * is enough to make ntfs_vnop_pageout() work. 4441 */ 4442 ofs += PAGE_SIZE; 4443 if (ofs > new_init_size) 4444 ofs = new_init_size; 4445 lck_spin_lock(&ni->size_lock); 4446 ni->initialized_size = ofs; 4447 lck_spin_unlock(&ni->size_lock); 4448 /* Set the page dirty so it gets written out. */ 4449 ntfs_page_unmap(ni, upl, pl, TRUE); 4450 } 4451 } while (ofs < new_init_size); 4452 lck_spin_lock(&ni->size_lock); 4453 if (ni->initialized_size != new_init_size) 4454 panic("%s(): ni->initialized_size != new_init_size\n", 4455 __FUNCTION__); 4456 lck_spin_unlock(&ni->size_lock); 4457update_done: 4458 /* If we are holding the runlist lock, release it now. */ 4459 if (locked) { 4460 if (write_locked) 4461 lck_rw_unlock_exclusive(&ni->rl.lock); 4462 else 4463 lck_rw_unlock_shared(&ni->rl.lock); 4464 locked = FALSE; 4465 } 4466 /* Bring up to date the initialized_size in the attribute record. */ 4467 err = ntfs_attr_set_initialized_size(ni, -1); 4468 if (err) 4469 goto unl_err; 4470done: 4471 /* 4472 * If we have modified the size of the base inode, cause the sizes to 4473 * be written to all the directory index entries pointing to the base 4474 * inode when the inode is written to disk. 4475 */ 4476 if (mark_sizes_dirty && ni == base_ni && !S_ISDIR(ni->mode)) 4477 NInoSetDirtySizes(ni); 4478 ntfs_debug("Done, new initialized size 0x%llx, new data size 0x%llx.", 4479 (unsigned long long)new_init_size, 4480 (unsigned long long)size); 4481 return 0; 4482rl_err: 4483 ntfs_error(vol->mp, "Runlist is corrupt. 
Unmount and run chkdsk."); 4484 NVolSetErrors(vol); 4485 err = EIO; 4486unl_err: 4487 if (locked) { 4488 if (write_locked) 4489 lck_rw_unlock_exclusive(&ni->rl.lock); 4490 else 4491 lck_rw_unlock_shared(&ni->rl.lock); 4492 } 4493 lck_spin_lock(&ni->size_lock); 4494 ni->initialized_size = old_init_size; 4495 lck_spin_unlock(&ni->size_lock); 4496 goto err; 4497put_err: 4498 ntfs_attr_search_ctx_put(ctx); 4499unm_err: 4500 ntfs_mft_record_unmap(base_ni); 4501err: 4502 ntfs_debug("Failed (error %d).", err); 4503 return err; 4504} 4505 4506/** 4507 * ntfs_attr_sparse_set - switch an attribute to be sparse 4508 * @base_ni: base ntfs inode to which the attribute belongs 4509 * @ni: ntfs inode of attribute which to cause to be sparse 4510 * @ctx: attribute search context describing the attribute to work on 4511 * 4512 * Switch the non-sparse, base attribute described by @ni and @ctx belonging to 4513 * the base ntfs inode @base_ni to be sparse. 4514 * 4515 * Return 0 on success and errno on error. 4516 * 4517 * Note that the attribute may be moved to be able to extend it when adding the 4518 * compressed size. Thus any cached values of @ctx->ni, @ctx->m, and @ctx->a 4519 * are invalid after this function returns. 
 */
static errno_t ntfs_attr_sparse_set(ntfs_inode *base_ni, ntfs_inode *ni,
		ntfs_attr_search_ctx *ctx)
{
	/*
	 * NOTE(review): The implementation below is compiled out with "#if 0"
	 * thus this function currently only logs the call and returns ENOTSUP,
	 * i.e. switching an attribute to be sparse is not supported yet.  The
	 * disabled code is retained as the intended implementation.
	 */
#if 0
	VCN highest_vcn, stop_vcn;
	ntfs_volume *vol;
	MFT_RECORD *base_m, *m;
	ATTR_RECORD *a;
	ntfs_rl_element *rl;
	ntfs_inode *eni;
	ATTR_LIST_ENTRY *al_entry;
	unsigned name_size, mp_ofs, mp_size, al_entry_len, new_al_size;
	unsigned new_al_alloc;
	errno_t err;
	BOOL rewrite;
#endif

	ntfs_debug("Entering for mft_no 0x%llx, type 0x%x, name_len 0x%x.",
			(unsigned long long)base_ni->mft_no,
			(unsigned)le32_to_cpu(ni->type), ni->name_len);
	return ENOTSUP;
#if 0
	vol = base_ni->vol;
	base_m = base_ni->m;
	m = ctx->m;
	a = ctx->a;
	rewrite = FALSE;
	/*
	 * We should only be called for non-sparse, non-resident, $DATA
	 * attributes.
	 */
	if (a->type != AT_DATA || !NInoNonResident(ni) || !a->non_resident ||
			NInoSparse(ni) || a->flags & ATTR_IS_SPARSE)
		panic("%s(): a->type != AT_DATA || !NInoNonResident(ni) || "
				"!a->non_resident || NInoSparse(ni) || "
				"a->flags & ATTR_IS_SPARSE\n", __FUNCTION__);
	/*
	 * If the attribute is not compressed either, we need to add the
	 * compressed size to the attribute record and to switch all relevant
	 * fields to match.
	 */
	if (NInoCompressed(ni))
		goto is_compressed;
	if (a->flags & ATTR_IS_COMPRESSED)
		panic("%s(): a->flags & ATTR_IS_COMPRESSED)\n", __FUNCTION__);
retry_attr_rec_resize:
	/* Try to grow the record in place by the size of the new field. */
	err = ntfs_attr_record_resize(m, a, le32_to_cpu(a->length) +
			sizeof(a->compressed_size));
	if (!err) {
		/*
		 * Move everything at the offset of the compressed size to make
		 * space for the compressed size.
		 */
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size) +
				sizeof(a->compressed_size), (u8*)a +
				offsetof(ATTR_RECORD, compressed_size),
				le32_to_cpu(a->length) - offsetof(ATTR_RECORD,
				compressed_size));
		/*
		 * Update the name offset to match the moved data.  If there is
		 * no name then set the name offset to the correct position
		 * instead of adding to a potentially incorrect value.
		 */
		if (a->name_length)
			a->name_offset = cpu_to_le16(
					le16_to_cpu(a->name_offset) +
					sizeof(a->compressed_size));
		else
			a->name_offset = const_cpu_to_le16(
					offsetof(ATTR_RECORD,
					compressed_size) +
					sizeof(a->compressed_size));
		/* Update the mapping pairs offset to its new location. */
		mp_ofs = le16_to_cpu(a->mapping_pairs_offset) +
				sizeof(a->compressed_size);
		goto set_compressed_size;
	}
	/*
	 * There is not enough space in the mft record.
	 *
	 * We need to add an attribute list attribute if it is not already
	 * present.
	 */
	if (!NInoAttrList(base_ni)) {
		err = ntfs_attr_list_add(base_ni, base_m, ctx);
		if (err || ctx->is_error) {
			if (!err)
				err = ctx->error;
			ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx "
					"(error %d).", ctx->is_error ?
					"remap extent mft record of" :
					"add attribute list attribute to",
					(unsigned long long)base_ni->mft_no,
					err);
			return err;
		}
		/*
		 * The attribute location will have changed so update it from
		 * the search context.
		 */
		m = ctx->m;
		a = ctx->a;
		/*
		 * Retry the original attribute record resize as we may now
		 * have enough space to add the compressed size to the
		 * attribute record.
		 *
		 * This can for example happen when the attribute was moved out
		 * to an extent mft record which has much more free space than
		 * the base mft record had.
		 */
		goto retry_attr_rec_resize;
	}
	/*
	 * If this is not the only attribute record in the mft record then move
	 * it out to a new extent mft record which is guaranteed to generate
	 * enough space to add the compressed size to the attribute record.
	 */
	if (!ntfs_attr_record_is_only_one(m, a)) {
		lck_rw_lock_shared(&base_ni->attr_list_rl.lock);
		err = ntfs_attr_record_move(ctx);
		lck_rw_unlock_shared(&base_ni->attr_list_rl.lock);
		if (err) {
			ntfs_error(vol->mp, "Failed to move attribute extent "
					"from mft record 0x%llx to an extent "
					"mft record (error %d).",
					(unsigned long long)ctx->ni->mft_no,
					err);
			/*
			 * We could try to remove the attribute list attribute
			 * if we added it above but this will require
			 * attributes to be moved back into the base mft record
			 * from extent mft records so is a lot of work and
			 * given we are in an error code path and given that it
			 * is ok to just leave the inode with an attribute list
			 * attribute we do not bother and just bail out.
			 */
			return err;
		}
		/*
		 * The attribute location will have changed so update it from
		 * the search context.
		 */
		m = ctx->m;
		a = ctx->a;
		/*
		 * Retry the original attribute record resize as we will now
		 * have enough space to add the compressed size to the
		 * attribute record.
		 */
		goto retry_attr_rec_resize;
	}
	/*
	 * This is the only attribute in the mft record thus there is nothing
	 * to gain by moving it to another extent mft record.  So to generate
	 * space, we allocate a new extent mft record, create a new extent
	 * attribute record in it and use it to catch the overflow mapping
	 * pairs array data generated by the fact that we have added the
	 * compressed size to the base extent.
	 *
	 * TODO: We could instead iterate over all existing extent attribute
	 * records and rewrite the entire mapping pairs array but this could
	 * potentially be a lot of overhead.  On the other hand it would be an
	 * infrequent event thus the overhead may be worth it in the long term
	 * as it will generate better packed metadata.  For now we choose the
	 * simpler approach of just doing the splitting into a new extent
	 * attribute record.
	 *
	 * As we are going to rewrite the mapping pairs array we need to make
	 * sure we have decompressed the mapping pairs from the base attribute
	 * extent and have them cached in the runlist.
	 */
	if (!ni->rl.elements || ni->rl.rl->lcn == LCN_RL_NOT_MAPPED) {
		err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
		if (err) {
			ntfs_error(vol->mp, "Mapping of the base runlist "
					"fragment failed (error %d).", err);
			if (err != ENOMEM)
				err = EIO;
			return err;
		}
	}
	rewrite = TRUE;
	/*
	 * Now add the compressed size so we can unmap the mft record of the
	 * base attribute extent if it is an extent mft record.
	 *
	 * First, move the name if present to its new location and update the
	 * name offset to match the new location.
	 */
	name_size = a->name_length * sizeof(ntfschar);
	if (name_size)
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size) +
				sizeof(a->compressed_size), (u8*)a +
				le16_to_cpu(a->name_offset), name_size);
	a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
			compressed_size) + sizeof(a->compressed_size));
	/* Update the mapping pairs offset to its new location. */
	mp_ofs = (offsetof(ATTR_RECORD, compressed_size) +
			sizeof(a->compressed_size) + name_size + 7) & ~7;
set_compressed_size:
	a->mapping_pairs_offset = cpu_to_le16(mp_ofs);
	/*
	 * Set the compression unit to 0 or 4 depending on the NTFS volume
	 * version.  FIXME: We know that NT4 uses 4 whilst XPSP2 uses 0 and we
	 * do not know what 2k uses so we assume 2k is the same as XPSP2.
	 */
	if (vol->major_ver > 1) {
		a->compression_unit = 0;
		ni->compression_block_size = 0;
		ni->compression_block_clusters =
				ni->compression_block_size_shift = 0;
	} else {
		a->compression_unit = NTFS_COMPRESSION_UNIT;
		ni->compression_block_size = 1U << (NTFS_COMPRESSION_UNIT +
				vol->cluster_size_shift);
		ni->compression_block_size_shift =
				ffs(ni->compression_block_size) - 1;
		ni->compression_block_clusters = 1U << NTFS_COMPRESSION_UNIT;
	}
	/* Initialize the compressed size from the allocated size. */
	lck_spin_lock(&ni->size_lock);
	ni->compressed_size = ni->allocated_size;
	a->compressed_size = a->allocated_size;
	lck_spin_unlock(&ni->size_lock);
is_compressed:
	/* Mark both the attribute and the ntfs inode as sparse. */
	a->flags |= ATTR_IS_SPARSE;
	NInoSetSparse(ni);
	/*
	 * If this is the unnamed $DATA attribute, need to set the sparse flag
	 * in the standard information attribute and in the directory entries,
	 * too.
	 */
	if (ni == base_ni) {
		ni->file_attributes |= FILE_ATTR_SPARSE_FILE;
		NInoSetDirtyFileAttributes(ni);
	}
	/* If we do not need to rewrite the mapping pairs array we are done. */
	if (!rewrite)
		goto done;
	/*
	 * Determine the size of the mapping pairs array needed to fit all the
	 * runlist elements that were stored in the base attribute extent
	 * before we added the compressed size to the attribute record.
	 */
	highest_vcn = sle64_to_cpu(a->highest_vcn);
	err = ntfs_get_size_for_mapping_pairs(vol, ni->rl.elements ?
			ni->rl.rl : NULL, 0, highest_vcn, &mp_size);
	if (err) {
		ntfs_error(vol->mp, "Failed to get size for mapping pairs "
				"array (error %d).", err);
		goto undo1;
	}
	/* Write the mapping pairs array. */
	err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs,
			le32_to_cpu(a->length) - mp_ofs, ni->rl.elements ?
			ni->rl.rl : NULL, 0, highest_vcn, &stop_vcn);
	if (err && err != ENOSPC) {
		ntfs_error(vol->mp, "Failed to rebuild mapping pairs array "
				"(error %d).", err);
		goto undo1;
	}
	/* If by some miracle it all fitted we are done. */
	if (!err)
		goto done;
	/* Update the highest vcn to the new value. */
	a->highest_vcn = cpu_to_sle64(stop_vcn - 1);
	/*
	 * If the base attribute extent is in an extent mft record mark it
	 * dirty so it gets written back and unmap the extent mft record so we
	 * can allocate the new extent mft record.
	 */
	if (ctx->ni != base_ni) {
		NInoSetMrecNeedsDirtying(ctx->ni);
		ntfs_extent_mft_record_unmap(ctx->ni);
		/* Make the search context safe. */
		ctx->ni = base_ni;
	}
	/*
	 * Get the runlist element containing the lowest vcn for the new
	 * attribute record, i.e. @stop_vcn.
	 *
	 * This cannot fail as we know the runlist is ok and the runlist
	 * fragment containing @stop_vcn is mapped.
	 */
	rl = NULL;
	if (ni->rl.elements) {
		rl = ntfs_rl_find_vcn_nolock(ni->rl.rl, stop_vcn);
		if (!rl)
			panic("%s(): Memory corruption detected.\n",
					__FUNCTION__);
	}
	/*
	 * Determine the size of the mapping pairs array needed to fit all the
	 * remaining runlist elements that were stored in the base attribute
	 * extent before we added the compressed size to the attribute record
	 * but did now not fit.
	 */
	err = ntfs_get_size_for_mapping_pairs(vol, rl, stop_vcn, highest_vcn,
			&mp_size);
	if (err) {
		ntfs_error(vol->mp, "Failed to get size for mapping pairs "
				"array (error %d).", err);
		goto undo2;
	}
	/*
	 * We now need to allocate a new extent mft record, attach it to the
	 * base ntfs inode and set up the search context to point to it, then
	 * insert the new attribute record into it.
	 */
	err = ntfs_mft_record_alloc(vol, NULL, NULL, ni, &eni, &m, &a);
	if (err) {
		ntfs_error(vol->mp, "Failed to allocate a new extent mft "
				"record (error %d).", err);
		goto undo2;
	}
	ctx->ni = eni;
	ctx->m = m;
	ctx->a = a;
	/*
	 * Calculate the offset into the new attribute at which the mapping
	 * pairs array begins.  The mapping pairs array is placed after the
	 * name aligned to an 8-byte boundary which in turn is placed
	 * immediately after the non-resident attribute record itself.
	 *
	 * Note that extent attribute records do not have the compressed size
	 * field in their attribute records.
	 */
	mp_ofs = (offsetof(ATTR_RECORD, compressed_size) + name_size + 7) & ~7;
	/*
	 * Make space for the new attribute extent.  This cannot fail as we now
	 * have an empty mft record which by definition can hold a non-resident
	 * attribute record with just a small mapping pairs array.
	 */
	err = ntfs_attr_record_make_space(m, a, mp_ofs + mp_size);
	if (err)
		panic("%s(): err (ntfs_attr_record_make_space())\n",
				__FUNCTION__);
	/*
	 * Now setup the new attribute record.  The entire attribute has been
	 * zeroed and the length of the attribute record has been set.
	 *
	 * Before we proceed with setting up the attribute, add an attribute
	 * list attribute entry for the created attribute extent.
	 */
	al_entry = ctx->al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
			le16_to_cpu(ctx->al_entry->length));
	al_entry_len = (offsetof(ATTR_LIST_ENTRY, name) + name_size + 7) & ~7;
	new_al_size = base_ni->attr_list_size + al_entry_len;
	/* Out of bounds checks. */
	if ((u8*)al_entry < base_ni->attr_list || (u8*)al_entry >
			base_ni->attr_list + new_al_size || (u8*)al_entry +
			al_entry_len > base_ni->attr_list + new_al_size) {
		/* Inode is corrupt. */
		ntfs_error(vol->mp, "Inode 0x%llx is corrupt. Run chkdsk.",
				(unsigned long long)base_ni->mft_no);
		err = EIO;
		goto undo3;
	}
	err = ntfs_attr_size_bounds_check(vol, AT_ATTRIBUTE_LIST, new_al_size);
	if (err) {
		if (err == ERANGE) {
			ntfs_error(vol->mp, "Attribute list attribute would "
					"become to large. You need to "
					"defragment your volume and then try "
					"again.");
			err = ENOSPC;
		} else {
			ntfs_error(vol->mp, "Attribute list attribute is "
					"unknown on the volume. The volume "
					"is corrupt. Run chkdsk.");
			NVolSetErrors(vol);
			err = EIO;
		}
		goto undo3;
	}
	/*
	 * Reallocate the memory buffer if needed and create space for the new
	 * entry.
	 */
	new_al_alloc = (new_al_size + NTFS_ALLOC_BLOCK - 1) &
			~(NTFS_ALLOC_BLOCK - 1);
	if (new_al_alloc > base_ni->attr_list_alloc) {
		u8 *tmp, *al, *al_end;
		unsigned al_entry_ofs;

		tmp = OSMalloc(new_al_alloc, ntfs_malloc_tag);
		if (!tmp) {
			ntfs_error(vol->mp, "Not enough memory to extend the "
					"attribute list attribute.");
			err = ENOMEM;
			goto undo3;
		}
		al = base_ni->attr_list;
		al_entry_ofs = (u8*)al_entry - al;
		al_end = al + base_ni->attr_list_size;
		/* Copy everything up to the insertion point... */
		memcpy(tmp, al, al_entry_ofs);
		/* ...and the remainder shifted up by the new entry length. */
		if ((u8*)al_entry < al_end)
			memcpy(tmp + al_entry_ofs + al_entry_len, al +
					al_entry_ofs, base_ni->attr_list_size -
					al_entry_ofs);
		al_entry = ctx->al_entry = (ATTR_LIST_ENTRY*)(tmp +
				al_entry_ofs);
		OSFree(base_ni->attr_list, base_ni->attr_list_alloc,
				ntfs_malloc_tag);
		base_ni->attr_list_alloc = new_al_alloc;
		base_ni->attr_list = tmp;
	} else if ((u8*)al_entry < base_ni->attr_list +
			base_ni->attr_list_size)
		memmove((u8*)al_entry + al_entry_len, al_entry,
				base_ni->attr_list_size - ((u8*)al_entry -
				base_ni->attr_list));
	base_ni->attr_list_size = new_al_size;
	/* Set up the attribute extent and the attribute list entry. */
	al_entry->type = a->type = ni->type;
	al_entry->length = cpu_to_le16(al_entry_len);
	a->non_resident = 1;
	al_entry->name_length = a->name_length = ni->name_len;
	a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
			compressed_size));
	/*
	 * NOTE(review): unlike the other le16 fields this one stores the
	 * offsetof() value without a cpu_to_le16() conversion -- endian-safe
	 * on little-endian hosts only; confirm this is intentional.
	 */
	al_entry->name_offset = offsetof(ATTR_LIST_ENTRY, name);
	al_entry->instance = a->instance = m->next_attr_instance;
	/*
	 * Increment the next attribute instance number in the mft record as we
	 * consumed the old one.
	 */
	m->next_attr_instance = cpu_to_le16(
			(le16_to_cpu(m->next_attr_instance) + 1) & 0xffff);
	al_entry->lowest_vcn = a->lowest_vcn = cpu_to_sle64(stop_vcn);
	a->highest_vcn = cpu_to_sle64(highest_vcn);
	al_entry->mft_reference = MK_LE_MREF(eni->mft_no, eni->seq_no);
	a->mapping_pairs_offset = cpu_to_le16(mp_ofs);
	/* Copy the attribute name into place. */
	if (name_size) {
		memcpy((u8*)a + offsetof(ATTR_RECORD, compressed_size),
				ni->name, name_size);
		memcpy(&al_entry->name, ni->name, name_size);
	}
	/* For tidiness, zero out the unused space. */
	if (al_entry_len > offsetof(ATTR_LIST_ENTRY, name) + name_size)
		memset((u8*)al_entry + offsetof(ATTR_LIST_ENTRY, name) +
				name_size, 0, al_entry_len -
				(offsetof(ATTR_LIST_ENTRY, name) + name_size));
	/*
	 * Extend the attribute list attribute and copy in the modified value
	 * from the cache.
	 */
	err = ntfs_attr_list_sync_extend(base_ni, base_m,
			(u8*)al_entry - base_ni->attr_list, ctx);
	if (err || ctx->is_error) {
		/*
		 * If @ctx->is_error indicates error this is fatal as we cannot
		 * build the mapping pairs array into it as it is not mapped.
		 *
		 * However, we may still be able to recover from this situation
		 * by freeing the extent mft record and thus deleting the
		 * attribute record.  This only works when this is the only
		 * attribute record in the mft record and when we just created
		 * this extent attribute record.  We can easily determine if
		 * this is the only attribute in the mft record by scanning
		 * through the cached attribute list attribute.
		 */
		if (!err)
			err = ctx->error;
		ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx (error %d).",
				ctx->is_error ? "remap extent mft record of" :
				"extend and sync attribute list attribute to",
				(unsigned long long)base_ni->mft_no, err);
		goto undo4;
	}
	/*
	 * Finally, proceed to building the mapping pairs array into the
	 * attribute record.
	 */
	err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs,
			le32_to_cpu(a->length) - mp_ofs, rl, stop_vcn,
			highest_vcn, &stop_vcn);
	if (err && err != ENOSPC) {
		ntfs_error(vol->mp, "Failed to rebuild mapping pairs array "
				"(error %d).", err);
		goto undo5;
	}
	/*
	 * We must have fully rebuilt the mapping pairs array as we made sure
	 * there is enough space.
	 */
	if (err || stop_vcn != highest_vcn + 1)
		panic("%s(): err || stop_vcn != highest_vcn + 1\n",
				__FUNCTION__);
	/*
	 * If the attribute extent is in an extent mft record mark it dirty so
	 * it gets written back and unmap the extent mft record so we can map
	 * the mft record containing the base extent again.
	 */
	if (eni != base_ni) {
		NInoSetMrecNeedsDirtying(eni);
		ntfs_extent_mft_record_unmap(eni);
		/* Make the search context safe. */
		ctx->ni = base_ni;
	}
	/*
	 * Look up the base attribute extent again so we restore the search
	 * context as the caller expects it to be.
	 */
	ntfs_attr_search_ctx_reinit(ctx);
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			ctx);
	if (err) {
		ntfs_error(vol->mp, "Re-lookup of first attribute extent "
				"failed (error %d).", err);
		if (err == ENOENT)
			err = EIO;
		goto undo6;
	}
done:
	ntfs_debug("Done.");
	return 0;
// TODO: HERE:
undo6:
undo5:
undo4:
undo3:
undo2:
undo1:
	/* NOTE(review): error unwinding is not implemented yet (see TODO). */
	panic("%s(): TODO!\n", __FUNCTION__);
	return err;
#endif
}

/**
 * ntfs_attr_sparse_clear - switch an attribute to not be sparse any more
 * @base_ni:	base ntfs inode to which the attribute belongs
 * @ni:		ntfs inode of attribute which to cause not to be sparse
 * @ctx:	attribute search context describing the attribute to work on
 *
 * Switch the sparse attribute described by @ni and @ctx belonging to the base
 * ntfs inode @base_ni to not be sparse any more.
 *
 * This function cannot fail.
 */
static void ntfs_attr_sparse_clear(ntfs_inode *base_ni, ntfs_inode *ni,
		ntfs_attr_search_ctx *ctx)
{
	ATTR_RECORD *a;

	a = ctx->a;
	/*
	 * We should only be called for sparse, non-resident, $DATA attributes.
	 */
	if (a->type != AT_DATA || !NInoNonResident(ni) || !a->non_resident ||
			!NInoSparse(ni) || !(a->flags & ATTR_IS_SPARSE))
		panic("%s(): a->type != AT_DATA || !NInoNonResident(ni) || "
				"!a->non_resident || !NInoSparse(ni) || "
				"!(a->flags & ATTR_IS_SPARSE)\n", __FUNCTION__);
	/*
	 * If the attribute is not compressed we need to remove the compressed
	 * size from the attribute record and to switch all relevant fields to
	 * match.
	 */
	if (!NInoCompressed(ni)) {
		errno_t err;

		if (a->flags & ATTR_IS_COMPRESSED)
			panic("%s(): a->flags & ATTR_IS_COMPRESSED)\n",
					__FUNCTION__);
		/*
		 * Move everything after the compressed size forward to the
		 * offset of the compressed size thus deleting the compressed
		 * size.
		 */
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size),
				(u8*)a + offsetof(ATTR_RECORD,
				compressed_size) + sizeof(a->compressed_size),
				le32_to_cpu(a->length) - (offsetof(ATTR_RECORD,
				compressed_size) + sizeof(a->compressed_size)));
		/*
		 * Update the name offset and the mapping pairs offset to match
		 * the moved data.  If there is no name then set the name
		 * offset to the correct position instead of subtracting from a
		 * potentially incorrect value.
		 */
		if (!a->name_length)
			a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
					compressed_size));
		else
			a->name_offset = cpu_to_le16(
					le16_to_cpu(a->name_offset) -
					sizeof(a->compressed_size));
		a->mapping_pairs_offset = cpu_to_le16(
				le16_to_cpu(a->mapping_pairs_offset) -
				sizeof(a->compressed_size));
		/* Set the compression unit to 0. */
		a->compression_unit = 0;
		lck_spin_lock(&ni->size_lock);
		ni->compressed_size = 0;
		lck_spin_unlock(&ni->size_lock);
		/* Clear the other related fields. */
		ni->compression_block_size = 0;
		ni->compression_block_clusters =
				ni->compression_block_size_shift = 0;
		/*
		 * Finally shrink the attribute record to reflect the removal
		 * of the compressed size.  Note, this cannot fail since we are
		 * making the attribute smaller thus by definition there is
		 * enough space to do so.
		 */
		err = ntfs_attr_record_resize(ctx->m, a,
				le32_to_cpu(a->length) -
				sizeof(a->compressed_size));
		if (err)
			panic("%s(): err\n", __FUNCTION__);
	}
	/* Mark both the attribute and the ntfs inode as non-sparse. */
	a->flags &= ~ATTR_IS_SPARSE;
	NInoClearSparse(ni);
	/*
	 * If this is the unnamed $DATA attribute, need to clear the sparse
	 * flag in the standard information attribute and in the directory
	 * entries, too.
	 */
	if (ni == base_ni) {
		ni->file_attributes &= ~FILE_ATTR_SPARSE_FILE;
		NInoSetDirtyFileAttributes(ni);
	}
}

/**
 * ntfs_attr_instantiate_holes - instantiate the holes in an attribute region
 * @ni:		ntfs inode of the attribute whose holes to instantiate
 * @start:	start offset in bytes at which to begin instantiating holes
 * @end:	end offset in bytes at which to stop instantiating holes
 * @new_end:	return the offset at which we stopped instantiating holes
 * @atomic:	if true must complete the entire extension or abort
 *
 * Scan the runlist (mapping any unmapped fragments as needed) starting at byte
 * offset @start into the attribute described by the ntfs inode @ni and
 * finishing at byte offset @end and instantiate any sparse regions located
 * between @start and @end with real clusters.
 *
 * Any clusters that are inside the initialized size are zeroed.
 *
 * If @atomic is true the whole instantiation must be complete so abort on
 * errors.  If @atomic is false partial instantiations are acceptable (but we
 * still return an error if the instantiation is partial).  In any case we set
 * *@new_end to the end of the instantiated range.  Thus the caller has to
 * always check *@new_end.  If *@new_end is equal to @end then the whole
 * instantiation was complete.  If *@new_end is less than @end the
 * instantiation was partial.
 *
 * Note if @new_end is NULL, then @atomic is set to true as there is no way to
 * communicate to the caller that the hole instantiation was partial.
 *
 * Return 0 on success and errno on error.
 *
 * Locking: - Caller must hold @ni->lock on the inode for writing.
 *	    - The runlist @ni must be unlocked as it is taken for writing.
 */
errno_t ntfs_attr_instantiate_holes(ntfs_inode *ni, s64 start, s64 end,
		s64 *new_end, BOOL atomic)
{
#if 0
	VCN vcn, end_vcn;
	s64 allocated_size, initialized_size, compressed_size, len;
	ntfs_inode *base_ni;
	ntfs_volume *vol = ni->vol;
	ntfs_rl_element *rl;
	MFT_RECORD *base_m, *m;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	errno_t err, err2;
	BOOL write_locked;
	ntfs_runlist runlist;
#else
	ntfs_volume *vol = ni->vol;
	errno_t err;
#endif

	err = 0;
	/* We should never be called for non-sparse attributes. */
	if (!NInoSparse(ni))
		panic("%s(): !NInoSparse(ni)\n", __FUNCTION__);
	/* We should never be called for resident attributes. */
	if (!NInoNonResident(ni))
		panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
	/* We should only be called for $DATA attributes. */
	if (ni->type != AT_DATA)
		panic("%s(): ni->type != AT_DATA\n", __FUNCTION__);
	/* Sanity check @start and @end. */
	if (start >= end)
		panic("%s(): start >= end\n", __FUNCTION__);
	if (start & vol->cluster_size_mask || end & vol->cluster_size_mask)
		panic("%s(): start & vol->cluster_size_mask || "
				"end & vol->cluster_size_mask\n", __FUNCTION__);
	/*
	 * Hole instantiation is not implemented yet: after the sanity checks
	 * above this function unconditionally fails with ENOTSUP.  The real
	 * implementation below is compiled out with #if 0 until it is
	 * finished.
	 */
	err = ENOTSUP;
	return err;
	/*
	 * NOTE(review): The disabled code below is work in progress and will
	 * not compile if simply re-enabled: @max_size, @mp_rebuilt and
	 * @is_first are used but not declared in the #if 0 declaration block
	 * above, the per-iteration local @highest_vcn is read (in the
	 * ntfs_get_size_for_mapping_pairs() and ntfs_mapping_pairs_build()
	 * calls) but never assigned, and two "goto undo" statements reference
	 * a label that does not exist.  All of these need resolving before
	 * the #if 0 is removed -- see also the embedded TODO markers.
	 */
#if 0
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	if (!new_end)
		atomic = TRUE;
	lck_rw_lock_shared(&ni->rl.lock);
	write_locked = FALSE;
	/*
	 * We have to round down @start to the nearest page boundary and we
	 * have to round up @end to the nearest page boundary for the cases
	 * where the cluster size is smaller than the page size.  It makes no
	 * sense to instantiate only part of a page as a later pageout of the
	 * dirty page would cause any sparse clusters inside the page to be
	 * instantiated so we might as well do it now whilst we are
	 * instantiating things.
	 */
	vcn = (start & ~PAGE_MASK_64) >> vol->cluster_size_shift;
	end_vcn = ((end + PAGE_MASK) & ~PAGE_MASK_64) >>
			vol->cluster_size_shift;
	/* Cache the sizes for the attribute so we take the size lock once. */
	lck_spin_lock(&ni->size_lock);
	allocated_size = ni->allocated_size;
	initialized_size = ni->initialized_size;
	compressed_size = ni->compressed_size;
	lck_spin_unlock(&ni->size_lock);
	/*
	 * We have to make sure that we stay within the existing allocated
	 * size when instantiating holes as it would corrupt the attribute if
	 * we were to extend the runlist beyond the allocated size.  And our
	 * rounding up of @end above could have caused us to go above the
	 * allocated size so fix this up now.
	 */
	if (end_vcn > allocated_size >> vol->cluster_size_shift)
		end_vcn = allocated_size >> vol->cluster_size_shift;
retry_remap:
	rl = ni->rl.rl;
	if (!ni->rl.elements || vcn < rl->vcn || !rl->length) {
map_vcn:
		/* Promote the shared runlist lock to exclusive for mapping. */
		if (!write_locked) {
			write_locked = TRUE;
			if (!lck_rw_lock_shared_to_exclusive(&ni->rl.lock)) {
				lck_rw_lock_exclusive(&ni->rl.lock);
				goto retry_remap;
			}
		}
		/* Need to map the runlist fragment containing @vcn. */
		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
		if (err) {
			ntfs_error(vol->mp, "Failed to map runlist fragment "
					"(error %d).", err);
			if (err == EINVAL)
				err = EIO;
			goto err;
		}
		rl = ni->rl.rl;
		if (!ni->rl.elements || vcn < rl->vcn || !rl->length)
			panic("%s(): !ni->rl.elements || vcn < rl[0].vcn || "
					"!rl->length\n", __FUNCTION__);
	}
	do {
		VCN lowest_vcn, highest_vcn, stop_vcn;
		ntfs_rl_element *rl2;
		unsigned mp_size, mp_ofs;

		/* Seek to the runlist element containing @vcn. */
		while (rl->length && vcn >= rl[1].vcn)
			rl++;
		/*
		 * Seek to the first sparse run or to the end of the region we
		 * are interested in.
		 */
		while (rl->length && rl->lcn >= 0 && vcn < end_vcn) {
			rl++;
			vcn = rl->vcn;
		}
		/*
		 * If there are no sparse runs (left) in the region of interest
		 * we are done.
		 */
		if (vcn >= end_vcn) {
			vcn = end_vcn;
			break;
		}
		/*
		 * If this run is not mapped map it now and start again as the
		 * runlist will have been updated.
		 */
		if (rl->lcn == LCN_RL_NOT_MAPPED)
			goto map_vcn;
		/* If this run is not valid abort with an error. */
		if (!rl->length || rl->lcn < LCN_HOLE) {
			ntfs_error(vol->mp, "Runlist is corrupt.  Unmount and "
					"run chkdsk.");
			NVolSetErrors(vol);
			err = EIO;
			goto err;
		}
		/*
		 * This run is sparse thus we need to instantiate it for which
		 * we need to hold the runlist lock for writing.
		 */
		if (!write_locked) {
			write_locked = TRUE;
			if (!lck_rw_lock_shared_to_exclusive(&ni->rl.lock)) {
				lck_rw_lock_exclusive(&ni->rl.lock);
				goto retry_remap;
			}
		}
		/*
		 * Make sure that we do not instantiate past @end_vcn as would
		 * otherwise happen when the hole goes past @end_vcn.
		 */
		len = rl[1].vcn - vcn;
		if (rl[1].vcn > end_vcn)
			len = end_vcn - vcn;
// TODO: HERE:
		/*
		 * If the entire run lies outside the initialized size we do
		 * not need to do anything other than instantiating the hole
		 * with real clusters.
		 *
		 * If part of the run (or the whole run) lies inside the
		 * initialized size we need to zero the clusters in memory and
		 * mark the pages dirty so they get written out later in
		 * addition to instantiating the hole with real clusters.
		 *
		 * The need for zeroing causes two potential problems.  The
		 * first problem is that if the run being instantiated is very
		 * large we could run out of memory due to us holding both the
		 * inode lock and the runlist lock for writing so all the dirty
		 * pages we create/release back to the VM cannot be paged out
		 * until we release the locks and the second problem is that if
		 * the cluster size is less than the page size we can encounter
		 * partially sparse pages and if they are not already cached by
		 * the VM we have to page them in.  But to do so we have to not
		 * hold the runlist lock for writing.  We have two ways out of
		 * this situation.  Either we have to drop and re-acquire the
		 * runlist lock around paging in such pages (with restarting
		 * everything each time because we had dropped the lock) or we
		 * have to read the non-sparse clusters in by hand using an
		 * enhanced ntfs_rl_read() or even by calling buf_meta_bread()
		 * directly.
		 *
		 * FIXME: We ignore the first problem for now until the code is
		 * working and we can test it.  The solution is probably to
		 * break the work into chunks of a fixed size and then allocate
		 * only enough clusters to complete the current chunk then
		 * merge that with the runlist, dirty all corresponding pages,
		 * then drop the locks to allow the pages to be written if
		 * needed and then take the locks again and start again with
		 * the next chunk.  This does have one nasty side effect and
		 * that is that whilst the locks are dropped a concurrent
		 * process could do nasty things to the inode including
		 * truncate our carefully allocated pages by shrinking the file
		 * so a lot of sanity checking after re-taking the locks will
		 * be needed.  Alternatively perhaps we need to hold the inode
		 * lock shared throughout this function so dropping the
		 * runlist lock would be sufficient.  We do not actually need
		 * the inode lock for writing in this function as we do not
		 * modify any of the inode sizes and the runlist lock will
		 * protect us sufficiently from everything.
		 *
		 * FIXME: We also ignore the second problem for now and abort
		 * if it bites us, again until the code is working and we can
		 * test it.
		 */
		/*
		 * Seek back to the last real LCN so we can try and extend the
		 * hole at that LCN so the instantiated clusters are at least
		 * in close proximity to the other data in the attribute.
		 */
		rl2 = rl;
		while (rl2->lcn < 0 && rl2 > ni->rl.rl)
			rl2--;
		runlist.rl = NULL;
		runlist.alloc = runlist.elements = 0;
		err = ntfs_cluster_alloc(vol, vcn, len,
				(rl2->lcn >= 0) ? rl2->lcn + rl2->length : -1,
				DATA_ZONE, FALSE, &runlist);
		if (err) {
			if (err != ENOSPC)
				ntfs_error(vol->mp, "Failed to allocate "
						"clusters (error %d).", err);
			goto err;
		}
// TODO: HERE:
		/*
		 * If the instantiated hole starts before the initialized size
		 * we need to zero it.
		 *
		 * FIXME: For now we do it in the most stupid way possible and
		 * that is to synchronously write zeroes to disk via the device
		 * hosting the volume.  That way we get around our issues and
		 * problems with the UBC and small/large cluster sizes.  This
		 * way if there is dirty data in the UBC it will still get
		 * written on top of the zeroing we are now doing.  Ordering is
		 * guaranteed as no-one knows about the allocated clusters yet
		 * as we have not merged the runlists yet.
		 *
		 * FIXME: TODO: It may be worth restricting ntfs_rl_set() to
		 * only operate up to the initialized size as it could
		 * otherwise do a lot of unneeded extra work.
		 */
		if (vcn << vol->cluster_size_shift < initialized_size) {
			ntfs_debug("Zeroing instantiated hole inside the "
					"initialized size.");
			if (!runlist.elements || !runlist.alloc)
				panic("%s(): !runlist.elements || "
						"!runlist.alloc\n",
						__FUNCTION__);
			err = ntfs_rl_set(vol, runlist.rl, 0);
			if (err) {
				ntfs_error(vol->mp, "Failed to zero newly "
						"allocated space (error %d).",
						err);
				goto undo_alloc;
			}
		}
		err = ntfs_rl_merge(&ni->rl, &runlist);
		if (err) {
			ntfs_error(vol->mp, "Failed to merge runlists (error "
					"%d).", err);
			goto undo_alloc;
		}
		/*
		 * The runlist may have been reallocated so @rl needs to be
		 * reset back to the beginning.
		 */
		rl = ni->rl.rl;
		/*
		 * Need to update the mapping pairs array of the attribute.  We
		 * cannot postpone this till the end (which would be much more
		 * efficient) because we could run out of space on the volume
		 * when trying to update the mapping pairs array and then we
		 * would not be able to roll back to the previous state because
		 * we would not know which bits of the runlist are new and
		 * which are old.  Doing it now means that if we get an error
		 * we still know the starting and ending VCNs of the run we
		 * instantiated so we can punch the clusters out again thus
		 * restoring the original hole.
		 */
		err = ntfs_mft_record_map(base_ni, &base_m);
		if (err) {
			ntfs_error(vol->mp, "Failed to map mft_no 0x%llx "
					"(error %d).",
					(unsigned long long)base_ni->mft_no,
					err);
			goto undo_merge;
		}
		ctx = ntfs_attr_search_ctx_get(base_ni, base_m);
		if (!ctx) {
			ntfs_error(vol->mp, "Failed to allocate attribute "
					"search context.");
			err = ENOMEM;
			goto unm_err;
		}
		/*
		 * Get the base attribute record so we can update the
		 * compressed size or so we can switch the attribute to not be
		 * sparse any more if we just filled the last hole.
		 */
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0,
				NULL, 0, ctx);
		if (err) {
			ntfs_error(vol->mp, "Failed to lookup base attribute "
					"extent in mft_no 0x%llx (error %d).",
					(unsigned long long)base_ni->mft_no,
					err);
			goto put_err;
		}
		m = ctx->m;
		a = ctx->a;
		/*
		 * We added @len clusters thus the compressed size grows by
		 * that many clusters whilst the allocated size does not change
		 * as we have not extended the attribute.
		 */
		compressed_size += len << vol->cluster_size_shift;
		/*
		 * Determine whether the attribute is still sparse by comparing
		 * the new compressed size to the allocated size.  If the two
		 * have now become the same the attribute is no longer sparse.
		 */
		if (compressed_size >= allocated_size) {
			if (compressed_size != allocated_size)
				panic("%s(): compressed_size != "
						"allocated_size\n",
						__FUNCTION__);
			/* Switch the attribute to not be sparse any more. */
			ntfs_attr_sparse_clear(base_ni, ni, ctx);
		}
		/*
		 * If the attribute is (still) sparse or compressed, need to
		 * update the compressed size.
		 */
		if (NInoSparse(ni) || NInoCompressed(ni)) {
			lck_spin_lock(&ni->size_lock);
			ni->compressed_size = compressed_size;
			a->compressed_size = cpu_to_sle64(compressed_size);
			lck_spin_unlock(&ni->size_lock);
		}
		/*
		 * If this is the unnamed $DATA attribute also need to update
		 * the sizes in the directory entries pointing to this inode.
		 */
		if (ni == base_ni)
			NInoSetDirtySizes(ni);
		/*
		 * If the VCN we started allocating at is not in the base
		 * attribute record get the attribute record containing it so
		 * we can update the mapping pairs array.
		 */
		if (vcn > sle64_to_cpu(a->highest_vcn)) {
			/* Ensure the modified mft record is written out. */
			NInoSetMrecNeedsDirtying(ctx->ni);
			err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
					vcn, NULL, 0, ctx);
			if (err) {
				ntfs_error(vol->mp, "Failed to lookup "
						"attribute extent in mft_no "
						"0x%llx (error %d).",
						(unsigned long long)
						base_ni->mft_no, err);
				a = NULL;
				goto undo_sparse;
			}
			a = ctx->a;
		}
		/*
		 * Get the size for the new mapping pairs array for this
		 * attribute extent.
		 */
		lowest_vcn = sle64_to_cpu(a->lowest_vcn);
		/*
		 * Get the runlist element containing the lowest vcn.
		 *
		 * This cannot fail as we know the runlist is ok and the
		 * runlist fragment containing the lowest vcn is mapped.
		 */
		rl2 = ntfs_rl_find_vcn_nolock(rl, lowest_vcn);
		if (!rl2)
			panic("%s(): Memory corruption detected.\n",
					__FUNCTION__);
		/*
		 * NOTE(review): @highest_vcn is never assigned before this
		 * use -- must be fixed before this code is enabled.
		 */
		err = ntfs_get_size_for_mapping_pairs(vol, rl2, lowest_vcn,
				highest_vcn, &mp_size);
		if (err) {
			ntfs_error(vol->mp, "Failed to get size for mapping "
					"pairs array (error %d).", err);
			goto undo_sparse;
		}
		mp_ofs = le16_to_cpu(a->mapping_pairs_offset);
retry_attr_rec_resize:
		/*
		 * Extend the attribute record to fit the bigger mapping pairs
		 * array.
		 */
		err = ntfs_attr_record_resize(m, a, mp_size + mp_ofs);
		if (!err)
			goto build_mpa;
		if (err != ENOSPC)
			panic("%s(): err != ENOSPC\n", __FUNCTION__);
		/*
		 * There is not enough space in the mft record.
		 *
		 * We need to add an attribute list attribute if it is not
		 * already present.
		 */
		if (!NInoAttrList(base_ni)) {
			err = ntfs_attr_list_add(base_ni, base_m, ctx);
			if (err || ctx->is_error) {
				if (!err)
					err = ctx->error;
				ntfs_error(vol->mp, "Failed to %s mft_no "
						"0x%llx (error %d).",
						ctx->is_error ?
						"remap extent mft record of" :
						"add attribute list attribute "
						"to", (unsigned long long)
						base_ni->mft_no, err);
				goto undo1;
			}
			/*
			 * The attribute location will have changed so update
			 * it from the search context.
			 */
			m = ctx->m;
			a = ctx->a;
			/*
			 * Retry the original attribute record resize as we may
			 * now have enough space to create the needed mapping
			 * pairs array in the moved attribute record.
			 *
			 * This can for example happen when the attribute was
			 * moved out to an extent mft record which has much
			 * more free space than the base mft record had.
			 */
			goto retry_attr_rec_resize;
		}
		/*
		 * If this is not the only attribute record in the mft record
		 * then move it out to a new extent mft record which will allow
		 * the attribute record to grow larger thus reducing the total
		 * number of extent attribute records needed to a minimum.
		 */
		if (!ntfs_attr_record_is_only_one(m, a)) {
			lck_rw_lock_shared(&base_ni->attr_list_rl.lock);
			err = ntfs_attr_record_move(ctx);
			lck_rw_unlock_shared(&base_ni->attr_list_rl.lock);
			if (err) {
				ntfs_error(vol->mp, "Failed to move attribute "
						"extent from mft record "
						"0x%llx to an extent mft "
						"record (error %d).",
						(unsigned long long)
						ctx->ni->mft_no, err);
				/*
				 * We could try to remove the attribute list
				 * attribute if we added it above but this
				 * would probably require attributes to be
				 * moved back into the base mft record from
				 * extent mft records so is a lot of work and
				 * given we are in an error code path and given
				 * that it is ok to just leave the inode with
				 * an attribute list attribute we do not bother
				 * and just bail out.
				 */
				goto undo1;
			}
			/*
			 * The attribute location will have changed so update
			 * it from the search context.
			 */
			m = ctx->m;
			a = ctx->a;
			/*
			 * Retry the original attribute record resize as we may
			 * now have enough space to create the mapping pairs
			 * array in the moved attribute record.
			 */
			goto retry_attr_rec_resize;
		}
		/*
		 * NOTE(review): @max_size is not declared in this function --
		 * must be fixed before this code is enabled.
		 */
		max_size = (le32_to_cpu(m->bytes_allocated) -
				le32_to_cpu(m->bytes_in_use)) & ~7;
		max_size += le32_to_cpu(a->length) - mp_ofs;
		err = ntfs_attr_record_resize(m, a, max_size + mp_ofs);
		/*
		 * We worked out the exact size we can extend to so the resize
		 * cannot fail.
		 */
		if (err)
			panic("%s(): err (ntfs_attr_record_resize())\n",
					__FUNCTION__);
build_mpa:
// TODO: HERE...
		/* NOTE(review): @mp_rebuilt is not declared -- see above. */
		mp_rebuilt = TRUE;
		/*
		 * Generate the mapping pairs array directly into the attribute
		 * record.
		 *
		 * This cannot fail as we have already checked the size we need
		 * to build the mapping pairs array.
		 */
		err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs,
				le32_to_cpu(a->length) - mp_ofs, rl2,
				lowest_vcn, highest_vcn, &stop_vcn);
		if (err && err != ENOSPC) {
			ntfs_error(vol->mp, "Cannot fill hole of mft_no "
					"0x%llx, attribute type 0x%x, because "
					"building the mapping pairs array "
					"failed (error %d).",
					(unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err);
			err = EIO;
			/*
			 * Need to set @a->highest_vcn to enable correct error
			 * recovery.
			 */
// TODO: HERE...
			if (!is_first)
				a->highest_vcn = cpu_to_sle64(sle64_to_cpu(
						a->lowest_vcn) - 1);
			goto undo;
		}
		/* Update the highest_vcn. */
		a->highest_vcn = cpu_to_sle64(stop_vcn - 1);
		/* Ensure the modified mft record is written out. */
		NInoSetMrecNeedsDirtying(ctx->ni);
		/*
		 * If the mapping pairs build succeeded, i.e. the current
		 * attribute extent contains the whole runlist fragment, we are
		 * done and can proceed to the next run.
		 */
		if (!err)
			goto next_run;
		/*
		 * Partial mapping pairs update.  This means we need to create
		 * one or more new attribute extents to hold the remainder of
		 * the mapping pairs.
		 *
		 * Get the size of the remaining mapping pairs array.
		 */
		rl2 = ntfs_rl_find_vcn_nolock(rl2, stop_vcn);
		if (!rl2)
			panic("%s(): !rl2 (stop_vcn)\n", __FUNCTION__);
		if (!rl2->length)
			panic("%s(): !rl2->length (stop_vcn)\n", __FUNCTION__);
		if (rl2->lcn < LCN_HOLE)
			panic("%s(): rl2->lcn < LCN_HOLE (stop_vcn)\n",
					__FUNCTION__);
		err = ntfs_get_size_for_mapping_pairs(vol, rl2, stop_vcn,
				highest_vcn, &mp_size);
		if (err) {
			ntfs_error(vol->mp, "Cannot complete filling of hole "
					"of mft_no 0x%llx, attribute type "
					"0x%x, because determining the size "
					"for the mapping pairs failed (error "
					"%d).", (unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err);
			err = EIO;
// TODO: HERE...
			goto undo;
		}
		/* We only release extent mft records. */
		if (ctx->ni != base_ni)
			ntfs_extent_mft_record_unmap(ctx->ni);
// TODO: I AM HERE... Need to allocate an extent mft record, add an extent
// attribute record to it filling it with remaining mapping pairs array fragment
// and creating an attribute list attribute entry for it.  Then if still not
// reached highest_vcn, need to repeat the process again.
next_run:
		ntfs_attr_search_ctx_put(ctx);
		ntfs_mft_record_unmap(base_ni);
		/*
		 * If the attribute is no longer sparse there are no more holes
		 * to instantiate thus we are done with the whole region of
		 * interest.
		 */
		if (!NInoSparse(ni)) {
			vcn = end_vcn;
			break;
		}
		/*
		 * We allocated @len clusters starting at @vcn.  Thus the next
		 * VCN we need to look at is at @vcn + @len.
		 */
		vcn += len;
	} while (vcn < end_vcn);
	if (vcn > end_vcn)
		panic("%s(): vcn > end_vcn\n", __FUNCTION__);
	ntfs_debug("Done, new_end 0x%llx.",
			(unsigned long long)vcn << vol->cluster_size_shift);
err:
	if (new_end)
		*new_end = vcn << vol->cluster_size_shift;
	if (write_locked)
		lck_rw_unlock_exclusive(&ni->rl.lock);
	else
		lck_rw_unlock_shared(&ni->rl.lock);
	return err;
undo_alloc:
	err2 = ntfs_cluster_free_from_rl(vol, runlist.rl, 0, -1, NULL);
	if (err2) {
		ntfs_error(vol->mp, "Failed to release allocated cluster(s) "
				"in error code path (error %d).  Run chkdsk "
				"to recover the lost space.", err2);
		NVolSetErrors(vol);
	}
	OSFree(runlist.rl, runlist.alloc, ntfs_malloc_tag);
	goto err;
undo_sparse:
	/*
	 * If looking up an attribute extent failed or we are not in the base
	 * attribute record need to look up the base attribute record.
	 */
	if (!a || a->lowest_vcn) {
		ntfs_attr_search_ctx_reinit(ctx);
		err2 = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0,
				NULL, 0, ctx);
		if (err2) {
			ntfs_error(vol->mp, "Failed to re-lookup base "
					"attribute record in error code path "
					"(error %d).  Leaving inconsistent "
					"metadata.  Unmount and run chkdsk.",
					err2);
			NVolSetErrors(vol);
			goto put_err;
		}
		a = ctx->a;
	}
	/*
	 * If we caused the attribute to no longer be sparse we need to make it
	 * sparse again.
	 */
	if (!NInoSparse(ni)) {
		err2 = ntfs_attr_sparse_set(base_ni, ni, ctx);
		if (err2) {
			ntfs_error(vol->mp, "Failed to re-set the attribute "
					"to be sparse in error code path "
					"(error %d).  Leaving inconsistent "
					"metadata.  Unmount and run chkdsk.",
					err2);
			NVolSetErrors(vol);
			goto put_err;
		}
		/*
		 * The attribute may have been moved to make space for the
		 * compressed size so @a is now invalid.
		 */
		a = ctx->a;
	}
	/* Restore the compressed size to the old value. */
	compressed_size -= len << vol->cluster_size_shift;
	lck_spin_lock(&ni->size_lock);
	ni->compressed_size = compressed_size;
	a->compressed_size = cpu_to_sle64(compressed_size);
	lck_spin_unlock(&ni->size_lock);
	/* Ensure the modified mft record is written out. */
	NInoSetMrecNeedsDirtying(ctx->ni);
	if (ni == base_ni)
		NInoSetDirtySizes(ni);
put_err:
	ntfs_attr_search_ctx_put(ctx);
unm_err:
	ntfs_mft_record_unmap(base_ni);
undo_merge:
	/* Free the clusters we allocated. */
	err2 = ntfs_cluster_free_from_rl(vol, rl, vcn, len, NULL);
	if (err2) {
		ntfs_error(vol->mp, "Failed to release allocated cluster(s) "
				"in error code path (error %d).  Unmount and "
				"run chkdsk to recover the lost space.", err2);
		NVolSetErrors(vol);
	}
	/* Punch the original hole back into the runlist. */
	err2 = ntfs_rl_punch_nolock(vol, &ni->rl, vcn, len);
	if (err2) {
		ntfs_error(vol->mp, "Failed to restore hole in error code "
				"path in error code path (error %d).  Leaving "
				"inconsistent metadata.  Unmount and run "
				"chkdsk.", err2);
		NVolSetErrors(vol);
	}
	goto err;
undo1:
	panic("%s(): TODO\n", __FUNCTION__);
	return err;
#endif
}

/**
 * ntfs_attr_extend_allocation - extend the allocated space of an attribute
 * @ni:			ntfs inode of the attribute whose allocation to extend
 * @new_alloc_size:	new size in bytes to which to extend the allocation to
 * @new_data_size:	new size in bytes to which to extend the data to
 * @data_start:		beginning of region which is required to be non-sparse
 * @ictx:		index context
 * @dst_alloc_size:	if not NULL, this pointer is set to the allocated size
 * @atomic:		if true must complete the entire extension or abort
 *
 * Extend the allocated space of an attribute described by the ntfs inode @ni
 * to @new_alloc_size bytes.  If @data_start is -1, the whole extension may be
 * implemented as a hole in the file (as long as both the volume and the ntfs
 * inode @ni have sparse support enabled).  If @data_start is >= 0, then the
 * region between the old allocated size and @data_start - 1 may be made sparse
 * but the regions between @data_start and @new_alloc_size must be backed by
 * actual clusters.
 *
 * If @new_data_size is -1, it is ignored.  If it is >= 0, then the data size
 * of the attribute is extended to @new_data_size and the UBC size of the VFS
 * vnode is updated to match.
 * WARNING: It is a bug for @new_data_size to be smaller than the old data size
 * as well as for @new_data_size to be greater than @new_alloc_size.
 *
 * If @ictx is not NULL, the extension is for an index allocation or bitmap
 * attribute extension.  In this case, if there is not enough space in the mft
 * record for the extended index allocation/bitmap attribute, the index root is
 * moved to an index block if it is not empty to create more space in the mft
 * record.
NOTE: At present @ictx is only set when the attribute being resized 5895 * is non-resident. 5896 * 5897 * If @atomic is true only return success if the entire extension is complete. 5898 * If only a partial extension is possible abort with an appropriate error. If 5899 * @atomic is false partial extensions are acceptable in certain circumstances 5900 * (see below). 5901 * 5902 * For resident attributes extending the allocation involves resizing the 5903 * attribute record and if necessary moving it and/or other attributes into 5904 * extent mft records and/or converting the attribute to a non-resident 5905 * attribute which in turn involves extending the allocation of a non-resident 5906 * attribute as described below. 5907 * 5908 * For non-resident attributes this involves allocating clusters in the data 5909 * zone on the volume (except for regions that are being made sparse) and 5910 * extending the run list to describe the allocated clusters as well as 5911 * updating the mapping pairs array of the attribute. This in turn involves 5912 * resizing the attribute record and if necessary moving it and/or other 5913 * attributes into extent mft records and/or splitting the attribute record 5914 * into multiple extent attribute records. 5915 * 5916 * Also, the attribute list attribute is updated if present and in some of the 5917 * above cases (the ones where extent mft records/attributes come into play), 5918 * an attribute list attribute is created if not already present. 5919 * 5920 * Return 0 on success and errno on error. 5921 * 5922 * In the case that an error is encountered but a partial extension at least up 5923 * to @data_start (if present) is possible, the allocation is partially 5924 * extended and success is returned. If @data_start is -1 then partial 5925 * allocations are not performed. 5926 * 5927 * If @dst_alloc_size is not NULL, then *@dst_alloc_size is set to the new 5928 * allocated size when the ntfs_attr_extend_allocation() returns success. 
If 5929 * an error is returned *@dst_alloc_size is undefined. This is useful so that 5930 * the caller has a simple way of checking whether or not the allocation was 5931 * partial. 5932 * 5933 * Thus if @data_start is not -1 the caller should supply @dst_alloc_size and 5934 * then compare *@dst_alloc_size to @new_alloc_size to determine if the 5935 * allocation was partial. And if @data_start is -1 there is no point in 5936 * supplying @dst_alloc_size as *@dst_alloc_size will always be equal to 5937 * @new_alloc_size. 5938 * 5939 * Locking: - Caller must hold @ni->lock on the inode for writing. 5940 * - The runlist @ni must be unlocked as it is taken for writing. 5941 */ 5942errno_t ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size, 5943 const s64 new_data_size, const s64 data_start, 5944 ntfs_index_context *ictx, s64 *dst_alloc_size, 5945 const BOOL atomic) 5946{ 5947 VCN vcn, lowest_vcn, stop_vcn; 5948 s64 start, ll, old_alloc_size, alloc_size, alloc_start, alloc_end; 5949 s64 nr_allocated, nr_freed; 5950 ntfs_volume *vol = ni->vol; 5951 ntfs_inode *base_ni; 5952 MFT_RECORD *base_m, *m; 5953 ATTR_RECORD *a; 5954 ntfs_attr_search_ctx *actx; 5955 ntfs_rl_element *rl; 5956 unsigned attr_len, arec_size, name_size, mp_size, mp_ofs, max_size; 5957 unsigned al_entry_len, new_al_alloc; 5958 errno_t err, err2; 5959 BOOL is_sparse, is_first, mp_rebuilt, al_entry_added; 5960 ntfs_runlist runlist; 5961 5962 start = data_start; 5963#ifdef DEBUG 5964 lck_spin_lock(&ni->size_lock); 5965 old_alloc_size = ni->allocated_size; 5966 lck_spin_unlock(&ni->size_lock); 5967 ntfs_debug("Entering for mft_no 0x%llx, attribute type 0x%x, " 5968 "old_allocated_size 0x%llx, " 5969 "new_allocated_size 0x%llx, new_data_size 0x%llx, " 5970 "data_start 0x%llx.", (unsigned long long)ni->mft_no, 5971 (unsigned)le32_to_cpu(ni->type), 5972 (unsigned long long)old_alloc_size, 5973 (unsigned long long)new_alloc_size, 5974 (unsigned long long)new_data_size, 5975 (unsigned long long)start); 
5976#endif 5977 /* This cannot be called for the attribute list attribute. */ 5978 if (ni->type == AT_ATTRIBUTE_LIST) 5979 panic("%s(): ni->type == AT_ATTRIBUTE_LIST\n", __FUNCTION__); 5980 name_size = ni->name_len * sizeof(ntfschar); 5981 base_ni = ni; 5982 if (NInoAttr(ni)) 5983 base_ni = ni->base_ni; 5984 is_first = TRUE; 5985retry_extend: 5986 /* 5987 * For non-resident attributes, @start and @new_size need to be aligned 5988 * to cluster boundaries for allocation purposes. 5989 */ 5990 if (NInoNonResident(ni)) { 5991 if (start > 0) 5992 start &= ~(s64)vol->cluster_size_mask; 5993 new_alloc_size = (new_alloc_size + vol->cluster_size - 1) & 5994 ~(s64)vol->cluster_size_mask; 5995 } 5996 if (new_data_size >= 0 && new_data_size > new_alloc_size) 5997 panic("%s(): new_data_size >= 0 && new_data_size > " 5998 "new_alloc_size\n", __FUNCTION__); 5999 /* Check if new size is allowed in $AttrDef. */ 6000 err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size); 6001 if (err) { 6002 /* Only emit errors when the write will fail completely. */ 6003 lck_spin_lock(&ni->size_lock); 6004 old_alloc_size = ni->allocated_size; 6005 lck_spin_unlock(&ni->size_lock); 6006 if (start < 0 || start >= old_alloc_size) { 6007 if (err == ERANGE) { 6008 ntfs_error(vol->mp, "Cannot extend allocation " 6009 "of mft_no 0x%llx, attribute " 6010 "type 0x%x, because the new " 6011 "allocation would exceed the " 6012 "maximum allowed size for " 6013 "this attribute type.", 6014 (unsigned long long)ni->mft_no, 6015 (unsigned) 6016 le32_to_cpu(ni->type)); 6017 } else { 6018 ntfs_error(vol->mp, "Cannot extend allocation " 6019 "of mft_no 0x%llx, attribute " 6020 "type 0x%x, because this " 6021 "attribute type is not " 6022 "defined on the NTFS volume. " 6023 "Possible corruption! You " 6024 "should run chkdsk!", 6025 (unsigned long long)ni->mft_no, 6026 (unsigned) 6027 le32_to_cpu(ni->type)); 6028 } 6029 } 6030 /* Translate error code to be POSIX conformant for write(2). 
*/ 6031 if (err == ERANGE) 6032 err = EFBIG; 6033 else 6034 err = EIO; 6035 return err; 6036 } 6037 /* 6038 * We will be modifying both the runlist (if non-resident) and the mft 6039 * record so lock them both down. 6040 */ 6041 lck_rw_lock_exclusive(&ni->rl.lock); 6042 err = ntfs_mft_record_map(base_ni, &base_m); 6043 if (err) { 6044 base_m = NULL; 6045 actx = NULL; 6046 goto err_out; 6047 } 6048 actx = ntfs_attr_search_ctx_get(base_ni, base_m); 6049 if (!actx) { 6050 err = ENOMEM; 6051 goto err_out; 6052 } 6053 lck_spin_lock(&ni->size_lock); 6054 alloc_size = ni->allocated_size; 6055 lck_spin_unlock(&ni->size_lock); 6056 /* 6057 * If non-resident, seek to the last extent. If resident, there is 6058 * only one extent, so seek to that. 6059 */ 6060 vcn = (NInoNonResident(ni) && alloc_size > 0) ? 6061 (alloc_size - 1) >> vol->cluster_size_shift : 0; 6062 /* 6063 * Abort if someone did the work whilst we waited for the locks. If we 6064 * just converted the attribute from resident to non-resident it is 6065 * likely that exactly this has happened already. We cannot quite 6066 * abort if we need to update the data size. 6067 */ 6068 if (new_alloc_size <= alloc_size) { 6069 ntfs_debug("Allocated size already exceeds requested size."); 6070 new_alloc_size = alloc_size; 6071 if (new_data_size < 0) 6072 goto done; 6073 /* 6074 * We want the first attribute extent so that we can update the 6075 * data size. 6076 */ 6077 vcn = 0; 6078 } 6079 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, vcn, NULL, 0, 6080 actx); 6081 if (err) { 6082 if (err == ENOENT) 6083 err = EIO; 6084 goto err_out; 6085 } 6086 m = actx->m; 6087 a = actx->a; 6088 /* Use goto to reduce indentation. */ 6089 if (a->non_resident) 6090 goto do_non_resident_extend; 6091 if (NInoNonResident(ni)) 6092 panic("%s(): NInoNonResident(ni)\n", __FUNCTION__); 6093 /* 6094 * As things are now this function should never be called with an index 6095 * context for the resize of a resident attribute. 
6096 */ 6097 if (ictx) 6098 panic("%s(): ictx\n", __FUNCTION__); 6099 /* The total length of the attribute value. */ 6100 attr_len = le32_to_cpu(a->value_length); 6101 /* 6102 * Extend the attribute record to be able to store the new attribute 6103 * size. ntfs_attr_record_resize() will not do anything if the size is 6104 * not changing. 6105 */ 6106 arec_size = (le16_to_cpu(a->value_offset) + new_alloc_size + 7) & ~7; 6107 if (arec_size < le32_to_cpu(m->bytes_allocated) - 6108 le32_to_cpu(m->bytes_in_use) && 6109 !ntfs_attr_record_resize(m, a, arec_size)) { 6110 /* The resize succeeded! */ 6111 if (new_data_size > attr_len) { 6112 if (!ubc_setsize(ni->vn, new_data_size)) { 6113 ntfs_error(vol->mp, "Failed to set size in " 6114 "UBC."); 6115 /* 6116 * This cannot fail as it is a shrinking 6117 * resize. 6118 */ 6119 lck_spin_lock(&ni->size_lock); 6120 err = ntfs_attr_record_resize(m, a, 6121 le16_to_cpu(a->value_offset) + 6122 ni->allocated_size); 6123 lck_spin_unlock(&ni->size_lock); 6124 if (err) 6125 panic("%s(): Failed to shrink " 6126 "resident attribute " 6127 "record (error %d)\n", 6128 __FUNCTION__, err); 6129 err = EIO; 6130 goto err_out; 6131 } 6132 /* Zero the extended attribute value. */ 6133 bzero((u8*)a + le16_to_cpu(a->value_offset) + attr_len, 6134 (u32)new_data_size - attr_len); 6135 lck_spin_lock(&ni->size_lock); 6136 ni->initialized_size = ni->data_size = new_data_size; 6137 a->value_length = cpu_to_le32((u32)new_data_size); 6138 } else 6139 lck_spin_lock(&ni->size_lock); 6140 ni->allocated_size = le32_to_cpu(a->length) - 6141 le16_to_cpu(a->value_offset); 6142 lck_spin_unlock(&ni->size_lock); 6143 if (new_data_size > attr_len) 6144 a->value_length = cpu_to_le32((u32)new_data_size); 6145 goto dirty_done; 6146 } 6147 /* 6148 * We have to drop all the locks so we can call 6149 * ntfs_attr_make_non_resident(). 
6150 */ 6151 ntfs_attr_search_ctx_put(actx); 6152 ntfs_mft_record_unmap(base_ni); 6153 lck_rw_unlock_exclusive(&ni->rl.lock); 6154 /* 6155 * Not enough space in the mft record, try to make the attribute 6156 * non-resident and if successful restart the extension process. 6157 */ 6158 err = ntfs_attr_make_non_resident(ni); 6159 if (!err) 6160 goto retry_extend; 6161 /* 6162 * Could not make non-resident. If this is due to this not being 6163 * permitted for this attribute type try to make other attributes 6164 * non-resident and/or move this or other attributes out of the mft 6165 * record this attribute is in. Otherwise fail. 6166 */ 6167 if (err != EPERM) { 6168 if (err != ENOSPC) { 6169 /* 6170 * Only emit errors when the write will fail 6171 * completely. 6172 */ 6173 lck_spin_lock(&ni->size_lock); 6174 old_alloc_size = ni->allocated_size; 6175 lck_spin_unlock(&ni->size_lock); 6176 if (start < 0 || start >= old_alloc_size) 6177 ntfs_error(vol->mp, "Cannot extend allocation " 6178 "of mft_no 0x%llx, attribute " 6179 "type 0x%x, because the " 6180 "conversion from resident to " 6181 "non-resident attribute " 6182 "failed (error %d).", 6183 (unsigned long long)ni->mft_no, 6184 (unsigned)le32_to_cpu(ni->type), 6185 err); 6186 if (err != ENOMEM) { 6187 NVolSetErrors(vol); 6188 err = EIO; 6189 } 6190 } 6191 goto conv_err_out; 6192 } 6193 /* 6194 * To make space in the mft record we would like to try to make other 6195 * attributes non-resident if that would save space. 6196 * 6197 * FIXME: We cannot do this at present unless the attribute is the 6198 * attribute being resized as there could be an ntfs inode matching 6199 * this attribute in memory and it would become out of date with its 6200 * metadata if we touch its attribute record. 
6201 * 6202 * FIXME: We do not need to do this if this is the attribute being 6203 * resized as we already tried to make the attribute non-resident and 6204 * it did not work or we would never have gotten here in the first 6205 * place. 6206 * 6207 * Thus we have to either move other attributes to extent mft records 6208 * thus making more space in the base mft record or we have to move the 6209 * attribute being resized to an extent mft record thus giving it more 6210 * space. In any case we need to have an attribute list attribute so 6211 * start by adding it if it does not yet exist. 6212 * 6213 * Before we start, we can check whether it is possible to fit the 6214 * attribute to be resized inside an mft record. If not then there is 6215 * no point in proceeding. 6216 * 6217 * This should never really happen as the attribute size should never 6218 * be allowed to grow so much and such requests should never be made by 6219 * the driver and if they are they should be caught by the call to 6220 * ntfs_attr_size_bounds_check(). 6221 */ 6222 if (arec_size > vol->mft_record_size - sizeof(MFT_RECORD)) { 6223 /* Only emit errors when the write will fail completely. */ 6224 lck_spin_lock(&ni->size_lock); 6225 old_alloc_size = ni->allocated_size; 6226 lck_spin_unlock(&ni->size_lock); 6227 if (start < 0 || start >= old_alloc_size) 6228 ntfs_error(vol->mp, "Cannot extend allocation of " 6229 "mft_no 0x%llx, attribute type 0x%x, " 6230 "because the attribute may not be " 6231 "non-resident and the requested size " 6232 "exceeds the maximum possible " 6233 "resident attribute record size.", 6234 (unsigned long long)ni->mft_no, 6235 (unsigned)le32_to_cpu(ni->type)); 6236 /* Use POSIX conformant write(2) error code. */ 6237 err = EFBIG; 6238 goto conv_err_out; 6239 } 6240 /* 6241 * The resident attribute can fit in an mft record. 
Now have to decide 6242 * whether to make other attributes non-resident/move other attributes 6243 * out of the mft record or whether to move the attribute record to be 6244 * resized out to a new mft record. 6245 * 6246 * TODO: We never call ntfs_attr_extend_allocation() for attributes 6247 * that cannot be non-resident thus we never get here thus we simply 6248 * panic() here to remind us that we need to implement this code if we 6249 * ever start calling this function for attributes that must remain 6250 * resident. 6251 */ 6252 panic("%s(): Attribute may not be non-resident.\n", __FUNCTION__); 6253do_non_resident_extend: 6254 if (!NInoNonResident(ni)) 6255 panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__); 6256 if (new_alloc_size == alloc_size) { 6257 if (vcn) 6258 panic("%s(): vcn\n", __FUNCTION__); 6259 goto alloc_done; 6260 } 6261 /* 6262 * We are going to allocate starting at the old allocated size and are 6263 * going to allocate up to the new allocated size. 6264 */ 6265 alloc_start = alloc_size; 6266 rl = NULL; 6267 if (ni->rl.elements) { 6268 /* Seek to the end of the runlist. */ 6269 rl = &ni->rl.rl[ni->rl.elements - 1]; 6270 } 6271 /* 6272 * Cache the lowest VCN for later. Need to do it here to silence 6273 * compiler warning about possible use of uninitialized variable. 6274 */ 6275 lowest_vcn = sle64_to_cpu(a->lowest_vcn); 6276 /* If this attribute extent is not mapped, map it now. 
*/ 6277 if (alloc_size > 0 && (!ni->rl.elements || 6278 rl->lcn == LCN_RL_NOT_MAPPED || 6279 (rl->lcn == LCN_ENOENT && rl > ni->rl.rl && 6280 (rl-1)->lcn == LCN_RL_NOT_MAPPED))) { 6281 err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl); 6282 if (err || !ni->rl.elements) { 6283 if (!err) 6284 err = EIO; 6285 if (start < 0 || start >= alloc_size) 6286 ntfs_error(vol->mp, "Cannot extend allocation " 6287 "of mft_no 0x%llx, attribute " 6288 "type 0x%x, because the " 6289 "mapping of a runlist " 6290 "fragment failed (error %d).", 6291 (unsigned long long)ni->mft_no, 6292 (unsigned)le32_to_cpu(ni->type), 6293 err); 6294 if (err != ENOMEM) 6295 err = EIO; 6296 goto err_out; 6297 } 6298 /* Seek to the end of the runlist. */ 6299 rl = &ni->rl.rl[ni->rl.elements - 1]; 6300 } 6301 /* 6302 * We now know the runlist of the last extent is mapped and @rl is at 6303 * the end of the runlist. We want to begin extending the runlist. 6304 * 6305 * If the data starts after the end of the old allocation or no data 6306 * start is specified (@start < 0), this is a $DATA attribute and 6307 * sparse attributes are enabled on the volume and for this inode, then 6308 * create a sparse region between the old allocated size and the start 6309 * of the data or the new allocated size if no data start is specified. 6310 * Otherwise proceed with filling the whole space between the old 6311 * allocated size and the new allocated size with clusters. 6312 */ 6313 if ((start >= 0 && start <= alloc_size) || ni->type != AT_DATA || 6314 !NVolSparseEnabled(vol) || NInoSparseDisabled(ni)) { 6315 is_sparse = FALSE; 6316 goto skip_sparse; 6317 } 6318 /* 6319 * If @start is less than zero we create the sparse region from the old 6320 * allocated size to the new allocated size. Otherwise we end the 6321 * sparse region at @start and fill with real clusters between @start 6322 * and the new allocated size. 
6323 */ 6324 alloc_end = start; 6325 if (start < 0) 6326 alloc_end = new_alloc_size; 6327 ntfs_debug("Adding hole starting at byte offset 0x%llx and finishing " 6328 "at byte offset 0x%llx.", 6329 (unsigned long long)alloc_start, 6330 (unsigned long long)alloc_end); 6331 /* 6332 * Allocate more memory if needed. We ensure there is space at least 6333 * for two new elements as this is what needs to happen when this is 6334 * the very first allocation, i.e. the file has zero clusters allocated 6335 * at the moment. 6336 */ 6337 if ((ni->rl.elements + 2) * sizeof(*rl) > ni->rl.alloc) { 6338 ntfs_rl_element *rl2; 6339 6340 rl2 = OSMalloc(ni->rl.alloc + NTFS_ALLOC_BLOCK, 6341 ntfs_malloc_tag); 6342 if (!rl2) { 6343 err = ENOMEM; 6344 goto err_out; 6345 } 6346 if (ni->rl.elements) { 6347 memcpy(rl2, ni->rl.rl, ni->rl.elements * sizeof(*rl2)); 6348 /* Seek to the end of the runlist. */ 6349 rl = &rl2[ni->rl.elements - 1]; 6350 } 6351 if (ni->rl.alloc) 6352 OSFree(ni->rl.rl, ni->rl.alloc, ntfs_malloc_tag); 6353 ni->rl.rl = rl2; 6354 ni->rl.alloc += NTFS_ALLOC_BLOCK; 6355 } 6356 if (ni->rl.elements) { 6357 /* Sanity check that this is the end element. */ 6358 if (rl->length || rl->lcn >= LCN_HOLE) 6359 panic("%s(): rl->length || rl->lcn >= LCN_HOLE)\n", 6360 __FUNCTION__); 6361 } else /* if (!ni->rl.elements) */ { 6362 /* 6363 * The runlist is empty thus we are now creating both the 6364 * sparse element and the end element. Thus need to set 6365 * everything up so we end up with two new elements rather than 6366 * one. 6367 * 6368 * Note we do not need to set up @rl->lcn and @rl->length as 6369 * they are both unconditionally overwritten below. 6370 */ 6371 if (alloc_size > 0) 6372 panic("%s(): alloc_size > 0\n", __FUNCTION__); 6373 rl = ni->rl.rl; 6374 rl->vcn = 0; 6375 ni->rl.elements = 1; 6376 } 6377 /* 6378 * If a last real element exists and it is sparse, need to extend it 6379 * instead of adding a new hole. 
6380 * 6381 * Replace the terminator element with a sparse element and add a new 6382 * terminator. We know this is the end of the attribute thus we can 6383 * use LCN_ENOENT even if the old terminator was LCN_RL_NOT_MAPPED. 6384 */ 6385 if (rl->vcn != alloc_start >> vol->cluster_size_shift) 6386 panic("%s(): rl->vcn != alloc_start >> " 6387 "vol->cluster_size_shift\n", __FUNCTION__); 6388 if (ni->rl.elements > 1 && (rl - 1)->lcn == LCN_HOLE) 6389 rl--; 6390 else { 6391 rl->lcn = LCN_HOLE; 6392 rl[1].length = 0; 6393 ni->rl.elements++; 6394 } 6395 rl[1].vcn = alloc_end >> vol->cluster_size_shift; 6396 if (rl[1].vcn <= rl->vcn) 6397 panic("%s(): rl[1].vcn <= rl->vcn\n", __FUNCTION__); 6398 rl->length = rl[1].vcn - rl->vcn; 6399 rl[1].lcn = LCN_ENOENT; 6400 is_sparse = TRUE; 6401 /* 6402 * If the entire extension is sparse skip the allocation of real 6403 * clusters and proceed to updating the mapping pairs array. 6404 */ 6405 if (start < 0) { 6406 nr_allocated = 0; 6407 goto skip_real_alloc; 6408 } 6409 /* 6410 * We allocated part of the extension as a hole, now we are going to 6411 * allocate the remainder of the extension with real clusters. 6412 */ 6413 alloc_start = start; 6414skip_sparse: 6415 /* 6416 * We want to begin allocating clusters starting at the last allocated 6417 * cluster to reduce fragmentation. If there are no valid LCNs in the 6418 * attribute we let the cluster allocator choose the starting cluster. 6419 * 6420 * If the last LCN is a hole or similar seek back to last real LCN. 6421 */ 6422 if (ni->rl.elements) { 6423 while (rl->lcn < 0 && rl > ni->rl.rl) 6424 rl--; 6425 } 6426 // FIXME: Need to implement partial allocations so at least part of the 6427 // write can be performed when @start >= 0 (and hence @data_start >= 0). 6428 // This is needed for POSIX write(2) conformance. But do not allow 6429 // partial allocations for non-DATA attributes as partial metadata is 6430 // no use. 
The @start >= 0 check may be sufficient to exclude non-data 6431 // attributes... 6432 // FIXME: When we implement partial allocations we need to only allow 6433 // them to happen when @atomic is false. 6434 runlist.rl = NULL; 6435 runlist.alloc = runlist.elements = 0; 6436 nr_allocated = (new_alloc_size - alloc_start) >> 6437 vol->cluster_size_shift; 6438 err = ntfs_cluster_alloc(vol, alloc_start >> vol->cluster_size_shift, 6439 nr_allocated, (ni->rl.elements && (rl->lcn >= 0)) ? 6440 rl->lcn + rl->length : -1, DATA_ZONE, TRUE, &runlist); 6441 if (err) { 6442 if (start < 0 || start >= alloc_size) 6443 ntfs_error(vol->mp, "Cannot extend allocation of " 6444 "mft_no 0x%llx, attribute type 0x%x, " 6445 "because the allocation of clusters " 6446 "failed (error %d).", 6447 (unsigned long long)ni->mft_no, 6448 (unsigned)le32_to_cpu(ni->type), err); 6449 if (err != ENOMEM && err != ENOSPC) 6450 err = EIO; 6451 nr_allocated = 0; 6452 goto trunc_err_out; 6453 } 6454 err = ntfs_rl_merge(&ni->rl, &runlist); 6455 if (err) { 6456 if (start < 0 || start >= alloc_size) 6457 ntfs_error(vol->mp, "Cannot extend allocation of " 6458 "mft_no 0x%llx, attribute type 0x%x, " 6459 "because the runlist merge failed " 6460 "(error %d).", 6461 (unsigned long long)ni->mft_no, 6462 (unsigned)le32_to_cpu(ni->type), err); 6463 if (err != ENOMEM) 6464 err = EIO; 6465 err2 = ntfs_cluster_free_from_rl(vol, runlist.rl, 0, -1, 6466 NULL); 6467 if (err2) { 6468 ntfs_error(vol->mp, "Failed to release allocated " 6469 "cluster(s) in error code path (error " 6470 "%d). Run chkdsk to recover the lost " 6471 "space.", err2); 6472 NVolSetErrors(vol); 6473 } 6474 OSFree(runlist.rl, runlist.alloc, ntfs_malloc_tag); 6475 nr_allocated = 0; 6476 goto trunc_err_out; 6477 } 6478 ntfs_debug("Allocated 0x%llx clusters.", 6479 (unsigned long long)(new_alloc_size - alloc_start) >> 6480 vol->cluster_size_shift); 6481skip_real_alloc: 6482 /* Find the runlist element with which the attribute extent starts. 
*/ 6483 rl = ntfs_rl_find_vcn_nolock(ni->rl.rl, lowest_vcn); 6484 if (!rl) 6485 panic("%s(): !rl\n", __FUNCTION__); 6486 if (!rl->length) 6487 panic("%s(): !rl->length\n", __FUNCTION__); 6488 if (rl->lcn < LCN_HOLE) 6489 panic("%s(): rl->lcn < LCN_HOLE\n", __FUNCTION__); 6490 mp_rebuilt = FALSE; 6491 attr_len = le32_to_cpu(a->length); 6492 /* Get the size for the new mapping pairs array for this extent. */ 6493 err = ntfs_get_size_for_mapping_pairs(vol, rl, lowest_vcn, -1, 6494 &mp_size); 6495 if (err) { 6496 if (start < 0 || start >= alloc_size) 6497 ntfs_error(vol->mp, "Cannot extend allocation of " 6498 "mft_no 0x%llx, attribute type 0x%x, " 6499 "because determining the size for the " 6500 "mapping pairs failed (error %d).", 6501 (unsigned long long)ni->mft_no, 6502 (unsigned)le32_to_cpu(ni->type), err); 6503 err = EIO; 6504 goto undo_alloc; 6505 } 6506 mp_ofs = le16_to_cpu(a->mapping_pairs_offset); 6507retry_attr_rec_resize: 6508 /* Extend the attribute record to fit the bigger mapping pairs array. */ 6509 err = ntfs_attr_record_resize(m, a, mp_size + mp_ofs); 6510 if (!err) 6511 goto build_mpa; 6512 if (err != ENOSPC) 6513 panic("%s(): err != ENOSPC\n", __FUNCTION__); 6514 /* 6515 * Not enough space in the mft record. If this is an index related 6516 * extension, check if the index root attribute is in the same mft 6517 * record as the attribute being extended and if it is and it is not 6518 * empty move its entries into an index allocation block. Note we do 6519 * not check whether that actually creates enough space because how 6520 * much space is needed exactly is very hard to determine in advance 6521 * (due to potential need for associated attribute list attribute 6522 * extensions) and also because even if it does not create enough space 6523 * it will still help and save work later on when working for example 6524 * on the attribute list attribute. 
6525 */ 6526 if (ictx) { 6527 long delta; 6528 INDEX_ROOT *ir; 6529 INDEX_HEADER *ih; 6530 INDEX_ENTRY *ie, *first_ie; 6531 ntfs_index_context *root_ictx; 6532 ntfs_attr_search_ctx root_actx; 6533 6534 if (ni->type != AT_INDEX_ALLOCATION && ni->type != AT_BITMAP) 6535 panic("%s(): ni->type != AT_INDEX_ALLOCATION && " 6536 "ni->type != AT_BITMAP\n", 6537 __FUNCTION__); 6538 ntfs_attr_search_ctx_init(&root_actx, actx->ni, m); 6539 err = ntfs_attr_find_in_mft_record(AT_INDEX_ROOT, ni->name, 6540 ni->name_len, NULL, 0, &root_actx); 6541 if (err) { 6542 if (err != ENOENT) { 6543 ntfs_error(vol->mp, "Failed to find index " 6544 "root attribute in mft_no " 6545 "0x%llx (error %d). Inode is " 6546 "corrupt. Run chkdsk.", 6547 (unsigned long long)ni->mft_no, 6548 err); 6549 NVolSetErrors(vol); 6550 } 6551 /* 6552 * The index root is in a different mft record so we 6553 * cannot gain anything by moving out its entries. Set 6554 * @ictx to NULL so we do not waste our time trying 6555 * again. 6556 */ 6557 ictx = NULL; 6558 goto ictx_done; 6559 } 6560 /* 6561 * We found the index root in the same mft record as the 6562 * attribute (extent) to be extended. Check whether it is 6563 * empty or not. 6564 */ 6565 ir = (INDEX_ROOT*)((u8*)root_actx.a + 6566 le16_to_cpu(root_actx.a->value_offset)); 6567 ih = &ir->index; 6568 first_ie = ie = (INDEX_ENTRY*)((u8*)ih + 6569 le32_to_cpu(ih->entries_offset)); 6570 while (!(ie->flags & INDEX_ENTRY_END)) 6571 ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length)); 6572 /* 6573 * If there are no entries other than the end entry we cannot 6574 * gain anything by moving out the entries from the index root. 6575 * Set @ictx to NULL so we do not waste our time trying again. 6576 */ 6577 if (ie == first_ie) { 6578 ictx = NULL; 6579 goto ictx_done; 6580 } 6581 /* 6582 * We cannot have gotten this far if the current index context 6583 * is locked and/or it is the index root. 
6584 * 6585 * Also, we need to undo what we have done so far as the 6586 * metadata is currently in an inconsistent state and things 6587 * will get really confused when moving the entries from the 6588 * index root to the index allocation block and the same 6589 * attribute we are extending at the moment is extended. 6590 * Another reason is that the mft record will be dropped by the 6591 * move thus we would expose invalid metadata to concurrent 6592 * threads which is a Bad Thing(TM). 6593 * 6594 * For the same reasons we also need to drop the runlist lock 6595 * we are holding. 6596 */ 6597 if (ictx->is_locked) 6598 panic("%s(): ictx->is_locked\n", __FUNCTION__); 6599 if (ictx->is_root) 6600 panic("%s(): ictx->is_root\n", __FUNCTION__); 6601 ll = alloc_size >> vol->cluster_size_shift; 6602 err = ntfs_cluster_free(ni, ll, -1, actx, NULL); 6603 if (err) { 6604 ntfs_error(vol->mp, "Failed to release allocated " 6605 "cluster(s) (error %d). Run chkdsk " 6606 "to recover the lost cluster(s).", err); 6607 NVolSetErrors(vol); 6608 } 6609 m = actx->m; 6610 a = actx->a; 6611 /* 6612 * If the runlist truncation fails and/or the search context is 6613 * no longer valid, we cannot resize the attribute record or 6614 * build the mapping pairs array thus we mark the volume dirty 6615 * and tell the user to run chkdsk. 6616 */ 6617 err = ntfs_rl_truncate_nolock(vol, &ni->rl, ll); 6618 if (err || actx->is_error) { 6619 if (actx->is_error) 6620 err = actx->error; 6621 ntfs_error(vol->mp, "Failed to %s (error %d). Run " 6622 "chkdsk.", actx->is_error ? "restore " 6623 "attribute search context" : 6624 "truncate attribute runlist", err); 6625 NVolSetErrors(vol); 6626 goto err_out; 6627 } 6628 lck_rw_unlock_exclusive(&ni->rl.lock); 6629 /* Find the index root by walking up the tree path. 
*/ 6630 root_ictx = ictx; 6631 while (!root_ictx->is_root) { 6632 root_ictx = root_ictx->up; 6633 /* 6634 * If we go all the way round to the beginning without 6635 * finding the root something has gone badly wrong. 6636 */ 6637 if (root_ictx == ictx) 6638 panic("%s(): root_ictx == ictx\n", 6639 __FUNCTION__); 6640 } 6641 /* 6642 * We need a proper deallocatable attribute search context thus 6643 * switch the one pointing to the attribute to be resized to 6644 * point to the index root. FIXME: We are not updating 6645 * @actx->al_entry as this is not going to be touched at all. 6646 * Having said that set it to NULL just in case. 6647 */ 6648 actx->a = root_actx.a; 6649 actx->al_entry = NULL; 6650 /* 6651 * Lock the index root node. We already have the index root 6652 * attribute thus only need to do the revalidation part of 6653 * re-locking. 6654 */ 6655 root_ictx->is_locked = 1; 6656 root_ictx->actx = actx; 6657 root_ictx->bytes_free = le32_to_cpu(m->bytes_allocated) - 6658 le32_to_cpu(m->bytes_in_use); 6659 root_ictx->ir = ir; 6660 delta = (u8*)ih - (u8*)root_ictx->index; 6661 if (delta) { 6662 INDEX_ENTRY **entries; 6663 unsigned u; 6664 6665 root_ictx->index = ih; 6666 root_ictx->entry = (INDEX_ENTRY*)( 6667 (u8*)root_ictx->entry + delta); 6668 entries = root_ictx->entries; 6669 for (u = 0; u < root_ictx->nr_entries; u++) 6670 entries[u] = (INDEX_ENTRY*)((u8*) 6671 entries[u] + delta); 6672 } 6673 /* 6674 * Move the index root entries to an index allocation block. 6675 * 6676 * Note we do not need to worry about this causing infinite 6677 * recursion in the case that we were called from 6678 * ntfs_index_block_alloc() which was called from 6679 * ntfs_index_move_root_to_allocation_block() because the 6680 * latter will have emptied the index root before calling 6681 * ntfs_index_block_alloc() thus we will bail out above when 6682 * checking whether the index root is empty the second time 6683 * round and the recursion will stop there. 
This is a very 6684 * seldom occurrence thus there is no point in special casing it 6685 * in the code in a more efficient but more complicated way. 6686 * 6687 * A complication is that ntfs_attr_resize() may have been 6688 * called from ntfs_index_block_alloc() and in this case when 6689 * we call ntfs_index_move_root_to_allocation_block() it will 6690 * call ntfs_index_block_alloc() again which will cause a 6691 * deadlock (or with lock debugging enabled panic()) because 6692 * ntfs_index_block_alloc() takes the bitmap inode lock for 6693 * writing. To avoid this ntfs_index_block_alloc() sets 6694 * @ictx->bmp_is_locked and we need to set 6695 * @root_ictx->bmp_is_locked to the same value so that when 6696 * ntfs_index_move_root_to_allocation_block() calls 6697 * ntfs_index_block_alloc() the latter will know not to take 6698 * the bitmap inode lock again. 6699 */ 6700 root_ictx->bmp_is_locked = ictx->bmp_is_locked; 6701 err = ntfs_index_move_root_to_allocation_block(root_ictx); 6702 if (root_ictx != ictx) 6703 root_ictx->bmp_is_locked = 0; 6704 if (err) { 6705 ntfs_error(vol->mp, "Failed to move index root to " 6706 "index allocation block (error %d).", 6707 err); 6708 if (root_ictx->is_locked) 6709 ntfs_index_ctx_unlock(root_ictx); 6710 /* 6711 * This is a disaster as it means the index context is 6712 * no longer valid thus we have to bail out all the way. 6713 */ 6714 return err; 6715 } 6716 /* Unlock the newly created index block. */ 6717 if (root_ictx->is_root) 6718 panic("%s(): root_ictx->is_root\n", __FUNCTION__); 6719 if (!root_ictx->is_locked) 6720 panic("%s(): !root_ictx->is_locked\n", __FUNCTION__); 6721 ntfs_index_ctx_unlock(root_ictx); 6722 /* 6723 * We are done. The index root is now empty thus the mft 6724 * record should now have enough space. 
Because we undid 6725 * everything and dropped the runlist lock as well as the mft 6726 * record when moving the index root entries into the index 6727 * allocation block we need to restart the attribute allocation 6728 * extension again. 6729 * 6730 * But first we set @ictx to NULL so we do not get here again 6731 * in the case that there still is not enough free space. This 6732 * is not a disaster as we can just carry on doing other 6733 * rearrangements to free up enough space in the mft record. 6734 */ 6735 ictx = NULL; 6736 goto retry_extend; 6737 } 6738ictx_done: 6739 /* 6740 * There is not enough space in the mft record. 6741 * 6742 * We need to add an attribute list attribute if it is not already 6743 * present. 6744 */ 6745 if (!NInoAttrList(base_ni)) { 6746 err = ntfs_attr_list_add(base_ni, base_m, actx); 6747 if (err || actx->is_error) { 6748 if (!err) 6749 err = actx->error; 6750 ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx (error " 6751 "%d).", actx->is_error ? 6752 "remap extent mft record of" : 6753 "add attribute list attribute to", 6754 (unsigned long long)base_ni->mft_no, 6755 err); 6756 goto undo; 6757 } 6758 /* 6759 * The attribute location will have changed so update it from 6760 * the search context. 6761 */ 6762 m = actx->m; 6763 a = actx->a; 6764 /* 6765 * Retry the original attribute record resize as we may now 6766 * have enough space to create the complete remaining mapping 6767 * pairs array in the moved attribute record. 6768 * 6769 * This can for example happen when the attribute was moved out 6770 * to an extent mft record which has much more free space than 6771 * the base mft record had. 
6772 */ 6773 goto retry_attr_rec_resize; 6774 } 6775 /* 6776 * If the attribute record is in an extent mft record we know the 6777 * attribute can be outside the base mft record (as it already is) thus 6778 * we can simply resize the attribute to the maximum size possible and 6779 * then proceed to fill it with mapping pairs data until it is full, 6780 * then start a new extent in a new mft record, etc, until all runlist 6781 * elements have been saved in mapping pairs arrays. 6782 */ 6783 if (m != base_m) { 6784 ATTR_LIST_ENTRY *al_entry; 6785 unsigned new_al_size; 6786 6787 /* 6788 * If the attribute record is not the only one in the extent 6789 * mft record then move it to a new extent mft record as that 6790 * will allow the attribute record to grow larger thus reducing 6791 * the total number of extent attribute records needed to a 6792 * minimum. 6793 */ 6794 if (!ntfs_attr_record_is_only_one(m, a)) { 6795move_attr: 6796 lck_rw_lock_shared(&base_ni->attr_list_rl.lock); 6797 err = ntfs_attr_record_move(actx); 6798 lck_rw_unlock_shared(&base_ni->attr_list_rl.lock); 6799 if (err) { 6800 if (start < 0 || start >= alloc_size) 6801 ntfs_error(vol->mp, "Failed to move " 6802 "attribute extent " 6803 "from mft record " 6804 "0x%llx to an extent " 6805 "mft record (error " 6806 "%d).", 6807 (unsigned long long) 6808 actx->ni->mft_no, err); 6809 goto undo; 6810 } 6811 /* 6812 * The attribute location will have changed so update 6813 * it from the search context. 6814 */ 6815 m = actx->m; 6816 a = actx->a; 6817 /* 6818 * Retry the original attribute record resize as we may 6819 * now have enough space to create the complete 6820 * remaining mapping pairs array in the moved attribute 6821 * record. 
6822 */ 6823 goto retry_attr_rec_resize; 6824 } 6825 max_size = (le32_to_cpu(m->bytes_allocated) - 6826 le32_to_cpu(m->bytes_in_use)) & ~7; 6827add_mapping_pairs_to_attr: 6828 max_size += attr_len - mp_ofs; 6829 err = ntfs_attr_record_resize(m, a, max_size + mp_ofs); 6830 /* 6831 * We worked out the exact size we can extend to so the resize 6832 * cannot fail. 6833 */ 6834 if (err) 6835 panic("%s(): err (ntfs_attr_record_resize())\n", 6836 __FUNCTION__); 6837 /* 6838 * If the new size and the old size are the same we cannot add 6839 * anything to this extent so do not bother rebuilding the 6840 * mapping pairs array and go straight to creating the next 6841 * extent. 6842 */ 6843 if (attr_len == le32_to_cpu(a->length)) { 6844start_new_attr: 6845 stop_vcn = sle64_to_cpu(a->highest_vcn) + 1; 6846 goto skip_mpa_build; 6847 } 6848build_mpa: 6849 mp_rebuilt = TRUE; 6850 /* Generate the mapping pairs directly into the attribute. */ 6851 err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs, 6852 le32_to_cpu(a->length) - mp_ofs, rl, 6853 lowest_vcn, -1, &stop_vcn); 6854 if (err && err != ENOSPC) { 6855 if (start < 0 || start >= alloc_size) 6856 ntfs_error(vol->mp, "Cannot extend allocation " 6857 "of mft_no 0x%llx, attribute " 6858 "type 0x%x, because building " 6859 "the mapping pairs array " 6860 "failed (error %d).", 6861 (unsigned long long)ni->mft_no, 6862 (unsigned)le32_to_cpu(ni->type), 6863 err); 6864 err = EIO; 6865 /* 6866 * Need to set @a->highest_vcn to enable correct error 6867 * recovery. 6868 */ 6869 if (!is_first) 6870 a->highest_vcn = cpu_to_sle64(sle64_to_cpu( 6871 a->lowest_vcn) - 1); 6872 goto undo; 6873 } 6874 /* Update the highest_vcn. */ 6875 a->highest_vcn = cpu_to_sle64(stop_vcn - 1); 6876 /* 6877 * We have finished with this extent so update the current 6878 * allocated size and attribute length to reflect this. We 6879 * need to do this to enable error handling and recovery. 
6880 */ 6881 alloc_size = stop_vcn << vol->cluster_size_shift; 6882 attr_len = le32_to_cpu(a->length); 6883 /* 6884 * If the mapping pairs build succeeded, i.e. the current 6885 * attribute extent contains the end of the runlist, we are 6886 * done and only need to update the attribute sizes in the base 6887 * attribute extent so go and do that. 6888 */ 6889 if (!err) 6890 goto update_sizes; 6891 /* 6892 * We have finished with this extent mft record thus we release 6893 * it after ensuring the changes make it to disk later. We do 6894 * this by hand as we want to keep the current attribute list 6895 * attribute entry as we will be inserting the entry for the 6896 * next attribute extent immediately after it. 6897 */ 6898 NInoSetMrecNeedsDirtying(actx->ni); 6899skip_mpa_build: 6900 /* Get the size of the remaining mapping pairs array. */ 6901 rl = ntfs_rl_find_vcn_nolock(rl, stop_vcn); 6902 if (!rl) 6903 panic("%s(): !rl (skip_mpa_build)\n", __FUNCTION__); 6904 if (!rl->length) 6905 panic("%s(): !rl->length (skip_mpa_build)\n", 6906 __FUNCTION__); 6907 if (rl->lcn < LCN_HOLE) 6908 panic("%s(): rl->lcn < LCN_HOLE (skip_mpa_build)\n", 6909 __FUNCTION__); 6910 err = ntfs_get_size_for_mapping_pairs(vol, rl, stop_vcn, -1, 6911 &mp_size); 6912 if (err) { 6913 if (start < 0 || start >= alloc_size) 6914 ntfs_error(vol->mp, "Cannot complete " 6915 "extension of allocation of " 6916 "mft_no 0x%llx, attribute type " 6917 "0x%x, because determining " 6918 "the size for the mapping " 6919 "pairs failed (error %d).", 6920 (unsigned long long)ni->mft_no, 6921 (unsigned)le32_to_cpu(ni->type), 6922 err); 6923 err = EIO; 6924 goto undo; 6925 } 6926 /* We only release extent mft records. 
*/ 6927 if (actx->ni != base_ni) 6928 ntfs_extent_mft_record_unmap(actx->ni); 6929 /* 6930 * We now need to allocate a new extent mft record, attach it 6931 * to the base ntfs inode and set up the search context to 6932 * point to it, then create a new attribute extent in it of 6933 * either maximum size or the left to do mapping pairs size and 6934 * then build the mapping pairs array in it. Finally, add an 6935 * attribute list attribute entry for the new attribute extent. 6936 */ 6937 err = ntfs_mft_record_alloc(vol, NULL, NULL, base_ni, 6938 &actx->ni, &m, &a); 6939 if (err) { 6940 /* 6941 * Make it safe to release the attribute search 6942 * context. 6943 */ 6944 actx->ni = base_ni; 6945 if (start < 0 || start >= alloc_size) 6946 ntfs_error(vol->mp, "Cannot extend allocation " 6947 "of mft_no 0x%llx, attribute " 6948 "type 0x%x, because " 6949 "allocating a new extent mft " 6950 "record failed (error %d),", 6951 (unsigned long long)ni->mft_no, 6952 (unsigned)le32_to_cpu(ni->type), 6953 err); 6954 goto undo; 6955 } 6956 actx->m = m; 6957 actx->a = a; 6958 /* We are no longer working on the extent we started with. */ 6959 is_first = FALSE; 6960 /* 6961 * Get the size needed for the remaining mapping pairs array 6962 * and make space for an attribute large enough to hold it. If 6963 * there is not enough space to do so make the maximum amount 6964 * of space available. 6965 */ 6966 lowest_vcn = stop_vcn; 6967 /* 6968 * Calculate the offset into the new attribute at which the 6969 * mapping pairs array begins. The mapping pairs array is 6970 * placed after the name aligned to an 8-byte boundary which in 6971 * turn is placed immediately after the non-resident attribute 6972 * record itself. 
6973 */ 6974 mp_ofs = offsetof(ATTR_RECORD, compressed_size) + ((name_size + 6975 7) & ~7); 6976 err = ntfs_attr_record_make_space(m, a, mp_ofs + mp_size); 6977 if (err) { 6978 if (err != ENOSPC) 6979 panic("%s(): err != ENOSPC\n", __FUNCTION__); 6980 max_size = (le32_to_cpu(m->bytes_allocated) - 6981 le32_to_cpu(m->bytes_in_use)) & ~7; 6982 if (max_size < mp_ofs) 6983 panic("%s(): max_size < mp_ofs\n", 6984 __FUNCTION__); 6985 err = ntfs_attr_record_make_space(m, a, max_size); 6986 /* 6987 * We worked out the exact maximum size so the call 6988 * cannot fail. 6989 */ 6990 if (err) 6991 panic("%s(): err (" 6992 "ntfs_attr_record_make_space()" 6993 ")\n", __FUNCTION__); 6994 } 6995 /* 6996 * Now setup the new attribute record. The entire attribute 6997 * has been zeroed and the length of the attribute record has 6998 * been set. 6999 * 7000 * Before we proceed with setting up the attribute, add an 7001 * attribute list attribute entry for the created attribute 7002 * extent. 7003 */ 7004 al_entry = actx->al_entry = (ATTR_LIST_ENTRY*)( 7005 (u8*)actx->al_entry + 7006 le16_to_cpu(actx->al_entry->length)); 7007 al_entry_len = (offsetof(ATTR_LIST_ENTRY, name) + name_size + 7008 7) & ~7; 7009 new_al_size = base_ni->attr_list_size + al_entry_len; 7010 /* Out of bounds checks. */ 7011 if ((u8*)al_entry < base_ni->attr_list || (u8*)al_entry > 7012 base_ni->attr_list + new_al_size || 7013 (u8*)al_entry + al_entry_len > 7014 base_ni->attr_list + new_al_size) { 7015 /* Inode is corrupt. */ 7016 if (start < 0 || start >= alloc_size) 7017 ntfs_error(vol->mp, "Cannot complete " 7018 "extension of allocation of " 7019 "mft_no 0x%llx, attribute type " 7020 "0x%x, because the inode is " 7021 "corrupt. 
Run chkdsk.", 7022 (unsigned long long)ni->mft_no, 7023 (unsigned) 7024 le32_to_cpu(ni->type)); 7025 err = EIO; 7026 goto free_undo; 7027 } 7028 err = ntfs_attr_size_bounds_check(vol, AT_ATTRIBUTE_LIST, 7029 new_al_size); 7030 if (err) { 7031 if (err == ERANGE) { 7032 if (start < 0 || start >= alloc_size) 7033 ntfs_error(vol->mp, "Cannot complete " 7034 "extension of " 7035 "allocation of mft_no " 7036 "0x%llx, attribute " 7037 "type 0x%x, because " 7038 "the attribute list " 7039 "attribute would " 7040 "become to large. " 7041 "You need to " 7042 "defragment your " 7043 "volume and then try " 7044 "again.", 7045 (unsigned long long) 7046 ni->mft_no, (unsigned) 7047 le32_to_cpu(ni->type)); 7048 err = ENOSPC; 7049 } else { 7050 if (start < 0 || start >= alloc_size) 7051 ntfs_error(vol->mp, "Cannot complete " 7052 "extension of " 7053 "allocation of mft_no " 7054 "0x%llx, attribute " 7055 "type 0x%x, because " 7056 "the attribute list " 7057 "attribute is unknown " 7058 "on the volume. The " 7059 "volume is corrupt. " 7060 "Run chkdsk.", 7061 (unsigned long long) 7062 ni->mft_no, (unsigned) 7063 le32_to_cpu(ni->type)); 7064 NVolSetErrors(vol); 7065 err = EIO; 7066 } 7067 goto free_undo; 7068 } 7069 /* 7070 * Reallocate the memory buffer if needed and create space for 7071 * the new entry. 
7072 */ 7073 new_al_alloc = (new_al_size + NTFS_ALLOC_BLOCK - 1) & 7074 ~(NTFS_ALLOC_BLOCK - 1); 7075 if (new_al_alloc > base_ni->attr_list_alloc) { 7076 u8 *tmp, *al, *al_end; 7077 unsigned al_entry_ofs; 7078 7079 tmp = OSMalloc(new_al_alloc, ntfs_malloc_tag); 7080 if (!tmp) { 7081 if (start < 0 || start >= alloc_size) 7082 ntfs_error(vol->mp, "Cannot complete " 7083 "extension of " 7084 "allocation of mft_no " 7085 "0x%llx, attribute " 7086 "type 0x%x, because " 7087 "there is not enough " 7088 "memory to extend " 7089 "the attribute list " 7090 "attribute.", 7091 (unsigned long long) 7092 ni->mft_no, (unsigned) 7093 le32_to_cpu(ni->type)); 7094 err = ENOMEM; 7095 goto free_undo; 7096 } 7097 al = base_ni->attr_list; 7098 al_entry_ofs = (u8*)al_entry - al; 7099 al_end = al + base_ni->attr_list_size; 7100 memcpy(tmp, al, al_entry_ofs); 7101 if ((u8*)al_entry < al_end) 7102 memcpy(tmp + al_entry_ofs + al_entry_len, 7103 al + al_entry_ofs, 7104 base_ni->attr_list_size - 7105 al_entry_ofs); 7106 al_entry = actx->al_entry = (ATTR_LIST_ENTRY*)(tmp + 7107 al_entry_ofs); 7108 OSFree(base_ni->attr_list, base_ni->attr_list_alloc, 7109 ntfs_malloc_tag); 7110 base_ni->attr_list_alloc = new_al_alloc; 7111 base_ni->attr_list = tmp; 7112 } else if ((u8*)al_entry < base_ni->attr_list + 7113 base_ni->attr_list_size) 7114 memmove((u8*)al_entry + al_entry_len, al_entry, 7115 base_ni->attr_list_size - 7116 ((u8*)al_entry - base_ni->attr_list)); 7117 base_ni->attr_list_size = new_al_size; 7118 /* Set up the attribute extent and the attribute list entry. 
*/ 7119 al_entry->type = a->type = ni->type; 7120 al_entry->length = cpu_to_le16(al_entry_len); 7121 a->non_resident = 1; 7122 al_entry->name_length = a->name_length = ni->name_len; 7123 a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD, 7124 compressed_size)); 7125 al_entry->name_offset = offsetof(ATTR_LIST_ENTRY, name); 7126 al_entry->instance = a->instance = m->next_attr_instance; 7127 /* 7128 * Increment the next attribute instance number in the mft 7129 * record as we consumed the old one. 7130 */ 7131 m->next_attr_instance = cpu_to_le16((le16_to_cpu( 7132 m->next_attr_instance) + 1) & 0xffff); 7133 al_entry->lowest_vcn = a->lowest_vcn = 7134 cpu_to_sle64(lowest_vcn); 7135 al_entry->mft_reference = MK_LE_MREF(actx->ni->mft_no, 7136 actx->ni->seq_no); 7137 a->mapping_pairs_offset = cpu_to_le16(mp_ofs); 7138 /* Copy the attribute name into place. */ 7139 if (name_size) { 7140 memcpy((u8*)a + offsetof(ATTR_RECORD, compressed_size), 7141 ni->name, name_size); 7142 memcpy(&al_entry->name, ni->name, name_size); 7143 } 7144 /* For tidyness, zero out the unused space. */ 7145 if (al_entry_len > offsetof(ATTR_LIST_ENTRY, name) + name_size) 7146 memset((u8*)al_entry + 7147 offsetof(ATTR_LIST_ENTRY, name) + 7148 name_size, 0, al_entry_len - 7149 (offsetof(ATTR_LIST_ENTRY, name) + 7150 name_size)); 7151 /* 7152 * Need to set @a->highest_vcn to enable correct error 7153 * recovery. 7154 */ 7155 a->highest_vcn = cpu_to_sle64(lowest_vcn - 1); 7156 /* 7157 * Extend the attribute list attribute and copy in the modified 7158 * value from the cache. 7159 */ 7160 err = ntfs_attr_list_sync_extend(base_ni, base_m, 7161 (u8*)al_entry - base_ni->attr_list, actx); 7162 if (err || actx->is_error) { 7163 /* 7164 * If @actx->is_error indicates error this is fatal as 7165 * we cannot build the mapping pairs array into it as 7166 * it is not mapped. 
7167 * 7168 * However, we may still be able to recover from this 7169 * situation by freeing the extent mft record and thus 7170 * deleting the attribute record. This only works when 7171 * this is the only attribute record in the mft record 7172 * and when we just created this extent attribute 7173 * record. We can easily determine if this is the only 7174 * attribute in the mft record by scanning through the 7175 * cached attribute list attribute. 7176 */ 7177 if (!err) 7178 err = actx->error; 7179 ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx (error " 7180 "%d).", actx->is_error ? 7181 "remap extent mft record of" : 7182 "extend and sync attribute list " 7183 "attribute to", 7184 (unsigned long long)base_ni->mft_no, 7185 err); 7186 goto undo; 7187 } 7188 /* 7189 * Finally, proceed to building the mapping pairs array into 7190 * the attribute record. 7191 */ 7192 goto build_mpa; 7193 } 7194 /* 7195 * We now know that the attribute is in the base mft record. 7196 * 7197 * For performance reasons we want to keep the first extent of the 7198 * unnamed $DATA attribute of files and the $I30 named 7199 * $INDEX_ALLOCATION and $BITMAP attributes of directories in the base 7200 * mft record even if this means that the first extent will be nearly 7201 * empty. This ensures that loading an inode is faster and thus stat() 7202 * and getattrlist() will be faster. 7203 * 7204 * If the attribute is one of the above described ones then we keep the 7205 * existing extent as it is (unless it is actually empty in which case 7206 * we add at least some mapping data to it) and start a new extent in a 7207 * new extent mft record. 7208 * 7209 * In all other cases we move the attribute to a new extent mft record 7210 * and retry the attribute resize as it may now fit. 
7211 */ 7212 if (a->lowest_vcn || (!S_ISDIR(base_ni->mode) && 7213 (ni->type != AT_DATA || ni->name_len)) || 7214 (S_ISDIR(base_ni->mode) && 7215 (!ni->name_len || ni->name != I30))) 7216 goto move_attr; 7217 max_size = (le32_to_cpu(m->bytes_allocated) - 7218 le32_to_cpu(m->bytes_in_use)) & ~7; 7219 al_entry_len = le16_to_cpu(actx->al_entry->length); 7220 /* 7221 * A single mapping pair can be up to 17 bytes in size so we need at 7222 * least that much free space. But we need to align the attribute 7223 * length to 8 bytes thus the 17 becomes 24. 7224 * 7225 * Further, we will be adding at least one attribute list attribute 7226 * entry thus we want to definitely have space for that to happen. If 7227 * the attribute list attribute is non-resident we may have to add 7228 * another mapping pair which would as above be 24 bytes or if it is 7229 * resident we would have to add an actual attribute list entry which 7230 * would be the same size as the one for the current attribute record. 7231 * As this is guaranteed to be larger than 24 bytes we use the larger 7232 * size as the minimum to leave free. 7233 * 7234 * Thus the minimum of free space we require before adding any mapping 7235 * pairs to the current attribute record is 24 + @al_entry_len. 7236 * 7237 * There may be a lot of free space so it would be silly to only use 7238 * the minimum. On one hand we would like to consume as much of the 7239 * free space as possible to keep the number of attribute extents to a 7240 * minimum. On the other hand we would like to keep enough spare space 7241 * for four attribute list attribute entries (this is an arbitrary 7242 * choice) to simplify future expansion of the attribute list 7243 * attribute. 
7244 */ 7245 if (!*((u8*)a + mp_ofs)) { 7246 /* 7247 * There are no mapping pairs in this attribute record thus we 7248 * either have to add some mapping pairs or if the available 7249 * space is less than our minimum we have to move the attribute 7250 * record out into a new extent mft record. 7251 */ 7252 if (max_size < 24 + al_entry_len) 7253 goto move_attr; 7254 /* 7255 * We have our minimum amount of space and possibly a lot more. 7256 * If we have less than our desired spare space use our minimum 7257 * and if we have more than that use everything except the 7258 * desired spare space. 7259 */ 7260 if (max_size < 24 + (4 * al_entry_len)) 7261 max_size = 24; 7262 else 7263 max_size -= 4 * al_entry_len; 7264 } else { 7265 /* 7266 * Check if it would be sensible to add at least some mapping 7267 * pairs to the current attribute record. 7268 * 7269 * If the amount of free space is less than the desired spare 7270 * space we leave this attribute record be and start a new 7271 * extent and if we have more than that use everything except 7272 * the desired spare space. 7273 */ 7274 if (max_size < 24 + (4 * al_entry_len)) 7275 goto start_new_attr; 7276 max_size -= 4 * al_entry_len; 7277 } 7278 /* 7279 * We want to add some mapping pairs to the current attribute before 7280 * starting the next one. 7281 * 7282 * @max_size is already set to the number of bytes to consume from the 7283 * free space in the mft record and it is guaranteed that the mft 7284 * record has at least that much free space. 7285 */ 7286 goto add_mapping_pairs_to_attr; 7287update_sizes: 7288 /* 7289 * We now have extended the allocated size of the attribute. Reflect 7290 * this in the ntfs_inode structure and the attribute record. 7291 */ 7292 if (a->lowest_vcn) { 7293 /* 7294 * We are not in the first attribute extent, switch to it, but 7295 * first ensure the changes will make it to disk later. 
7296 */ 7297 NInoSetMrecNeedsDirtying(actx->ni); 7298 ntfs_attr_search_ctx_reinit(actx); 7299 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, 7300 NULL, 0, actx); 7301 if (err) { 7302 if (start < 0 || start >= alloc_size) 7303 ntfs_error(vol->mp, "Cannot complete " 7304 "extension of allocation of " 7305 "mft_no 0x%llx, attribute type " 7306 "0x%x, because lookup of " 7307 "first attribute extent " 7308 "failed (error %d).", 7309 (unsigned long long) 7310 base_ni->mft_no, (unsigned) 7311 le32_to_cpu(ni->type), err); 7312 if (err == ENOENT) 7313 err = EIO; 7314 goto undo_do_trunc; 7315 } 7316 /* @m is not used any more so no need to set it. */ 7317 a = actx->a; 7318 } 7319 /* 7320 * If we created a hole and the attribute is not marked as sparse, mark 7321 * it as sparse now. 7322 */ 7323 if (is_sparse && !NInoSparse(ni)) { 7324 err = ntfs_attr_sparse_set(base_ni, ni, actx); 7325 if (err) { 7326 ntfs_error(vol->mp, "Failed to set the attribute to " 7327 "be sparse (error %d).", err); 7328 goto undo_do_trunc; 7329 } 7330 /* 7331 * The attribute may have been moved to make space for the 7332 * compressed size so @a is now invalid. 
7333 */ 7334 a = actx->a; 7335 } 7336 lck_spin_lock(&ni->size_lock); 7337 ni->allocated_size = new_alloc_size; 7338 a->allocated_size = cpu_to_sle64(new_alloc_size); 7339 if (NInoSparse(ni) || (ni->type != AT_INDEX_ALLOCATION && 7340 NInoCompressed(ni))) { 7341 ni->compressed_size += nr_allocated << vol->cluster_size_shift; 7342 a->compressed_size = cpu_to_sle64(ni->compressed_size); 7343 } 7344 lck_spin_unlock(&ni->size_lock); 7345 if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) { 7346 lck_spin_lock(&base_ni->size_lock); 7347 base_ni->allocated_size = new_alloc_size; 7348 lck_spin_unlock(&base_ni->size_lock); 7349 } 7350alloc_done: 7351 if (new_data_size > sle64_to_cpu(a->data_size)) { 7352 if (!ubc_setsize(ni->vn, new_data_size)) { 7353 ntfs_error(vol->mp, "Failed to set size in UBC."); 7354 /* 7355 * This can only happen if a previous resize failed and 7356 * the UBC size was already out of date in which case 7357 * we can just leave it out of date and continue to 7358 * completion returning an error. FIXME: We could roll 7359 * back the changes to the metadata at some point but 7360 * it does not seem worth it at the moment given that 7361 * the error can only happen if there already was an 7362 * error thus it is very unlikely. 7363 */ 7364 err = EIO; 7365 } 7366 lck_spin_lock(&ni->size_lock); 7367 ni->data_size = new_data_size; 7368 a->data_size = cpu_to_sle64(new_data_size); 7369 lck_spin_unlock(&ni->size_lock); 7370 if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) { 7371 lck_spin_lock(&base_ni->size_lock); 7372 base_ni->data_size = new_data_size; 7373 lck_spin_unlock(&base_ni->size_lock); 7374 } 7375 } 7376dirty_done: 7377 /* Ensure the changes make it to disk. */ 7378 NInoSetMrecNeedsDirtying(actx->ni); 7379 /* 7380 * We have modified the size. If the ntfs inode is the base inode, 7381 * cause the sizes to be written to all the directory index entries 7382 * pointing to the base inode when the inode is written to disk. 
Do 7383 * not do this for directories as they have both sizes set to zero in 7384 * their index entries. 7385 */ 7386 if (ni == base_ni && !S_ISDIR(ni->mode)) 7387 NInoSetDirtySizes(ni); 7388done: 7389 ntfs_attr_search_ctx_put(actx); 7390 ntfs_mft_record_unmap(base_ni); 7391 lck_rw_unlock_exclusive(&ni->rl.lock); 7392 ntfs_debug("Done, new_allocated_size 0x%llx.", 7393 (unsigned long long)new_alloc_size); 7394 if (dst_alloc_size) 7395 *dst_alloc_size = new_alloc_size; 7396 return err; 7397free_undo: 7398 /* We have not yet added an attribute list entry for the new extent. */ 7399 al_entry_added = FALSE; 7400 goto free_extent; 7401undo: 7402 ntfs_attr_search_ctx_reinit(actx); 7403 if (is_first && !mp_rebuilt) 7404 goto undo_alloc; 7405 /* Look up the attribute extent we were working on. */ 7406 if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, lowest_vcn, 7407 NULL, 0, actx)) { 7408 /* There is nothing we can do now, bail out. */ 7409 ntfs_error(vol->mp, "Failed to find current attribute extent " 7410 "in error code path. Leaving inconsistent " 7411 "metadata. Run chkdsk."); 7412 NVolSetErrors(vol); 7413 goto err_out; 7414 } 7415 if (is_first) 7416 actx->a->highest_vcn = cpu_to_sle64( 7417 (alloc_size >> vol->cluster_size_shift) - 1); 7418undo_alloc: 7419 ll = alloc_size >> vol->cluster_size_shift; 7420 if (ntfs_cluster_free(ni, ll, -1, actx, &nr_freed)) { 7421 ntfs_error(vol->mp, "Failed to release allocated cluster(s) " 7422 "in error code path. Run chkdsk to recover " 7423 "the lost cluster(s)."); 7424 NVolSetErrors(vol); 7425 /* 7426 * Still need to know how many real clusters are effectively 7427 * truncated from the attribute extentsion. 
7428 */ 7429 nr_freed = ntfs_rl_get_nr_real_clusters(&ni->rl, ll, -1); 7430 } 7431 m = actx->m; 7432 a = actx->a; 7433undo_hole: 7434 /* 7435 * If the runlist truncation fails and/or the search context is no 7436 * longer valid, we cannot resize the attribute record or build the 7437 * mapping pairs array thus we mark the volume dirty and tell the user 7438 * to run chkdsk. 7439 */ 7440 if (ntfs_rl_truncate_nolock(vol, &ni->rl, ll) || actx->is_error) { 7441 ntfs_error(vol->mp, "Failed to %s in error code path. Run " 7442 "chkdsk.", actx->is_error ? 7443 "restore attribute search context" : 7444 "truncate attribute runlist"); 7445 NVolSetErrors(vol); 7446 } else if (is_first) { 7447 if (mp_rebuilt) { 7448 /* We are working on the original extent, restore it. */ 7449 if (ntfs_attr_record_resize(m, a, attr_len)) { 7450 ntfs_error(vol->mp, "Failed to restore " 7451 "attribute record in error " 7452 "code path. Run chkdsk."); 7453 NVolSetErrors(vol); 7454 } else /* if (success) */ { 7455 mp_ofs = le16_to_cpu(a->mapping_pairs_offset); 7456 if (ntfs_mapping_pairs_build(vol, (s8*)a + 7457 mp_ofs, attr_len - mp_ofs, 7458 ni->rl.rl, lowest_vcn, -1, 7459 NULL)) { 7460 ntfs_error(vol->mp, "Failed to " 7461 "restore mapping " 7462 "pairs array in error " 7463 "code path. Run " 7464 "chkdsk."); 7465 NVolSetErrors(vol); 7466 } 7467 if (actx->ni != base_ni) 7468 NInoSetMrecNeedsDirtying(actx->ni); 7469 } 7470 } 7471 } else if (/* !is_first && */ a->highest_vcn == 7472 cpu_to_sle64(sle64_to_cpu(a->lowest_vcn) - 1)) { 7473 /* We need to delete the attribute list entry, too. */ 7474 al_entry_added = TRUE; 7475 /* We are working on a new extent, remove it. */ 7476 if (!ntfs_attr_record_is_only_one(m, a)) { 7477 ntfs_attr_record_delete_internal(m, a); 7478 if (actx->ni != base_ni) 7479 NInoSetMrecNeedsDirtying(actx->ni); 7480 } else { 7481free_extent: 7482 if (!ntfs_extent_mft_record_free(base_ni, actx->ni, 7483 m)) { 7484 /* 7485 * The extent inode no longer exists. 
Make it 7486 * safe to release/reinit the search context. 7487 */ 7488 actx->ni = base_ni; 7489 } else { 7490 ntfs_error(vol->mp, "Failed to free extent " 7491 "mft record 0x%llx of mft_no " 7492 "0x%llx in error code path. " 7493 "Leaving inconsistent " 7494 "metadata. Run chkdsk.", 7495 (unsigned long long) 7496 actx->ni->mft_no, 7497 (unsigned long long) 7498 base_ni->mft_no); 7499 NVolSetErrors(vol); 7500 } 7501 } 7502 if (al_entry_added) { 7503 ntfs_attr_list_entry_delete(base_ni, actx->al_entry); 7504 ntfs_attr_search_ctx_reinit(actx); 7505 if (ntfs_attr_list_sync_shrink(base_ni, 0, actx)) { 7506 ntfs_error(vol->mp, "Failed to restore " 7507 "attribute list attribute in " 7508 "base inode 0x%llx. Leaving " 7509 "inconsistent metadata. " 7510 "Run chkdsk.", 7511 (unsigned long long) 7512 base_ni->mft_no); 7513 NVolSetErrors(vol); 7514 } 7515 } 7516 } 7517undo_do_trunc: 7518 lck_spin_lock(&ni->size_lock); 7519 if (alloc_size == ni->allocated_size) { 7520 lck_spin_unlock(&ni->size_lock); 7521 goto undo_skip_update_sizes; 7522 } 7523 lck_spin_unlock(&ni->size_lock); 7524 ntfs_attr_search_ctx_reinit(actx); 7525 /* Look up the first attribute extent. */ 7526 if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0, 7527 actx)) { 7528 /* There is nothing we can do now, bail out. */ 7529 ntfs_error(vol->mp, "Failed to find first attribute extent in " 7530 "error code path. Leaving inconsistent " 7531 "metadata. 
Run chkdsk."); 7532 NVolSetErrors(vol); 7533 goto err_out; 7534 } 7535 a = actx->a; 7536 lck_spin_lock(&ni->size_lock); 7537 ni->allocated_size = alloc_size; 7538 a->allocated_size = cpu_to_sle64(alloc_size); 7539 if (NInoSparse(ni) || (ni->type != AT_INDEX_ALLOCATION && 7540 NInoCompressed(ni))) { 7541 ni->compressed_size += (nr_allocated - nr_freed) << 7542 vol->cluster_size_shift; 7543 a->compressed_size = cpu_to_sle64(ni->compressed_size); 7544 } 7545 lck_spin_unlock(&ni->size_lock); 7546 if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) { 7547 lck_spin_lock(&base_ni->size_lock); 7548 base_ni->allocated_size = alloc_size; 7549 lck_spin_unlock(&base_ni->size_lock); 7550 } 7551 /* Ensure the changes make it to disk. */ 7552 if (actx->ni != base_ni) 7553 NInoSetMrecNeedsDirtying(actx->ni); 7554 /* 7555 * We have modified the size. If the ntfs inode is the base inode, 7556 * cause the sizes to be written to all the directory index entries 7557 * pointing to the base inode when the inode is written to disk. Do 7558 * not do this for directories as they have both sizes set to zero in 7559 * their index entries. 7560 */ 7561 if (ni == base_ni && !S_ISDIR(ni->mode)) 7562 NInoSetDirtySizes(ni); 7563undo_skip_update_sizes: 7564 ntfs_attr_search_ctx_put(actx); 7565 NInoSetMrecNeedsDirtying(base_ni); 7566 ntfs_mft_record_unmap(base_ni); 7567 lck_rw_unlock_exclusive(&ni->rl.lock); 7568 /* 7569 * Things are now consistent, try to truncate the attribute back to its 7570 * old size which will cause the allocation to be restored to its old 7571 * size. 7572 * 7573 * TODO: We should support partial allocations and when we do so we 7574 * should only put the allocated size back if the error was not ENOSPC 7575 * and partial allocations are acceptable for this attribute. In that 7576 * case would also need to update @ni->data_size, @a->data_size, and 7577 * the size in the vnode @ni->vn via ubc_setsize(). 
7578 */ 7579 if (!is_first) { 7580 lck_spin_lock(&ni->size_lock); 7581 ll = ni->data_size; 7582 lck_spin_unlock(&ni->size_lock); 7583 if (ntfs_attr_resize(ni, ll, 0, ictx)) { 7584 ntfs_error(vol->mp, "Failed to undo partial " 7585 "allocation in inode 0x%llx in error " 7586 "code path.", 7587 (unsigned long long)base_ni->mft_no); 7588 NVolSetErrors(vol); 7589 } 7590 } 7591conv_err_out: 7592 ntfs_debug("Failed (error %d).", err); 7593 return err; 7594err_out: 7595 if (actx) 7596 ntfs_attr_search_ctx_put(actx); 7597 if (base_m) 7598 ntfs_mft_record_unmap(base_ni); 7599 lck_rw_unlock_exclusive(&ni->rl.lock); 7600 goto conv_err_out; 7601trunc_err_out: 7602 mp_rebuilt = FALSE; 7603 if (is_sparse) { 7604 ll = alloc_size >> vol->cluster_size_shift; 7605 /* 7606 * Silence compiler warning about possible use of uninitalized 7607 * variable. 7608 */ 7609 attr_len = 0; 7610 goto undo_hole; 7611 } 7612 goto err_out; 7613} 7614 7615/** 7616 * ntfs_attr_resize - called to change the size of an ntfs attribute inode 7617 * @ni: ntfs inode for which to change the size 7618 * @new_size: new size in bytes to which to resize the ntfs attribute @ni 7619 * @ioflags: flags further describing the resize request 7620 * @ictx: index context or NULL 7621 * 7622 * Resize the attribute described by the ntfs inode @ni to @new_size bytes. 7623 * 7624 * Note: We only support size changes for normal attributes at present, i.e. 7625 * not compressed and not encrypted. 7626 * 7627 * The flags in @ioflags further describe the resize request. The following 7628 * ioflags are currently defined in OS X kernel (a lot of them are not 7629 * applicable to resize requests however): 7630 * IO_UNIT - Do i/o as atomic unit. 7631 * IO_APPEND - Append write to end. 7632 * IO_SYNC - Do i/o synchronously. 7633 * IO_NODELOCKED - Underlying node already locked. 7634 * IO_NDELAY - FNDELAY flag set in file table. 7635 * IO_NOZEROFILL - F_SETSIZE fcntl uses this to prevent zero filling. 
7636 * IO_TAILZEROFILL - Zero fills at the tail of write. 7637 * IO_HEADZEROFILL - Zero fills at the head of write. 7638 * IO_NOZEROVALID - Do not zero fill if valid page. 7639 * IO_NOZERODIRTY - Do not zero fill if page is dirty. 7640 * IO_CLOSE - The i/o was issued from close path. 7641 * IO_NOCACHE - Same effect as VNOCACHE_DATA, but only for this i/o. 7642 * IO_RAOFF - Same effect as VRAOFF, but only for this i/o. 7643 * IO_DEFWRITE - Defer write if vfs.defwrite is set. 7644 * IO_PASSIVE - This is background i/o so do not throttle other i/o. 7645 * In particular the only flags that are used in the kernel when calling 7646 * vnode_setsize() are IO_SYNC and IO_NOZEROFILL. 7647 * 7648 * TODO: The @ioflags are currently ignored. 7649 * 7650 * If @ictx is not NULL, the resize is for an index allocation or bitmap 7651 * attribute extension. In this case, if there is not enough space in the mft 7652 * record for the extended index allocation/bitmap attribute, the index root is 7653 * moved to an index block if it is not empty to create more space in the mft 7654 * record. 7655 * 7656 * Return 0 on success and errno on error. 7657 * 7658 * Locking: - Caller must hold @ni->lock on the inode for writing. 7659 * - If called for a shrinking operation, the tail of the new final 7660 * partial page will be zeroed by the call to ubc_setsize() thus it 7661 * must not be locked / mapped or the ubc_setsize() call would 7662 * deadlock. 
7663 */ 7664errno_t ntfs_attr_resize(ntfs_inode *ni, s64 new_size, int ioflags, 7665 ntfs_index_context *ictx) 7666{ 7667 s64 old_size, nr_freed, new_alloc_size, old_alloc_size, compressed_size; 7668 VCN highest_vcn, old_highest_vcn, lowest_vcn; 7669 ntfs_inode *eni, *base_ni; 7670 ntfs_volume *vol = ni->vol; 7671 ntfs_attr_search_ctx *actx; 7672 MFT_RECORD *m; 7673 ATTR_RECORD *a; 7674 ATTR_LIST_ENTRY *al_entry; 7675 u8 *del_al_start, *al_end; 7676 int size_change, alloc_change; 7677 unsigned mp_size, attr_len, arec_size; 7678 errno_t err; 7679 BOOL need_ubc_setsize = TRUE; 7680 static const char es[] = " Leaving inconsistent metadata. Unmount " 7681 "and run chkdsk."; 7682 7683 ntfs_debug("Entering for mft_no 0x%llx.", 7684 (unsigned long long)ni->mft_no); 7685 /* 7686 * Cannot be called for directory inodes as metadata access happens via 7687 * the corresponding index inodes. 7688 */ 7689 if (S_ISDIR(ni->mode)) 7690 panic("%s(): Called for directory inode.\n", __FUNCTION__); 7691 base_ni = ni; 7692 if (NInoAttr(ni)) 7693 base_ni = ni->base_ni; 7694 /* 7695 * We are going to change the size thus we need the ntfs inode lock 7696 * taken for exclusive access which is already done by the caller. 7697 * 7698 * When shrinking start by changing the size in the UBC of the vnode. 7699 * This will cause all pages in the VM beyond the new size to be thrown 7700 * away and the last page to be pushed out to disk and its end 7701 * invalidated. 7702 * 7703 * We guarantee that the size in the UBC in the vnode will always be 7704 * smaller or equal to the data_size in the ntfs inode thus no need to 7705 * check the data_size. 
7706 */ 7707 old_size = ubc_getsize(ni->vn); 7708 if (new_size < old_size) { 7709 err = ubc_setsize(ni->vn, new_size); 7710 if (!err) { 7711 ntfs_error(vol->mp, "Failed to shrink size in UBC."); 7712 err = EIO; 7713 goto err; 7714 } 7715 need_ubc_setsize = FALSE; 7716 } 7717retry_resize: 7718 /* 7719 * Lock the runlist for writing and map the mft record to ensure it is 7720 * safe to modify the attribute runlist and sizes. 7721 */ 7722 lck_rw_lock_exclusive(&ni->rl.lock); 7723 err = ntfs_mft_record_map(base_ni, &m); 7724 if (err) { 7725 ntfs_error(vol->mp, "Failed to map mft record for mft_no " 7726 "0x%llx (error %d).", 7727 (unsigned long long)ni->mft_no, err); 7728 goto unl_err; 7729 } 7730 actx = ntfs_attr_search_ctx_get(base_ni, m); 7731 if (!actx) { 7732 ntfs_error(vol->mp, "Failed to allocate a search context (not " 7733 "enough memory)."); 7734 err = ENOMEM; 7735 goto unm_err; 7736 } 7737 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0, 7738 actx); 7739 if (err) { 7740 if (err == ENOENT) { 7741 ntfs_error(vol->mp, "Open attribute is missing from " 7742 "mft record. Inode 0x%llx is " 7743 "corrupt. Run chkdsk.", 7744 (unsigned long long)ni->mft_no); 7745 err = EIO; 7746 } else 7747 ntfs_error(vol->mp, "Failed to lookup attribute " 7748 "(error %d).", err); 7749 goto put_err; 7750 } 7751 m = actx->m; 7752 a = actx->a; 7753 if (old_size != ntfs_attr_size(a)) { 7754 /* 7755 * A failed truncate caused the ubc size to get out of sync. 7756 * The current size of the attribute value is the correct old 7757 * size. 7758 */ 7759 old_size = ntfs_attr_size(a); 7760 } 7761 /* Calculate the new allocated size. */ 7762 if (NInoNonResident(ni)) 7763 new_alloc_size = (new_size + vol->cluster_size - 1) & 7764 ~(s64)vol->cluster_size_mask; 7765 else 7766 new_alloc_size = (new_size + 7) & ~7; 7767 /* The current allocated size is the old allocated size. 
*/ 7768 lck_spin_lock(&ni->size_lock); 7769 old_alloc_size = ni->allocated_size; 7770 compressed_size = ni->compressed_size; 7771 lck_spin_unlock(&ni->size_lock); 7772 /* 7773 * The change in the file size. This will be 0 if no change, >0 if the 7774 * size is growing, and <0 if the size is shrinking. 7775 */ 7776 size_change = -1; 7777 if (new_size - old_size >= 0) { 7778 size_change = 1; 7779 if (new_size == old_size) 7780 size_change = 0; 7781 } 7782 if (need_ubc_setsize && size_change < 0) { 7783 /* 7784 * A previous truncate failed thus we did not catch that this 7785 * is a shrinking resize earlier on. 7786 */ 7787 err = ubc_setsize(ni->vn, new_size); 7788 if (!err) { 7789 ntfs_error(vol->mp, "Failed to shrink size in UBC."); 7790 err = EIO; 7791 goto put_err; 7792 } 7793 need_ubc_setsize = FALSE; 7794 } 7795 /* As above for the allocated size. */ 7796 alloc_change = -1; 7797 if (new_alloc_size - old_alloc_size >= 0) { 7798 alloc_change = 1; 7799 if (new_alloc_size == old_alloc_size) 7800 alloc_change = 0; 7801 } 7802 /* 7803 * If neither the size nor the allocation are being changed there is 7804 * nothing to do. 7805 */ 7806 if (!size_change && !alloc_change) 7807 goto unm_done; 7808 /* If the size is changing, check if new size is allowed in $AttrDef. */ 7809 if (size_change) { 7810 err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); 7811 if (err) { 7812 if (err == ERANGE) { 7813 ntfs_error(vol->mp, "Resize would cause the " 7814 "mft_no 0x%llx to %simum size " 7815 "for its attribute type " 7816 "(0x%x). Aborting resize.", 7817 (unsigned long long)ni->mft_no, 7818 size_change > 0 ? "exceed " 7819 "the max" : "go under the min", 7820 (unsigned) 7821 le32_to_cpu(ni->type)); 7822 err = EFBIG; 7823 } else { 7824 ntfs_error(vol->mp, "Mft_no 0x%llx has " 7825 "unknown attribute type " 7826 "0x%x. 
Aborting resize.", 7827 (unsigned long long)ni->mft_no, 7828 (unsigned) 7829 le32_to_cpu(ni->type)); 7830 err = EIO; 7831 } 7832 goto put_err; 7833 } 7834 } 7835 /* 7836 * The index root attribute, i.e. directory indexes and index inodes 7837 * can be marked compressed or encrypted but this means to create 7838 * compressed/encrypted files, not that the attribute is 7839 * compressed/encrypted. 7840 */ 7841 if (ni->type != AT_INDEX_ALLOCATION && 7842 (NInoCompressed(ni) || NInoEncrypted(ni))) { 7843 ntfs_warning(vol->mp, "Changes in inode size are not " 7844 "supported yet for %s attributes, ignoring.", 7845 NInoCompressed(ni) ? "compressed" : 7846 "encrypted"); 7847 err = ENOTSUP; 7848 goto put_err; 7849 } 7850 if (a->non_resident) 7851 goto do_non_resident_resize; 7852 if (NInoNonResident(ni)) 7853 panic("%s(): NInoNonResident(ni)\n", __FUNCTION__); 7854 arec_size = (le16_to_cpu(a->value_offset) + new_size + 7) & ~7; 7855 /* Resize the attribute record to best fit the new attribute size. */ 7856 if (new_size < vol->mft_record_size && 7857 !ntfs_resident_attr_value_resize(m, a, new_size)) { 7858 /* The resize succeeded! */ 7859 NInoSetMrecNeedsDirtying(actx->ni); 7860 lck_spin_lock(&ni->size_lock); 7861 /* Update the sizes in the ntfs inode and all is done. */ 7862 ni->allocated_size = le32_to_cpu(a->length) - 7863 le16_to_cpu(a->value_offset); 7864 ni->data_size = le32_to_cpu(a->value_length); 7865 /* 7866 * Note ntfs_resident_attr_value_resize() has already done any 7867 * necessary data clearing in the attribute record. When the 7868 * file is being shrunk ubc_setsize() will already have zeroed 7869 * the last partial page, i.e. since this is the resident case 7870 * this is the page with index 0. However, when the file is 7871 * being expanded, the page cache page data between the old 7872 * data_size, i.e. old_size, and the new_size has not been 7873 * zeroed. 
Fortunately, we do not need to zero it either since 7874 * on one hand it will either already be zero due to pagein 7875 * clearing partial page data beyond the data_size in which 7876 * case there is nothing to do or in the case of the file being 7877 * mmap()ped at the same time, POSIX specifies that the 7878 * behaviour is unspecified thus we do not have to do anything. 7879 * This means that in our implementation in the rare case that 7880 * the file is mmap()ped and a write occured into the mmap()ped 7881 * region just beyond the file size and we now extend the file 7882 * size to incorporate this dirty region outside the file size, 7883 * a pageout of the page would result in this data being 7884 * written to disk instead of being cleared. Given POSIX 7885 * specifies that this corner case is undefined, we choose to 7886 * leave it like that as this is much simpler for us as we 7887 * cannot lock the relevant page now since we are holding too 7888 * many ntfs locks which would result in lock reversal 7889 * deadlocks. 7890 */ 7891 ni->initialized_size = new_size; 7892 lck_spin_unlock(&ni->size_lock); 7893 goto unm_done; 7894 } 7895 /* If the above resize failed, this must be an attribute extension. */ 7896 if (size_change < 0) 7897 panic("%s(): size_change < 0\n", __FUNCTION__); 7898 /* 7899 * Not enough space in the mft record. If this is an index related 7900 * extension, check if the index root attribute is in the same mft 7901 * record as the attribute being extended and if it is and it is not 7902 * empty move its entries into an index allocation block. 
Note we do 7903 * not check whether that actually creates enough space because how 7904 * much space is needed exactly is very hard to determine in advance 7905 * (due to potential need for associated attribute list attribute 7906 * extensions) and also because even if it does not create enough space 7907 * it will still help and save work later on when working for example 7908 * on the attribute list attribute. 7909 */ 7910 if (ictx) { 7911 long delta; 7912 INDEX_ROOT *ir; 7913 INDEX_HEADER *ih; 7914 INDEX_ENTRY *ie, *first_ie; 7915 ntfs_index_context *root_ictx; 7916 ntfs_attr_search_ctx root_actx; 7917 7918 /* 7919 * This must be an index bitmap extension. An index allocation 7920 * extension is also possible but not here as that cannot be 7921 * resident. 7922 */ 7923 if (ni->type != AT_BITMAP) 7924 panic("%s(): ni->type != AT_BITMAP\n", __FUNCTION__); 7925 ntfs_attr_search_ctx_init(&root_actx, actx->ni, m); 7926 err = ntfs_attr_find_in_mft_record(AT_INDEX_ROOT, ni->name, 7927 ni->name_len, NULL, 0, &root_actx); 7928 if (err) { 7929 if (err != ENOENT) { 7930 ntfs_error(vol->mp, "Failed to find index " 7931 "root attribute in mft_no " 7932 "0x%llx (error %d). Inode is " 7933 "corrupt. Run chkdsk.", 7934 (unsigned long long)ni->mft_no, 7935 err); 7936 NVolSetErrors(vol); 7937 } 7938 /* 7939 * The index root is in a different mft record so we 7940 * cannot gain anything by moving out its entries. Set 7941 * @ictx to NULL so we do not waste our time trying 7942 * again. 7943 */ 7944 ictx = NULL; 7945 goto ictx_done; 7946 } 7947 /* 7948 * We found the index root in the same mft record as the 7949 * attribute to be extended. Check whether it is empty or not. 
7950 */ 7951 ir = (INDEX_ROOT*)((u8*)root_actx.a + 7952 le16_to_cpu(root_actx.a->value_offset)); 7953 ih = &ir->index; 7954 first_ie = ie = (INDEX_ENTRY*)((u8*)ih + 7955 le32_to_cpu(ih->entries_offset)); 7956 while (!(ie->flags & INDEX_ENTRY_END)) 7957 ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length)); 7958 /* 7959 * If there are no entries other than the end entry we cannot 7960 * gain anything by moving out the entries from the index root. 7961 * Set @ictx to NULL so we do not waste our time trying again. 7962 */ 7963 if (ie == first_ie) { 7964 ictx = NULL; 7965 goto ictx_done; 7966 } 7967 /* 7968 * We cannot have gotten this far if the current index context 7969 * is locked and/or it is the index root. 7970 * 7971 * Also, we need to drop the runlist lock we are holding as it 7972 * may need to be taken when moving the entries from the index 7973 * root to the index allocation block. 7974 */ 7975 if (ictx->is_locked) 7976 panic("%s(): ictx->is_locked\n", __FUNCTION__); 7977 if (ictx->is_root) 7978 panic("%s(): ictx->is_root\n", __FUNCTION__); 7979 lck_rw_unlock_exclusive(&ni->rl.lock); 7980 /* Find the index root by walking up the tree path. */ 7981 root_ictx = ictx; 7982 while (!root_ictx->is_root) { 7983 root_ictx = root_ictx->up; 7984 /* 7985 * If we go all the way round to the beginning without 7986 * finding the root something has gone badly wrong. 7987 */ 7988 if (root_ictx == ictx) 7989 panic("%s(): root_ictx == ictx\n", 7990 __FUNCTION__); 7991 } 7992 /* 7993 * We need a proper deallocatable attribute search context thus 7994 * switch the one pointing to the attribute to be resized to 7995 * point to the index root. Note we are not updating 7996 * @actx->al_entry as this is not going to be touched at all. 7997 * Having said that set it to NULL just in case. 7998 */ 7999 actx->a = root_actx.a; 8000 actx->al_entry = NULL; 8001 /* 8002 * Lock the index root node. 
We already have the index root 8003 * attribute thus only need to do the revalidation part of 8004 * re-locking. 8005 */ 8006 root_ictx->is_locked = 1; 8007 root_ictx->actx = actx; 8008 root_ictx->bytes_free = le32_to_cpu(m->bytes_allocated) - 8009 le32_to_cpu(m->bytes_in_use); 8010 root_ictx->ir = ir; 8011 delta = (u8*)ih - (u8*)root_ictx->index; 8012 if (delta) { 8013 INDEX_ENTRY **entries; 8014 unsigned u; 8015 8016 root_ictx->index = ih; 8017 root_ictx->entry = (INDEX_ENTRY*)( 8018 (u8*)root_ictx->entry + delta); 8019 entries = root_ictx->entries; 8020 for (u = 0; u < root_ictx->nr_entries; u++) 8021 entries[u] = (INDEX_ENTRY*)((u8*)entries[u] + 8022 delta); 8023 } 8024 /* 8025 * Move the index root entries to an index allocation block. 8026 * 8027 * Note we do not need to worry about this causing infinite 8028 * recursion in the case that we were called from 8029 * ntfs_index_block_alloc() which was called from 8030 * ntfs_index_move_root_to_allocation_block() because the 8031 * latter will have emptied the index root before calling 8032 * ntfs_index_block_alloc() thus we will bail out above when 8033 * checking whether the index root is empty the second time 8034 * round and the recursion will stop there. This is a very 8035 * seldom occurrence thus there is no point in special casing it 8036 * in the code in a more efficient but more complicated way. 8037 * 8038 * A complication is that ntfs_attr_resize() may have been 8039 * called from ntfs_index_block_alloc() and in this case when 8040 * we call ntfs_index_move_root_to_allocation_block() it will 8041 * call ntfs_index_block_alloc() again which will cause a 8042 * deadlock (or with lock debugging enabled panic()) because 8043 * ntfs_index_block_alloc() takes the bitmap inode lock for 8044 * writing. 
To avoid this ntfs_index_block_alloc() sets 8045 * @ictx->bmp_is_locked and we need to set 8046 * @root_ictx->bmp_is_locked to the same value so that when 8047 * ntfs_index_move_root_to_allocation_block() calls 8048 * ntfs_index_block_alloc() the latter will know not to take 8049 * the bitmap inode lock again. 8050 */ 8051 root_ictx->bmp_is_locked = ictx->bmp_is_locked; 8052 err = ntfs_index_move_root_to_allocation_block(root_ictx); 8053 if (root_ictx != ictx) 8054 root_ictx->bmp_is_locked = 0; 8055 if (err) { 8056 ntfs_error(vol->mp, "Failed to move index root to " 8057 "index allocation block (error %d).", 8058 err); 8059 if (root_ictx->is_locked) 8060 ntfs_index_ctx_unlock(root_ictx); 8061 /* 8062 * This is a disaster as it means the index context is 8063 * no longer valid thus we have to bail out all the 8064 * way. 8065 */ 8066 goto err; 8067 } 8068 /* Unlock the newly created index block. */ 8069 if (root_ictx->is_root) 8070 panic("%s(): root_ictx->is_root\n", __FUNCTION__); 8071 if (!root_ictx->is_locked) 8072 panic("%s(): !root_ictx->is_locked\n", __FUNCTION__); 8073 ntfs_index_ctx_unlock(root_ictx); 8074 /* 8075 * We are done. The index root is now empty thus the mft 8076 * record should now have enough space. Because we dropped the 8077 * mft record when moving the index root entries into the index 8078 * allocation block we need to restart the attribute resize 8079 * again. 8080 * 8081 * But first we set @ictx to NULL so we do not get here again 8082 * in the case that there still is not enough free space. This 8083 * is not a disaster as we can just carry on doing other 8084 * rearrangements to free up enough space in the mft record. 8085 */ 8086 ictx = NULL; 8087 goto retry_resize; 8088 } 8089ictx_done: 8090 /* 8091 * We have to drop all the locks so we can call 8092 * ntfs_attr_make_non_resident(). 
8093 */ 8094 ntfs_attr_search_ctx_put(actx); 8095 ntfs_mft_record_unmap(base_ni); 8096 lck_rw_unlock_exclusive(&ni->rl.lock); 8097 /* 8098 * Not enough space in the mft record, try to make the attribute 8099 * non-resident and if successful restart the truncation process. 8100 */ 8101 err = ntfs_attr_make_non_resident(ni); 8102 if (!err) 8103 goto retry_resize; 8104 /* 8105 * Could not make non-resident. If this is due to this not being 8106 * permitted for this attribute type try to make other attributes 8107 * non-resident and/or move this or other attributes out of the mft 8108 * record this attribute is in. Otherwise fail. 8109 */ 8110 if (err != EPERM) { 8111 if (err != ENOSPC) { 8112 ntfs_error(vol->mp, "Cannot truncate mft_no 0x%llx, " 8113 "attribute type 0x%x, because the " 8114 "conversion from resident to " 8115 "non-resident attribute failed (error " 8116 "%d).", (unsigned long long)ni->mft_no, 8117 (unsigned)le32_to_cpu(ni->type), err); 8118 if (err != ENOMEM) { 8119 NVolSetErrors(vol); 8120 err = EIO; 8121 } 8122 } 8123 goto err; 8124 } 8125 /* 8126 * To make space in the mft record we would like to try to make other 8127 * attributes non-resident if that would save space. 8128 * 8129 * FIXME: We cannot do this at present unless the attribute is the 8130 * attribute being resized as there could be an ntfs inode matching 8131 * this attribute in memory and it would become out of date with its 8132 * metadata if we touch its attribute record. 8133 * 8134 * FIXME: We do not need to do this if this is the attribute being 8135 * resized as we already tried to make the attribute non-resident and 8136 * it did not work or we would never have gotten here in the first 8137 * place. 8138 * 8139 * Thus we have to either move other attributes to extent mft records 8140 * thus making more space in the base mft record or we have to move the 8141 * attribute being resized to an extent mft record thus giving it more 8142 * space. 
In any case we need to have an attribute list attribute so 8143 * start by adding it if it does not yet exist. 8144 * 8145 * Before we start, we can check whether it is possible to fit the 8146 * attribute to be resized inside an mft record. If not then there is 8147 * no point in proceeding. 8148 * 8149 * This should never really happen as the attribute size should never 8150 * be allowed to grow so much and such requests should never be made by 8151 * the driver and if they are they should be caught by the call to 8152 * ntfs_attr_size_bounds_check(). 8153 */ 8154 if (arec_size > vol->mft_record_size - sizeof(MFT_RECORD)) { 8155 ntfs_error(vol->mp, "Cannot truncate mft_no 0x%llx, attribute " 8156 "type 0x%x, because the attribute may not be " 8157 "non-resident and the requested size exceeds " 8158 "the maximum possible resident attribute " 8159 "record size.", (unsigned long long)ni->mft_no, 8160 (unsigned)le32_to_cpu(ni->type)); 8161 /* Use POSIX conformant truncate(2) error code. */ 8162 err = EFBIG; 8163 goto err; 8164 } 8165 /* 8166 * The resident attribute can fit in an mft record. Now have to decide 8167 * whether to make other attributes non-resident/move other attributes 8168 * out of the mft record or whether to move the attribute record to be 8169 * resized out to a new mft record. 8170 * 8171 * TODO: We never call ntfs_attr_resize() for attributes that cannot be 8172 * non-resident thus we never get here thus we simply panic() here to 8173 * remind us that we need to implement this code if we ever start 8174 * calling this function for attributes that must remain resident. 8175 */ 8176 panic("%s(): Attribute may not be non-resident.\n", __FUNCTION__); 8177do_non_resident_resize: 8178 if (!NInoNonResident(ni)) 8179 panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__); 8180 /* 8181 * If the size is shrinking, need to reduce the initialized_size and 8182 * the data_size before reducing the allocation. 
 8183 */ 8184 if (size_change < 0) { 8185 /* 8186 * Make the valid size smaller (the UBC size is already 8187 * up-to-date). 8188 */ 8189 lck_spin_lock(&ni->size_lock); 8190 if (new_size < ni->initialized_size) { 8191 ni->initialized_size = new_size; 8192 a->initialized_size = cpu_to_sle64(new_size); 8193 lck_spin_unlock(&ni->size_lock); 8194 if (ni->name == I30 && 8195 ni->type == AT_INDEX_ALLOCATION) { 8196 lck_spin_lock(&base_ni->size_lock); 8197 base_ni->initialized_size = new_size; 8198 lck_spin_unlock(&base_ni->size_lock); 8199 } 8200 } else 8201 lck_spin_unlock(&ni->size_lock); 8202 /* 8203 * If the size is shrinking it makes no sense for the 8204 * allocation to be growing. 8205 */ 8206 if (alloc_change > 0) 8207 panic("%s(): alloc_change > 0\n", __FUNCTION__); 8208 } else if (/*size_change >= 0 && */ alloc_change > 0){ 8209 /* 8210 * The file size is growing or staying the same but the 8211 * allocation can be shrinking, growing or staying the same. 8212 * 8213 * If the allocation is shrinking or staying the same we fall 8214 * down into the same code as the size shrinking base 8215 * allocation shrinking. 8216 * 8217 * Only if the allocation is growing do we need to extend the 8218 * allocation and possibly update the data size here. If we 8219 * are updating the data size, since we are not touching the 8220 * initialized_size we do not need to worry about the actual 8221 * data on disk. And as far as the VM pages are concerned, 8222 * there will be no pages beyond the old data size and any 8223 * partial region in the last page between the old and new data 8224 * size (or the end of the page if the new data size is outside 8225 * the page) does not need to be modified as explained above 8226 * for the resident attribute resize case. To do this, we 8227 * simply drop the locks we hold and leave all the work to our 8228 * friendly helper ntfs_attr_extend_allocation(). 
8229 * 8230 * Note by setting @data_start to -1 (last parameter to 8231 * ntfs_attr_extend_allocation()) we guarantee that the 8232 * allocation is not partial. 8233 */ 8234 ntfs_attr_search_ctx_put(actx); 8235 ntfs_mft_record_unmap(base_ni); 8236 lck_rw_unlock_exclusive(&ni->rl.lock); 8237 err = ntfs_attr_extend_allocation(ni, new_size, 8238 size_change > 0 ? new_size : -1, -1, ictx, 8239 NULL, FALSE); 8240 if (err) 8241 goto err; 8242 goto done; 8243 } 8244 /* alloc_change <= 0 */ 8245 /* If the actual size is changing need to update it now. */ 8246 if (size_change) { 8247 lck_spin_lock(&ni->size_lock); 8248 ni->data_size = new_size; 8249 a->data_size = cpu_to_sle64(new_size); 8250 lck_spin_unlock(&ni->size_lock); 8251 if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) { 8252 lck_spin_lock(&base_ni->size_lock); 8253 base_ni->data_size = new_size; 8254 lck_spin_unlock(&base_ni->size_lock); 8255 } 8256 } 8257 /* Ensure the modified mft record is written out. */ 8258 NInoSetMrecNeedsDirtying(actx->ni); 8259 /* If the allocated size is not changing, we are done. */ 8260 if (!alloc_change) 8261 goto unm_done; 8262 /* 8263 * Free the clusters. Note we cannot recover once this is done because 8264 * someone else can allocate the clusters at any point after we free 8265 * them. Thus any errors will lead to a more or less corrupt file 8266 * system depending on how consistent we can make the volume after an 8267 * error occurs. 8268 */ 8269 err = ntfs_cluster_free(ni, new_alloc_size >> 8270 vol->cluster_size_shift, -1, actx, &nr_freed); 8271 m = actx->m; 8272 a = actx->a; 8273 if (err) { 8274 ntfs_error(vol->mp, "Failed to release cluster(s) (error " 8275 "%d). Unmount and run chkdsk to recover the " 8276 "lost cluster(s).", err); 8277 NVolSetErrors(vol); 8278 } else { 8279 /* 8280 * Truncate the runlist. 
The call to ntfs_cluster_free() has 8281 * already ensured that all needed runlist fragments have been 8282 * mapped so we do not need to worry about mapping runlist 8283 * fragments here. Note given we have managed to read all the 8284 * runlist fragments already the chances of us failing anywhere 8285 * in the below code is very small indeed. Only running out of 8286 * memory or a disk/sector failure between the above 8287 * ntfs_cluster_free() call and the below calls can cause us to 8288 * fail here. 8289 * 8290 * FIXME: Note that this is not quite true as if 8291 * ntfs_cluster_free() aborts with an error it may not have 8292 * gotten round to mapping the runlist fragments. If this 8293 * happens ntfs_rl_truncate_nolock() could end up doing a lot 8294 * of weird things so we only call it if the 8295 * ntfs_cluster_free() succeeded for now. 8296 */ 8297 err = ntfs_rl_truncate_nolock(vol, &ni->rl, new_alloc_size >> 8298 vol->cluster_size_shift); 8299 } 8300 /* 8301 * If the runlist truncation failed and/or the search context is no 8302 * longer valid, we cannot resize the attribute record or build the 8303 * mapping pairs array thus we abort. 8304 */ 8305 if (err || actx->is_error) { 8306 if (actx->is_error) 8307 err = actx->error; 8308 ntfs_error(vol->mp, "Failed to %s (error %d).%s", 8309 actx->is_error ? 8310 "restore attribute search context" : 8311 "truncate attribute runlist", err, es); 8312 err = EIO; 8313 goto bad_out; 8314 } 8315 /* 8316 * The runlist is now up to date. If this attribute is sparse we need 8317 * to check if it is still sparse and if not we need to change it to a 8318 * non-sparse file. And if it is still sparse we need to update the 8319 * compressed size which we postpone till later so we can do it at the 8320 * same time as the update of the allocated size. 8321 * 8322 * To determine whether the attribute is still sparse we compare the 8323 * new compressed size to the new allocated size. 
If the two have now 8324 * become the same the attribute is no longer sparse. If the 8325 * compressed size is still smaller than the allocated size the 8326 * attribute is still sparse. 8327 */ 8328 compressed_size -= nr_freed << vol->cluster_size_shift; 8329 if (NInoSparse(ni) && compressed_size >= new_alloc_size) { 8330 if (compressed_size > new_alloc_size) 8331 panic("%s(): compressed_size > new_alloc_size\n", 8332 __FUNCTION__); 8333 /* Switch the attribute to not be sparse any more. */ 8334 ntfs_attr_sparse_clear(base_ni, ni, actx); 8335 } 8336 /* Update the allocated/compressed size. */ 8337 lck_spin_lock(&ni->size_lock); 8338 ni->allocated_size = new_alloc_size; 8339 a->allocated_size = cpu_to_sle64(new_alloc_size); 8340 if (NInoSparse(ni) || (ni->type != AT_INDEX_ALLOCATION && 8341 NInoCompressed(ni))) { 8342 if (nr_freed) { 8343 if (compressed_size < 0) 8344 panic("%s(): compressed_size < 0\n", 8345 __FUNCTION__); 8346 ni->compressed_size = compressed_size; 8347 a->compressed_size = cpu_to_sle64(ni->compressed_size); 8348 } 8349 } 8350 lck_spin_unlock(&ni->size_lock); 8351 if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) { 8352 lck_spin_lock(&base_ni->size_lock); 8353 base_ni->allocated_size = new_alloc_size; 8354 lck_spin_unlock(&base_ni->size_lock); 8355 } 8356 /* 8357 * We have the base attribute extent in @actx and we have set it up 8358 * already with the new allocated size. If the truncation point is not 8359 * in the base extent, need to switch to the extent containing the 8360 * truncation point now so we can update its attribute record, too. 8361 * But before doing so need to ensure the modified mft record is 8362 * written out. 
8363 */ 8364 highest_vcn = new_alloc_size >> vol->cluster_size_shift; 8365 old_highest_vcn = sle64_to_cpu(a->highest_vcn) + 1; 8366 ntfs_debug("highest_vcn 0x%llx, old_highest_vcn 0x%llx.", 8367 (unsigned long long)highest_vcn, 8368 (unsigned long long)old_highest_vcn); 8369 if (highest_vcn >= old_highest_vcn) { 8370 NInoSetMrecNeedsDirtying(actx->ni); 8371 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 8372 highest_vcn, NULL, 0, actx); 8373 if (err) { 8374 if (err == ENOENT) 8375 ntfs_error(vol->mp, "Attribute extent is " 8376 "missing from mft_no 0x%llx. " 8377 "Run chkdsk.", 8378 (unsigned long long) 8379 ni->mft_no); 8380 else 8381 ntfs_error(vol->mp, "Failed to lookup " 8382 "attribute extent in mft_no " 8383 "0x%llx (error %d).%s", 8384 (unsigned long long) 8385 ni->mft_no, err, es); 8386 err = EIO; 8387 goto bad_out; 8388 } 8389 m = actx->m; 8390 a = actx->a; 8391 old_highest_vcn = sle64_to_cpu(a->highest_vcn) + 1; 8392 ntfs_debug("Switched to extent attribute record, " 8393 "old_highest_vcn is now 0x%llx.", 8394 (unsigned long long)old_highest_vcn); 8395 } 8396 /* 8397 * If the truncation point is at the very beginning of this attribute 8398 * extent and the extent is not the base extent we need to remove the 8399 * entire extent and hence do not need to waste time truncating it. 8400 * 8401 * If this is the base extent we have to truncate it to zero allocated 8402 * size and if the truncation point is in the middle of the extent we 8403 * need to truncate it to the truncation point. 8404 */ 8405 lowest_vcn = sle64_to_cpu(a->lowest_vcn); 8406 ntfs_debug("lowest_vcn 0x%llx.", (unsigned long long)lowest_vcn); 8407 if (!lowest_vcn || highest_vcn != lowest_vcn) { 8408 /* 8409 * Get the size for the shrunk mapping pairs array for the 8410 * runlist fragment starting at the lowest_vcn of this extent. 8411 */ 8412 err = ntfs_get_size_for_mapping_pairs(vol, 8413 ni->rl.elements ? 
ni->rl.rl : NULL, lowest_vcn, 8414 -1, &mp_size); 8415 if (err) { 8416 ntfs_error(vol->mp, "Cannot shrink allocation of " 8417 "mft_no 0x%llx, attribute type 0x%x, " 8418 "because determining the size for the " 8419 "mapping pairs failed (error %d).%s", 8420 (unsigned long long)ni->mft_no, 8421 (unsigned)le32_to_cpu(ni->type), err, 8422 es); 8423 NInoSetMrecNeedsDirtying(actx->ni); 8424 err = EIO; 8425 goto bad_out; 8426 } 8427 /* 8428 * Generate the mapping pairs array directly into the attribute 8429 * record. 8430 */ 8431 err = ntfs_mapping_pairs_build(vol, (s8*)a + 8432 le16_to_cpu(a->mapping_pairs_offset), mp_size, 8433 ni->rl.elements ? ni->rl.rl : NULL, lowest_vcn, 8434 -1, NULL); 8435 if (err) { 8436 ntfs_error(vol->mp, "Cannot shrink allocation of " 8437 "mft_no 0x%llx, attribute type 0x%x, " 8438 "because building the mapping pairs " 8439 "failed (error %d).%s", 8440 (unsigned long long)ni->mft_no, 8441 (unsigned)le32_to_cpu(ni->type), err, 8442 es); 8443 NInoSetMrecNeedsDirtying(actx->ni); 8444 err = EIO; 8445 goto bad_out; 8446 } 8447 /* Update the highest_vcn to the new truncation point. */ 8448 a->highest_vcn = cpu_to_sle64(highest_vcn - 1); 8449 /* 8450 * Shrink the attribute record for the new mapping pairs array. 8451 * Note, this cannot fail since we are making the attribute 8452 * smaller thus by definition there is enough space to do so. 8453 */ 8454 attr_len = le32_to_cpu(a->length); 8455 err = ntfs_attr_record_resize(m, a, mp_size + 8456 le16_to_cpu(a->mapping_pairs_offset)); 8457 if (err) 8458 panic("%s(): err\n", __FUNCTION__); 8459 } 8460 /* If there is no attribute list we are done. */ 8461 if (!NInoAttrList(base_ni)) { 8462 /* Ensure the modified mft record is written out. */ 8463 NInoSetMrecNeedsDirtying(base_ni); 8464 goto unm_done; 8465 } 8466 /* 8467 * If the current extent is not the base extent and it has a lowest_vcn 8468 * equal to the new highest_vcn, we need to delete the current extent. 
8469 * 8470 * Also need to delete all subsequent attribute extents if any exist. 8471 * We know that some exist if the old highest_vcn of the current extent 8472 * is lower than the old end of the attribute. 8473 * 8474 * When deleting the attribute extents, free the extent mft records if 8475 * the only attribute record in the mft record is the attribute extent 8476 * being deleted. In this case do not need to actually modify the 8477 * attribute record at all, just mark the mft record as not in use and 8478 * clear its bit in the mft bitmap. For each deleted attribute extent 8479 * also need to delete the corresponding attribute list attribute 8480 * entry but we postpone this until we have dealt with all the extents 8481 * first. 8482 * 8483 * When finished, check the attribute list attribute and if it no 8484 * longer references any mft records other than the base mft record 8485 * delete the attribute list attribute altogether. 8486 */ 8487 al_end = base_ni->attr_list + base_ni->attr_list_size; 8488 del_al_start = (u8*)actx->al_entry; 8489 if (lowest_vcn && highest_vcn == lowest_vcn) { 8490 /* 8491 * We need to delete the current extent thus manually 8492 * reinitialize the attribute search context without unmapping 8493 * the current extent. 8494 */ 8495 eni = actx->ni; 8496 actx->ni = base_ni; 8497 ntfs_attr_search_ctx_reinit(actx); 8498 al_entry = (ATTR_LIST_ENTRY*)del_al_start; 8499 goto delete_attr; 8500 } 8501 /* Ensure the modified mft record is written out. */ 8502 NInoSetMrecNeedsDirtying(actx->ni); 8503 del_al_start += le16_to_cpu(((ATTR_LIST_ENTRY*)del_al_start)->length); 8504 al_entry = (ATTR_LIST_ENTRY*)del_al_start; 8505 /* 8506 * Reinitialize the attribute search context thus unmapping the current 8507 * extent if it is not in the base mft record. 8508 */ 8509 ntfs_attr_search_ctx_reinit(actx); 8510 /* 8511 * Check if there are more extents by looking at the highest vcn of the 8512 * current extent which is in @old_highest_vcn. 
If it is below the old 8513 * allocated size it means that @al_entry points to the attribute list 8514 * entry describing the next attribute extent. 8515 */ 8516 while (old_highest_vcn < (old_alloc_size >> vol->cluster_size_shift)) { 8517 /* Sanity checks. */ 8518 if ((u8*)al_entry + sizeof(ATTR_LIST_ENTRY) >= al_end || 8519 (u8*)al_entry < base_ni->attr_list) { 8520 ntfs_error(vol->mp, "Attribute list attribute is " 8521 "corrupt in mft_no 0x%llx. Run " 8522 "chkdsk.", 8523 (unsigned long long)base_ni->mft_no); 8524 err = EIO; 8525 goto bad_out; 8526 } 8527 /* 8528 * Map the mft record containing the next extent if it is not 8529 * the base mft record which is already mapped and described by 8530 * the attribute search context @actx. 8531 */ 8532 if (MREF_LE(al_entry->mft_reference) == base_ni->mft_no) { 8533 /* We want the base mft record. */ 8534 if (MSEQNO_LE(al_entry->mft_reference) != 8535 base_ni->seq_no) { 8536 ntfs_error(vol->mp, "Found stale mft " 8537 "reference in attribute list " 8538 "attribute of mft_no 0x%llx. " 8539 "Inode is corrupt. Run " 8540 "chkdsk.", (unsigned long long) 8541 base_ni->mft_no); 8542 err = EIO; 8543 goto bad_out; 8544 } 8545 eni = base_ni; 8546 m = actx->m; 8547 } else { 8548 /* We want an extent mft record. */ 8549 err = ntfs_extent_mft_record_map(base_ni, 8550 le64_to_cpu(al_entry->mft_reference), 8551 &eni, &m); 8552 if (err) { 8553 ntfs_error(vol->mp, "Failed to map extent mft " 8554 "record 0x%llx of mft_no " 8555 "0x%llx. Inode is corrupt. " 8556 "Run chkdsk.", 8557 (unsigned long long)MREF_LE( 8558 al_entry->mft_reference), 8559 (unsigned long long) 8560 base_ni->mft_no); 8561 err = EIO; 8562 goto bad_out; 8563 } 8564 } 8565 /* Locate the attribute extent in the mft record. */ 8566 a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); 8567 do { 8568 /* Sanity checks. 
*/ 8569 if ((u8*)a < (u8*)m || (u8*)a > (u8*)m + 8570 le32_to_cpu(m->bytes_allocated)) 8571 goto corrupt_err; 8572 /* 8573 * We cannot reach the end of the attributes without 8574 * finding the attribute extent we are looking for. 8575 */ 8576 if (a->type == AT_END || !a->length) 8577 goto corrupt_err; 8578 /* 8579 * The attribute instance is unique thus if we find the 8580 * correct instance we have found the attribute extent. 8581 */ 8582 if (al_entry->instance == a->instance) { 8583 /* 8584 * If the type and/or the name are mismatched 8585 * between the attribute list entry and the 8586 * attribute record, there is corruption. 8587 */ 8588 if (al_entry->type != a->type) 8589 goto corrupt_err; 8590 if (!ntfs_are_names_equal((ntfschar*)((u8*)a + 8591 le16_to_cpu(a->name_offset)), 8592 a->name_length, 8593 (ntfschar*)((u8*)al_entry + 8594 al_entry->name_offset), 8595 al_entry->name_length, 8596 NVolCaseSensitive(vol), 8597 vol->upcase, vol->upcase_len)) 8598 goto corrupt_err; 8599 /* We found the attribute extent. */ 8600 break; 8601 } 8602 /* Proceed to the next attribute in the mft record. */ 8603 a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length)); 8604 } while (1); 8605 /* Record the highest_vcn of the new extent. */ 8606 old_highest_vcn = sle64_to_cpu(a->highest_vcn) + 1; 8607delete_attr: 8608 /* 8609 * If this is the only attribute record in the mft record, free 8610 * the mft record. Note if this is the case it is not possible 8611 * for the mft record to be the base record as it would at 8612 * least have to contain the attribute record for the attribute 8613 * list attribute so no need to check for this case. 8614 * 8615 * If it is not the only attribute record in the mft record, 8616 * delete the attribute record from the mft record. 
8617 */ 8618 if ((u8*)m + le16_to_cpu(m->attrs_offset) == (u8*)a && 8619 ((ATTR_RECORD*)((u8*)a + 8620 le32_to_cpu(a->length)))->type == AT_END) { 8621 err = ntfs_extent_mft_record_free(base_ni, eni, m); 8622 if (err) { 8623 ntfs_error(vol->mp, "Failed to free extent " 8624 "mft_no 0x%llx (error %d). " 8625 "Unmount and run chkdsk to " 8626 "recover the lost inode.", 8627 (unsigned long long) 8628 eni->mft_no, err); 8629 NVolSetErrors(vol); 8630 if (eni != base_ni) { 8631 NInoSetMrecNeedsDirtying(eni); 8632 ntfs_extent_mft_record_unmap(eni); 8633 } 8634 } 8635 } else { 8636 ntfs_attr_record_delete_internal(m, a); 8637 /* Unmap the mft record if it is not the base record. */ 8638 if (eni != base_ni) { 8639 NInoSetMrecNeedsDirtying(eni); 8640 ntfs_extent_mft_record_unmap(eni); 8641 } 8642 } 8643 /* Go to the next entry in the attribute list attribute. */ 8644 al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + 8645 le16_to_cpu(al_entry->length)); 8646 } 8647 /* 8648 * There are no more extents. If we deleted any attribute extents we 8649 * need to remove their attribute list attribute entries now. 8650 */ 8651 if ((u8*)al_entry != del_al_start) { 8652 unsigned al_ofs; 8653 BOOL have_extent_records; 8654 8655 al_ofs = del_al_start - base_ni->attr_list; 8656 ntfs_attr_list_entries_delete(base_ni, 8657 (ATTR_LIST_ENTRY*)del_al_start, al_entry); 8658 /* 8659 * Scan all entries in the attribute list attribute. If there 8660 * are no more references to extent mft records, delete the 8661 * attribute list attribute. 8662 * 8663 * Otherwise truncate the attribute list attribute and update 8664 * its value from the in memory copy. 8665 */ 8666 err = ntfs_attr_list_is_needed(base_ni, NULL, 8667 &have_extent_records); 8668 if (err) 8669 goto put_err; 8670 if (!have_extent_records) { 8671 /* 8672 * There are no extent mft records left in use thus 8673 * delete the attribute list attribute. 
8674 */ 8675 err = ntfs_attr_list_delete(base_ni, actx); 8676 if (err) 8677 goto put_err; 8678 } else { 8679 /* 8680 * There still are extent mft records left in use thus 8681 * update the attribute list attribute size and write 8682 * the modified data to disk. 8683 */ 8684 err = ntfs_attr_list_sync_shrink(base_ni, al_ofs, actx); 8685 if (err) 8686 goto put_err; 8687 } 8688 } 8689unm_done: 8690 ntfs_attr_search_ctx_put(actx); 8691 ntfs_mft_record_unmap(base_ni); 8692 lck_rw_unlock_exclusive(&ni->rl.lock); 8693 /* Set the UBC size if not set yet. */ 8694 if (need_ubc_setsize && !ubc_setsize(ni->vn, new_size)) { 8695 ntfs_error(vol->mp, "Failed to set the size in UBC."); 8696 err = EIO; 8697 /* 8698 * This should never fail and if it does it can only happen as 8699 * the result of a previous resize having failed. Thus we do 8700 * not try to roll back the metadata changes and simply bail 8701 * out. 8702 */ 8703 goto err; 8704 } 8705done: 8706 /* 8707 * If we have modified the size of the base inode, cause the sizes to 8708 * be written to all the directory index entries pointing to the base 8709 * inode when the inode is written to disk. Do not do this for 8710 * directories as they have both sizes set to zero in their index 8711 * entries. 8712 */ 8713 if (ni == base_ni && !S_ISDIR(ni->mode) && 8714 (size_change || alloc_change)) 8715 NInoSetDirtySizes(ni); 8716 // TODO:/FIXME: We have to clear the S_ISUID and S_ISGID bits in the 8717 // file mode. - Only to be done on success and (size_change || 8718 // alloc_change). 8719 /* 8720 * Update the last_data_change_time (mtime) and last_mft_change_time 8721 * (ctime) on the base ntfs inode @base_ni unless this is an attribute 8722 * inode update in which case only update the ctime as named stream/ 8723 * extended attribute semantics expect on OS X. 8724 * 8725 * FIXME: For open(O_TRUNC) it is correct to always change the 8726 * {m,c}time. 
But for {,f}truncate() we have to only set {m,c}time if 8727 * a change happened, i.e. only if size_change is true. Problem is we 8728 * cannot know from which code path we are being called as both system 8729 * calls on OS X call vnode_setattr() which calls VNOP_SETATTR() which 8730 * calls ntfs_vnop_setattr() which then calls us... For now at least 8731 * we always update the times thus we follow open(O_TRUNC) semantics 8732 * and disobey {,f}truncate() semantics. 8733 */ 8734 base_ni->last_mft_change_time = ntfs_utc_current_time(); 8735 if (ni == base_ni) 8736 base_ni->last_data_change_time = base_ni->last_mft_change_time; 8737 NInoSetDirtyTimes(base_ni); 8738 /* 8739 * If this is not a directory or it is an encrypted directory, set the 8740 * needs archiving bit except for the core system files. 8741 */ 8742 if (!S_ISDIR(base_ni->mode) || NInoEncrypted(base_ni)) { 8743 BOOL need_set_archive_bit = TRUE; 8744 if (vol->major_ver >= 2) { 8745 if (ni->mft_no <= FILE_Extend) 8746 need_set_archive_bit = FALSE; 8747 } else { 8748 if (ni->mft_no <= FILE_UpCase) 8749 need_set_archive_bit = FALSE; 8750 } 8751 if (need_set_archive_bit) { 8752 base_ni->file_attributes |= FILE_ATTR_ARCHIVE; 8753 NInoSetDirtyFileAttributes(base_ni); 8754 } 8755 } 8756 ntfs_debug("Done."); 8757 return 0; 8758corrupt_err: 8759 ntfs_error(vol->mp, "Mft record 0x%llx of mft_no 0x%llx is corrupt. " 8760 "Unmount and run chkdsk.", 8761 (unsigned long long)eni->mft_no, 8762 (unsigned long long)base_ni->mft_no); 8763 if (eni != base_ni) 8764 ntfs_extent_mft_record_unmap(eni); 8765 err = EIO; 8766bad_out: 8767 if (err != ENOMEM && err != ENOTSUP) 8768 NVolSetErrors(vol); 8769put_err: 8770 ntfs_attr_search_ctx_put(actx); 8771unm_err: 8772 ntfs_mft_record_unmap(base_ni); 8773unl_err: 8774 lck_rw_unlock_exclusive(&ni->rl.lock); 8775err: 8776 /* Reset the UBC size. */ 8777 if (!ubc_setsize(ni->vn, old_size)) 8778 ntfs_error(vol->mp, "Failed to restore UBC size. 
Leaving UBC " 8779 "size out of sync with attribute data size."); 8780 ntfs_debug("Failed (error %d).", err); 8781 return err; 8782} 8783 8784/** 8785 * ntfs_attr_set - fill (a part of) an attribute with a byte 8786 * @ni: ntfs inode describing the attribute to fill 8787 * @ofs: offset inside the attribute at which to start to fill 8788 * @cnt: number of bytes to fill 8789 * @val: the unsigned 8-bit value with which to fill the attribute 8790 * 8791 * Fill @cnt bytes of the attribute described by the ntfs inode @ni starting at 8792 * byte offset @ofs inside the attribute with the constant byte @val. 8793 * 8794 * This function is effectively like memset() applied to an ntfs attribute. 8795 * Note this function actually only operates on the page cache pages belonging 8796 * to the ntfs attribute and it marks them dirty after doing the memset(). 8797 * Thus it relies on the vm dirty page write code paths to cause the modified 8798 * pages to be written to the mft record/disk. 8799 * 8800 * Return 0 on success and errno on error. An error code of ESPIPE means that 8801 * @ofs + @cnt were outside the end of the attribute and no write was 8802 * performed. 8803 * 8804 * Note: This function does not take care of the initialized size! 8805 * 8806 * Locking: - Caller must hold an iocount reference on the vnode of the ntfs 8807 * inode @ni. 8808 * - Caller must hold @ni->lock for reading or writing. 
 */
errno_t ntfs_attr_set(ntfs_inode *ni, s64 ofs, const s64 cnt, const u8 val)
{
	s64 end, data_size;
	ntfs_volume *vol = ni->vol;
	upl_t upl;
	upl_page_info_array_t pl;
	u8 *kaddr;
	unsigned start_ofs, end_ofs, size;
	errno_t err;

	ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%x.",
			(unsigned long long)ofs, (unsigned long long)cnt,
			(unsigned)val);
	if (ofs < 0)
		panic("%s(): ofs < 0\n", __FUNCTION__);
	if (cnt < 0)
		panic("%s(): cnt < 0\n", __FUNCTION__);
	/* A zero-length fill is a no-op. */
	if (!cnt)
		goto done;
	/*
	 * FIXME: Compressed and encrypted attributes are not supported when
	 * writing and we should never have gotten here for them.
	 */
	if (NInoCompressed(ni))
		panic("%s(): Inode is compressed.\n", __FUNCTION__);
	if (NInoEncrypted(ni))
		panic("%s(): Inode is encrypted.\n", __FUNCTION__);
	/* Work out the starting index and page offset. */
	start_ofs = (unsigned)ofs & PAGE_MASK;
	/* Work out the ending index and page offset. */
	end = ofs + cnt;
	end_ofs = (unsigned)end & PAGE_MASK;
	/* If the end is outside the inode size return ESPIPE. */
	lck_spin_lock(&ni->size_lock);
	data_size = ni->data_size;
	lck_spin_unlock(&ni->size_lock);
	if (end > data_size) {
		ntfs_error(vol->mp, "Request exceeds end of attribute.");
		return ESPIPE;
	}
	/*
	 * Align both @ofs and @end down to page boundaries; from here on
	 * @start_ofs/@end_ofs carry the intra-page offsets.
	 */
	ofs &= ~PAGE_MASK_64;
	end &= ~PAGE_MASK_64;
	/* If there is a first partial page, need to do it the slow way. */
	if (start_ofs) {
		err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, TRUE);
		if (err) {
			ntfs_error(vol->mp, "Failed to read first partial "
					"page (ofs 0x%llx).",
					(unsigned long long)ofs);
			return err;
		}
		/*
		 * If the last page is the same as the first page, need to
		 * limit the write to the end offset.
		 */
		size = PAGE_SIZE;
		if (ofs == end)
			size = end_ofs;
		memset(kaddr + start_ofs, val, size - start_ofs);
		ntfs_page_unmap(ni, upl, pl, TRUE);
		ofs += PAGE_SIZE;
		/*
		 * If the request ended inside (or exactly at the end of) the
		 * page we just did, we are finished.  Note @end is aligned
		 * down, so @end + @end_ofs is the true end of the request.
		 */
		if (ofs >= (end + end_ofs))
			goto done;
	}
	/*
	 * Do the whole pages the fast way.
	 *
	 * TODO: It may be possible to optimize this loop by creating a
	 * sequence of large page lists by hand, mapping them, then running the
	 * memset, then unmapping them and committing them.  This incurs a
	 * higher cpu time because of the larger mapping required but incurs
	 * many fewer calls into the ubc thus less locks will need to be taken
	 * which may well speed things up a lot.  It will need to be
	 * benchmarked to determine which is actually faster so leaving it the
	 * easier way for now.
	 */
	for (; ofs < end; ofs += PAGE_SIZE) {
		/* Find or create the current page. */
		err = ntfs_page_grab(ni, ofs, &upl, &pl, &kaddr, TRUE);
		if (err) {
			ntfs_error(vol->mp, "Failed to grab page (ofs "
					"0x%llx).", (unsigned long long)ofs);
			return err;
		}
		memset(kaddr, val, PAGE_SIZE);
		ntfs_page_unmap(ni, upl, pl, TRUE);
	}
	/* If there is a last partial page, need to do it the slow way. */
	if (end_ofs) {
		err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, TRUE);
		if (err) {
			ntfs_error(vol->mp, "Failed to read last partial page "
					"(ofs 0x%llx).",
					(unsigned long long)ofs);
			return err;
		}
		memset(kaddr, val, end_ofs);
		ntfs_page_unmap(ni, upl, pl, TRUE);
	}
done:
	ntfs_debug("Done.");
	return 0;
}

/**
 * ntfs_resident_attr_read - read from an attribute which is resident
 * @ni:		resident ntfs inode describing the attribute from which to read
 * @ofs:	byte offset in attribute at which to start reading
 * @cnt:	number of bytes to copy into the destination buffer @buf
 * @buf:	destination buffer into which to copy attribute data
 *
 * Map the base mft record of the ntfs inode @ni, find the attribute it
 * describes, and copy @cnt bytes from byte offset @ofs into the destination
 * buffer @buf.  If @buf is bigger than the attribute size, zero the remainder.
 *
 * We do not need to worry about compressed attributes because when they are
 * resident the data is not actually compressed and we do not need to worry
 * about encrypted attributes because encrypted attributes cannot be resident.
 *
 * Return 0 on success and errno on error.  Note that a return value of EAGAIN
 * means that someone converted the attribute to non-resident before we took
 * the necessary locks to read from the resident attribute thus we could not
 * perform the read.  The caller needs to cope with this and perform a
 * non-resident read instead.
8934 */ 8935errno_t ntfs_resident_attr_read(ntfs_inode *ni, const s64 ofs, const u32 cnt, 8936 u8 *buf) 8937{ 8938 s64 max_size; 8939 ntfs_inode *base_ni; 8940 MFT_RECORD *m; 8941 ntfs_attr_search_ctx *ctx; 8942 ATTR_RECORD *a; 8943 unsigned attr_len, init_len, bytes; 8944 errno_t err; 8945 8946 base_ni = ni; 8947 if (NInoAttr(ni)) 8948 base_ni = ni->base_ni; 8949 /* Map, pin, and lock the mft record. */ 8950 err = ntfs_mft_record_map(base_ni, &m); 8951 if (err) 8952 goto err; 8953 /* 8954 * If a parallel write made the attribute non-resident, drop the mft 8955 * record and return EAGAIN. 8956 */ 8957 if (NInoNonResident(ni)) { 8958 err = EAGAIN; 8959 goto unm_err; 8960 } 8961 ctx = ntfs_attr_search_ctx_get(base_ni, m); 8962 if (!ctx) { 8963 err = ENOMEM; 8964 goto unm_err; 8965 } 8966 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0, 8967 ctx); 8968 if (err) { 8969 if (err == ENOENT) 8970 err = EIO; 8971 goto put_err; 8972 } 8973 a = ctx->a; 8974 lck_spin_lock(&ni->size_lock); 8975 /* These can happen when we race with a shrinking truncate. */ 8976 attr_len = le32_to_cpu(a->value_length); 8977 if (attr_len > ni->data_size) 8978 attr_len = ni->data_size; 8979 max_size = ubc_getsize(ni->vn); 8980 if (attr_len > max_size) 8981 attr_len = max_size; 8982 init_len = attr_len; 8983 if (init_len > ni->initialized_size) 8984 init_len = ni->initialized_size; 8985 lck_spin_unlock(&ni->size_lock); 8986 /* 8987 * If we are reading from the initialized attribute part, copy the data 8988 * over into the destination buffer. 8989 */ 8990 bytes = cnt; 8991 if (init_len > ofs) { 8992 u32 available = init_len - ofs; 8993 if (bytes > available) 8994 bytes = available; 8995 memcpy(buf, (u8*)a + le16_to_cpu(a->value_offset) + ofs, bytes); 8996 } 8997 /* Zero the remainder of the destination buffer if any. 
*/ 8998 if (bytes < cnt) 8999 bzero(buf + bytes, cnt - bytes); 9000put_err: 9001 ntfs_attr_search_ctx_put(ctx); 9002unm_err: 9003 ntfs_mft_record_unmap(base_ni); 9004err: 9005 return err; 9006} 9007 9008/** 9009 * ntfs_resident_attr_write - write to an attribute which is resident 9010 * @ni: resident ntfs inode describing the attribute to which to write 9011 * @buf: source buffer from which to copy attribute data 9012 * @cnt: number of bytes to copy into the attribute from the buffer 9013 * @ofs: byte offset in attribute at which to start writing 9014 * 9015 * Map the base mft record of the ntfs inode @ni, find the attribute it 9016 * describes, and copy @cnt bytes from the buffer @buf into the attribute value 9017 * at byte offset @ofs. 9018 * 9019 * We do not need to worry about compressed attributes because when they are 9020 * resident the data is not actually compressed and we do not need to worry 9021 * about encrypted attributes because encrypted attributes cannot be resident. 9022 * 9023 * Return 0 on success and errno on error. Note that a return value of EAGAIN 9024 * means that someone converted the attribute to non-resident before we took 9025 * the necessary locks to write to the resident attribute thus we could not 9026 * perform the write. The caller needs to cope with this and perform a 9027 * non-resident write instead. 9028 */ 9029errno_t ntfs_resident_attr_write(ntfs_inode *ni, u8 *buf, u32 cnt, 9030 const s64 ofs) 9031{ 9032 ntfs_inode *base_ni; 9033 MFT_RECORD *m; 9034 ntfs_attr_search_ctx *ctx; 9035 ATTR_RECORD *a; 9036 errno_t err; 9037 u32 attr_len; 9038 9039 base_ni = ni; 9040 if (NInoAttr(ni)) 9041 base_ni = ni->base_ni; 9042 /* Map, pin, and lock the mft record. */ 9043 err = ntfs_mft_record_map(base_ni, &m); 9044 if (err) 9045 goto err; 9046 /* 9047 * If a parallel write made the attribute non-resident, drop the mft 9048 * record and return EAGAIN. 
9049 */ 9050 if (NInoNonResident(ni)) { 9051 err = EAGAIN; 9052 goto unm_err; 9053 } 9054 ctx = ntfs_attr_search_ctx_get(base_ni, m); 9055 if (!ctx) { 9056 err = ENOMEM; 9057 goto unm_err; 9058 } 9059 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0, 9060 ctx); 9061 if (err) { 9062 if (err == ENOENT) 9063 err = EIO; 9064 goto put_err; 9065 } 9066 a = ctx->a; 9067 if (a->non_resident) 9068 panic("%s(): a->non_resident\n", __FUNCTION__); 9069 lck_spin_lock(&ni->size_lock); 9070 /* These can happen when we race with a shrinking truncate. */ 9071 attr_len = le32_to_cpu(a->value_length); 9072 if (ofs > attr_len) { 9073 ntfs_error(ni->vol->mp, "Cannot write past end of resident " 9074 "attribute."); 9075 lck_spin_unlock(&ni->size_lock); 9076 err = EINVAL; 9077 goto put_err; 9078 } 9079 if (ofs + cnt > attr_len) { 9080 ntfs_error(ni->vol->mp, "Truncating resident write."); 9081 cnt = attr_len - ofs; 9082 } 9083 if (ofs + cnt > ni->initialized_size) 9084 ni->initialized_size = ofs + cnt; 9085 lck_spin_unlock(&ni->size_lock); 9086 /* Copy the data over from the destination buffer. */ 9087 memcpy((u8*)a + le16_to_cpu(a->value_offset) + ofs, buf, cnt); 9088 /* Mark the mft record dirty to ensure it gets written out. */ 9089 NInoSetMrecNeedsDirtying(ctx->ni); 9090put_err: 9091 ntfs_attr_search_ctx_put(ctx); 9092unm_err: 9093 ntfs_mft_record_unmap(base_ni); 9094err: 9095 return err; 9096} 9097