1/* 2 * ntfs_lcnalloc.c - NTFS kernel cluster (de)allocation code. 3 * 4 * Copyright (c) 2006-2011 Anton Altaparmakov. All Rights Reserved. 5 * Portions Copyright (c) 2006-2011 Apple Inc. All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 * 30 * ALTERNATIVELY, provided that this notice and licensing terms are retained in 31 * full, this file may be redistributed and/or modified under the terms of the 32 * GNU General Public License (GPL) Version 2, in which case the provisions of 33 * that version of the GPL will apply to you instead of the license terms 34 * above. You can obtain a copy of the GPL Version 2 at 35 * http://developer.apple.com/opensource/licenses/gpl-2.txt. 36 */ 37 38#include <sys/buf.h> 39#include <sys/errno.h> 40#include <sys/stat.h> 41#include <sys/ucred.h> 42#include <sys/vnode.h> 43 44#include <string.h> 45 46#include <libkern/OSMalloc.h> 47 48#include <kern/debug.h> 49#include <kern/locks.h> 50 51#include "ntfs.h" 52#include "ntfs_attr.h" 53#include "ntfs_bitmap.h" 54#include "ntfs_debug.h" 55#include "ntfs_inode.h" 56#include "ntfs_lcnalloc.h" 57#include "ntfs_page.h" 58#include "ntfs_runlist.h" 59#include "ntfs_types.h" 60#include "ntfs_volume.h" 61 62static errno_t ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, 63 ntfs_rl_element *rl, const VCN start_vcn, s64 count, 64 s64 *nr_freed); 65 66/** 67 * ntfs_cluster_alloc - allocate clusters on an ntfs volume 68 * @vol: mounted ntfs volume on which to allocate the clusters 69 * @start_vcn: vcn to use for the first allocated cluster 70 * @count: number of clusters to allocate 71 * @start_lcn: starting lcn at which to allocate the clusters (or -1) 72 * @zone: zone from which to allocate the clusters 73 * @is_extension: if true, this is an attribute extension 74 * @runlist: destination runlist to return the allocated clusters in 75 * 76 * Allocate @count clusters preferably starting at cluster @start_lcn or at the 77 * current allocator position if @start_lcn is -1, on the mounted ntfs volume 78 * @vol. @zone is either DATA_ZONE for allocation of normal clusters or 79 * MFT_ZONE for allocation of clusters for the master file table, i.e. the 80 * $MFT/$DATA attribute. 
81 * 82 * @start_vcn specifies the vcn of the first allocated cluster. This makes 83 * merging the resulting runlist with the old runlist easier. 84 * 85 * If @is_extension is true, the caller is allocating clusters to extend an 86 * attribute and if it is false, the caller is allocating clusters to fill a 87 * hole in an attribute. Practically the difference is that if @is_extension 88 * is true the returned runlist will be terminated with LCN_ENOENT and if 89 * @is_extension is false the runlist will be terminated with 90 * LCN_RL_NOT_MAPPED. 91 * 92 * On success return 0 and set up @runlist to describe the allocated clusters. 93 * 94 * On error return the error code. 95 * 96 * Notes on the allocation algorithm 97 * ================================= 98 * 99 * There are two data zones. First is the area between the end of the mft zone 100 * and the end of the volume, and second is the area between the start of the 101 * volume and the start of the mft zone. On unmodified/standard NTFS 1.x 102 * volumes, the second data zone does not exist due to the mft zone being 103 * expanded to cover the start of the volume in order to reserve space for the 104 * mft bitmap attribute. 105 * 106 * This is not the prettiest function but the complexity stems from the need of 107 * implementing the mft vs data zoned approach and from the fact that we have 108 * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we 109 * need to cope with crossing over boundaries of two buffers. Further, the 110 * fact that the allocator allows for caller supplied hints as to the location 111 * of where allocation should begin and the fact that the allocator keeps track 112 * of where in the data zones the next natural allocation should occur, 113 * contribute to the complexity of the function. 
But it should all be 114 * worthwhile, because this allocator should: 1) be a full implementation of 115 * the MFT zone approach used by Windows NT, 2) cause reduction in 116 * fragmentation, and 3) be speedy in allocations (the code is not optimized 117 * for speed, but the algorithm is, so further speed improvements are probably 118 * possible). 119 * 120 * FIXME: We should be monitoring cluster allocation and increment the MFT zone 121 * size dynamically but this is something for the future. We will just cause 122 * heavier fragmentation by not doing it and I am not even sure Windows would 123 * grow the MFT zone dynamically, so it might even be correct not to do this. 124 * The overhead in doing dynamic MFT zone expansion would be very large and 125 * unlikely worth the effort. (AIA) 126 * 127 * TODO: I have added in double the required zone position pointer wrap around 128 * logic which can be optimized to having only one of the two logic sets. 129 * However, having the double logic will work fine, but if we have only one of 130 * the sets and we get it wrong somewhere, then we get into trouble, so 131 * removing the duplicate logic requires _very_ careful consideration of _all_ 132 * possible code paths. So at least for now, I am leaving the double logic - 133 * better safe than sorry... (AIA) 134 * 135 * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked 136 * on return. 137 * - This function takes the volume lcn bitmap lock for writing and 138 * modifies the bitmap contents. 139 * - The lock of the runlist @runlist is not touched thus the caller 140 * is responsible for locking it for writing if needed. 
 */
errno_t ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
		const s64 count, const LCN start_lcn,
		const NTFS_CLUSTER_ALLOCATION_ZONES zone,
		const BOOL is_extension, ntfs_runlist *runlist)
{
	LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
	LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
	s64 clusters, data_size;
	ntfs_inode *lcnbmp_ni;
	ntfs_rl_element *rl = NULL;
	upl_t upl = NULL;
	upl_page_info_array_t pl;
	u8 *b, *byte;
	int rlpos, rlsize, bsize;
	errno_t err;
	u8 pass, done_zones, search_zone, bit;
	BOOL need_writeback = FALSE;

	ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn "
			"0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn,
			(unsigned long long)count,
			(unsigned long long)start_lcn,
			zone == MFT_ZONE ? "MFT" : "DATA");
	/* Sanity check the arguments; all of these are caller bugs. */
	if (!vol)
		panic("%s(): !vol\n", __FUNCTION__);
	lcnbmp_ni = vol->lcnbmp_ni;
	if (!lcnbmp_ni)
		panic("%s(): !lcnbmp_ni\n", __FUNCTION__);
	if (start_vcn < 0)
		panic("%s(): start_vcn < 0\n", __FUNCTION__);
	if (count < 0)
		panic("%s(): count < 0\n", __FUNCTION__);
	if (start_lcn < -1)
		panic("%s(): start_lcn < -1\n", __FUNCTION__);
	if (zone < FIRST_ZONE)
		panic("%s(): zone < FIRST_ZONE\n", __FUNCTION__);
	if (zone > LAST_ZONE)
		panic("%s(): zone > LAST_ZONE\n", __FUNCTION__);
	/*
	 * Return an empty runlist if @count is zero.  Any previously
	 * allocated runlist buffer in @runlist is released.
	 */
	if (!count) {
		if (runlist->alloc)
			OSFree(runlist->rl, runlist->alloc, ntfs_malloc_tag);
		runlist->rl = NULL;
		runlist->elements = 0;
		runlist->alloc = 0;
		return 0;
	}
	/* Take the lcnbmp lock for writing. */
	lck_rw_lock_exclusive(&vol->lcnbmp_lock);
	err = vnode_get(lcnbmp_ni->vn);
	if (err) {
		ntfs_error(vol->mp, "Failed to get vnode for $Bitmap.");
		lck_rw_unlock_exclusive(&vol->lcnbmp_lock);
		return err;
	}
	lck_rw_lock_shared(&lcnbmp_ni->lock);
	/*
	 * If no specific @start_lcn was requested, use the current data zone
	 * position, otherwise use the requested @start_lcn but make sure it
	 * lies outside the mft zone.  Also set done_zones to 0 (no zones done)
	 * and pass depending on whether we are starting inside a zone (1) or
	 * at the beginning of a zone (2).  If requesting from the MFT_ZONE,
	 * we either start at the current position within the mft zone or at
	 * the specified position.  If the latter is out of bounds then we
	 * start at the beginning of the MFT_ZONE.
	 */
	done_zones = 0;
	pass = 1;
	/*
	 * zone_start and zone_end are the current search range.  search_zone
	 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
	 * volume) and 4 for data zone 2 (start of volume till start of mft
	 * zone).
	 */
	zone_start = start_lcn;
	if (zone_start < 0) {
		if (zone == DATA_ZONE)
			zone_start = vol->data1_zone_pos;
		else
			zone_start = vol->mft_zone_pos;
		if (!zone_start) {
			/*
			 * Zone starts at beginning of volume which means a
			 * single pass is sufficient.
			 */
			pass = 2;
		}
	} else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start &&
			zone_start < vol->mft_zone_end) {
		zone_start = vol->mft_zone_end;
		/*
		 * Starting at beginning of data1_zone which means a single
		 * pass in this zone is sufficient.
		 */
		pass = 2;
	} else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start ||
			zone_start >= vol->mft_zone_end)) {
		zone_start = vol->mft_lcn;
		if (!vol->mft_zone_end)
			zone_start = 0;
		/*
		 * Starting at beginning of volume which means a single pass
		 * is sufficient.
		 */
		pass = 2;
	}
	if (zone == MFT_ZONE) {
		zone_end = vol->mft_zone_end;
		search_zone = 1;
	} else /* if (zone == DATA_ZONE) */ {
		/* Skip searching the mft zone. */
		done_zones |= 1;
		if (zone_start >= vol->mft_zone_end) {
			zone_end = vol->nr_clusters;
			search_zone = 2;
		} else {
			zone_end = vol->mft_zone_start;
			search_zone = 4;
		}
	}
	/*
	 * bmp_pos is the current bit position inside the bitmap.  We use
	 * bmp_initial_pos to determine whether or not to do a zone switch.
	 */
	bmp_pos = bmp_initial_pos = zone_start;
	/* Loop until all clusters are allocated, i.e. clusters == 0. */
	clusters = count;
	rlpos = rlsize = 0;
	lck_spin_lock(&lcnbmp_ni->size_lock);
	data_size = ubc_getsize(lcnbmp_ni->vn);
	if (data_size != lcnbmp_ni->data_size)
		panic("%s(): data_size != lcnbmp_ni->data_size\n",
				__FUNCTION__);
	lck_spin_unlock(&lcnbmp_ni->size_lock);
	while (1) {
		ntfs_debug("Start of outer while loop: done_zones 0x%x, "
				"search_zone %d, pass %d, zone_start 0x%llx, "
				"zone_end 0x%llx, bmp_initial_pos 0x%llx, "
				"bmp_pos 0x%llx, rlpos %d, rlsize %d.",
				done_zones, search_zone, pass,
				(unsigned long long)zone_start,
				(unsigned long long)zone_end,
				(unsigned long long)bmp_initial_pos,
				(unsigned long long)bmp_pos, rlpos, rlsize);
		/* Loop until we run out of free clusters. */
		last_read_pos = bmp_pos >> 3;	/* bit position -> byte offset */
		ntfs_debug("last_read_pos 0x%llx.",
				(unsigned long long)last_read_pos);
		if (last_read_pos > data_size) {
			ntfs_debug("End of attribute reached. "
					"Skipping to zone_pass_done.");
			goto zone_pass_done;
		}
		/* Release the previously mapped bitmap page, if any. */
		if (upl) {
			ntfs_page_unmap(lcnbmp_ni, upl, pl, need_writeback);
			if (need_writeback) {
				ntfs_debug("Marking page dirty.");
				need_writeback = FALSE;
			}
		}
		/* Map the bitmap page containing the current byte offset. */
		err = ntfs_page_map(lcnbmp_ni, last_read_pos & ~PAGE_MASK_64,
				&upl, &pl, &b, TRUE);
		if (err) {
			ntfs_error(vol->mp, "Failed to map page.");
			upl = NULL;
			goto out;
		}
		/*
		 * @b points at the current byte inside the page and @bsize is
		 * the number of valid bytes from there to the end of the
		 * page, clamped to the end of the bitmap attribute.
		 */
		bsize = last_read_pos & PAGE_MASK;
		b += bsize;
		bsize = PAGE_SIZE - bsize;
		if (last_read_pos + bsize > data_size)
			bsize = data_size - last_read_pos;
		bsize <<= 3;	/* Convert bytes to bits. */
		/*
		 * From here on @lcn is the bit offset into the mapped portion
		 * of the bitmap and @bmp_pos is the bit position of the first
		 * mapped byte, so @lcn + @bmp_pos is the absolute LCN.
		 */
		lcn = bmp_pos & 7;
		bmp_pos &= ~(LCN)7;
		ntfs_debug("Before inner while loop: bsize %d, lcn 0x%llx, "
				"bmp_pos 0x%llx, need_writeback is %s.", bsize,
				(unsigned long long)lcn,
				(unsigned long long)bmp_pos,
				need_writeback ? "true" : "false");
		while (lcn < bsize && lcn + bmp_pos < zone_end) {
			byte = b + (lcn >> 3);
			ntfs_debug("In inner while loop: bsize %d, lcn "
					"0x%llx, bmp_pos 0x%llx, "
					"need_writeback is %s, byte ofs 0x%x, "
					"*byte 0x%x.", bsize,
					(unsigned long long)lcn,
					(unsigned long long)bmp_pos,
					need_writeback ? "true" : "false",
					(unsigned)(lcn >> 3), (unsigned)*byte);
			/* Skip full bytes. */
			if (*byte == 0xff) {
				lcn = (lcn + 8) & ~(LCN)7;
				ntfs_debug("Continuing while loop 1.");
				continue;
			}
			bit = 1 << (lcn & 7);
			ntfs_debug("bit 0x%x.", bit);
			/* If the bit is already set, go onto the next one. */
			if (*byte & bit) {
				lcn++;
				ntfs_debug("Continuing while loop 2.");
				continue;
			}
			/*
			 * Allocate more memory if needed, including space for
			 * the terminator element.
			 * ntfs_malloc_nofs() operates on whole pages only.
			 */
			if ((rlpos + 2) * (int)sizeof(*rl) > rlsize) {
				ntfs_rl_element *rl2;

				ntfs_debug("Reallocating memory.");
				rl2 = OSMalloc(rlsize + NTFS_ALLOC_BLOCK,
						ntfs_malloc_tag);
				if (!rl2) {
					err = ENOMEM;
					ntfs_error(vol->mp, "Failed to "
							"allocate memory.");
					goto out;
				}
				if (!rl)
					ntfs_debug("First free bit is at LCN "
							"0x%llx.",
							(unsigned long long)
							(lcn + bmp_pos));
				else {
					memcpy(rl2, rl, rlsize);
					OSFree(rl, rlsize, ntfs_malloc_tag);
				}
				rl = rl2;
				rlsize += NTFS_ALLOC_BLOCK;
				ntfs_debug("Reallocated memory, rlsize 0x%x.",
						rlsize);
			}
			/* Allocate the bitmap bit. */
			*byte |= bit;
			/* Keep the cached free cluster count in sync. */
			vol->nr_free_clusters--;
			if (vol->nr_free_clusters < 0)
				vol->nr_free_clusters = 0;
			/* We need to write this bitmap page to disk. */
			need_writeback = TRUE;
			ntfs_debug("*byte 0x%x, need_writeback is set.",
					(unsigned)*byte);
			/*
			 * Coalesce with previous run if adjacent LCNs.
			 * Otherwise, append a new run.
			 */
			ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), "
					"prev_lcn 0x%llx, lcn 0x%llx, "
					"bmp_pos 0x%llx, prev_run_len 0x%llx, "
					"rlpos %d.",
					(unsigned long long)(lcn + bmp_pos),
					1ULL, (unsigned long long)prev_lcn,
					(unsigned long long)lcn,
					(unsigned long long)bmp_pos,
					(unsigned long long)prev_run_len,
					rlpos);
			if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
				ntfs_debug("Coalescing to run (lcn 0x%llx, "
						"len 0x%llx).",
						(unsigned long long)
						rl[rlpos - 1].lcn,
						(unsigned long long)
						rl[rlpos - 1].length);
				rl[rlpos - 1].length = ++prev_run_len;
				ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), "
						"prev_run_len 0x%llx.",
						(unsigned long long)
						rl[rlpos - 1].lcn,
						(unsigned long long)
						rl[rlpos - 1].length,
						(unsigned long long)
						prev_run_len);
			} else {
				if (rlpos) {
					ntfs_debug("Adding new run, (previous "
							"run lcn 0x%llx, "
							"len 0x%llx).",
							(unsigned long long)
							rl[rlpos - 1].lcn,
							(unsigned long long)
							rl[rlpos - 1].length);
					rl[rlpos].vcn = rl[rlpos - 1].vcn +
							prev_run_len;
				} else {
					ntfs_debug("Adding new run, is first "
							"run.");
					rl[rlpos].vcn = start_vcn;
				}
				rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
				rl[rlpos].length = prev_run_len = 1;
				rlpos++;
			}
			/* Done? */
			if (!--clusters) {
				LCN tc;
				/*
				 * Update the current zone position.  Positions
				 * of already scanned zones have been updated
				 * during the respective zone switches.
				 */
				tc = lcn + bmp_pos + 1;
				ntfs_debug("Done. Updating current zone "
						"position, tc 0x%llx, "
						"search_zone %d.",
						(unsigned long long)tc,
						search_zone);
				switch (search_zone) {
				case 1:
					ntfs_debug("Before checks, "
							"vol->mft_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->mft_zone_pos);
					if (tc >= vol->mft_zone_end) {
						vol->mft_zone_pos =
								vol->mft_lcn;
						if (!vol->mft_zone_end)
							vol->mft_zone_pos = 0;
					} else if ((bmp_initial_pos >=
							vol->mft_zone_pos ||
							tc > vol->mft_zone_pos)
							&& tc >= vol->mft_lcn)
						vol->mft_zone_pos = tc;
					ntfs_debug("After checks, "
							"vol->mft_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->mft_zone_pos);
					break;
				case 2:
					ntfs_debug("Before checks, "
							"vol->data1_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->data1_zone_pos);
					if (tc >= vol->nr_clusters)
						vol->data1_zone_pos =
								vol->mft_zone_end;
					else if ((bmp_initial_pos >=
							vol->data1_zone_pos ||
							tc > vol->data1_zone_pos)
							&& tc >= vol->mft_zone_end)
						vol->data1_zone_pos = tc;
					ntfs_debug("After checks, "
							"vol->data1_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->data1_zone_pos);
					break;
				case 4:
					ntfs_debug("Before checks, "
							"vol->data2_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->data2_zone_pos);
					if (tc >= vol->mft_zone_start)
						vol->data2_zone_pos = 0;
					else if (bmp_initial_pos >=
							vol->data2_zone_pos ||
							tc > vol->data2_zone_pos)
						vol->data2_zone_pos = tc;
					ntfs_debug("After checks, "
							"vol->data2_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->data2_zone_pos);
					break;
				default:
					panic("%s(): Reached default case in "
							"switch!\n",
							__FUNCTION__);
				}
				ntfs_debug("Finished. Going to out.");
				goto out;
			}
			lcn++;
		}
		bmp_pos += bsize;
		ntfs_debug("After inner while loop: bsize 0x%x, lcn 0x%llx, "
				"bmp_pos 0x%llx, need_writeback is %s.", bsize,
				(unsigned long long)lcn,
				(unsigned long long)bmp_pos,
				need_writeback ? "true" : "false");
		if (bmp_pos < zone_end) {
			ntfs_debug("Continuing outer while loop, "
					"bmp_pos 0x%llx, zone_end 0x%llx.",
					(unsigned long long)bmp_pos,
					(unsigned long long)zone_end);
			continue;
		}
zone_pass_done:	/* Finished with the current zone pass. */
		ntfs_debug("At zone_pass_done, pass %d.", pass);
		if (pass == 1) {
			/*
			 * Now do pass 2, scanning the first part of the zone
			 * we omitted in pass 1.
			 */
			pass = 2;
			zone_end = zone_start;
			switch (search_zone) {
			case 1: /* mft_zone */
				zone_start = vol->mft_zone_start;
				break;
			case 2: /* data1_zone */
				zone_start = vol->mft_zone_end;
				break;
			case 4: /* data2_zone */
				zone_start = 0;
				break;
			default:
				panic("%s(): Reached default case in switch "
						"(2)!\n", __FUNCTION__);
			}
			/* Sanity check. */
			if (zone_end < zone_start)
				zone_end = zone_start;
			bmp_pos = zone_start;
			ntfs_debug("Continuing outer while loop, pass 2, "
					"zone_start 0x%llx, zone_end 0x%llx, "
					"bmp_pos 0x%llx.",
					(unsigned long long)zone_start,
					(unsigned long long)zone_end,
					(unsigned long long)bmp_pos);
			continue;
		} /* pass == 2 */
done_zones_check:
		ntfs_debug("At done_zones_check, search_zone %d, done_zones "
				"before 0x%x, done_zones after 0x%x.",
				search_zone, done_zones,
				done_zones | search_zone);
		done_zones |= search_zone;
		if (done_zones < 7) {
			ntfs_debug("Switching zone.");
			/* Now switch to the next zone we haven't done yet. */
			pass = 1;
			switch (search_zone) {
			case 1:
				ntfs_debug("Switching from mft zone to data1 "
						"zone.");
				/* Update mft zone position. */
				if (rlpos) {
					LCN tc;

					ntfs_debug("Before checks, "
							"vol->mft_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->mft_zone_pos);
					tc = rl[rlpos - 1].lcn +
							rl[rlpos - 1].length;
					if (tc >= vol->mft_zone_end) {
						vol->mft_zone_pos =
								vol->mft_lcn;
						if (!vol->mft_zone_end)
							vol->mft_zone_pos = 0;
					} else if ((bmp_initial_pos >=
							vol->mft_zone_pos ||
							tc > vol->mft_zone_pos)
							&& tc >= vol->mft_lcn)
						vol->mft_zone_pos = tc;
					ntfs_debug("After checks, "
							"vol->mft_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->mft_zone_pos);
				}
				/* Switch from mft zone to data1 zone. */
switch_to_data1_zone:		search_zone = 2;
				zone_start = bmp_initial_pos =
						vol->data1_zone_pos;
				zone_end = vol->nr_clusters;
				if (zone_start == vol->mft_zone_end)
					pass = 2;
				if (zone_start >= zone_end) {
					vol->data1_zone_pos = zone_start =
							vol->mft_zone_end;
					pass = 2;
				}
				break;
			case 2:
				ntfs_debug("Switching from data1 zone to "
						"data2 zone.");
				/* Update data1 zone position. */
				if (rlpos) {
					LCN tc;

					ntfs_debug("Before checks, "
							"vol->data1_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->data1_zone_pos);
					tc = rl[rlpos - 1].lcn +
							rl[rlpos - 1].length;
					if (tc >= vol->nr_clusters)
						vol->data1_zone_pos =
								vol->mft_zone_end;
					else if ((bmp_initial_pos >=
							vol->data1_zone_pos ||
							tc > vol->data1_zone_pos)
							&& tc >= vol->mft_zone_end)
						vol->data1_zone_pos = tc;
					ntfs_debug("After checks, "
							"vol->data1_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->data1_zone_pos);
				}
				/* Switch from data1 zone to data2 zone. */
				search_zone = 4;
				zone_start = bmp_initial_pos =
						vol->data2_zone_pos;
				zone_end = vol->mft_zone_start;
				if (!zone_start)
					pass = 2;
				if (zone_start >= zone_end) {
					vol->data2_zone_pos = zone_start =
							bmp_initial_pos = 0;
					pass = 2;
				}
				break;
			case 4:
				ntfs_debug("Switching from data2 zone to "
						"data1 zone.");
				/* Update data2 zone position. */
				if (rlpos) {
					LCN tc;

					ntfs_debug("Before checks, "
							"vol->data2_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->data2_zone_pos);
					tc = rl[rlpos - 1].lcn +
							rl[rlpos - 1].length;
					if (tc >= vol->mft_zone_start)
						vol->data2_zone_pos = 0;
					else if (bmp_initial_pos >=
							vol->data2_zone_pos ||
							tc > vol->data2_zone_pos)
						vol->data2_zone_pos = tc;
					ntfs_debug("After checks, "
							"vol->data2_zone_pos "
							"0x%llx.",
							(unsigned long long)
							vol->data2_zone_pos);
				}
				/* Switch from data2 zone to data1 zone. */
				goto switch_to_data1_zone;
			default:
				panic("%s(): Reached default case in switch "
						"(3)!\n", __FUNCTION__);
			}
			ntfs_debug("After zone switch, search_zone %d, "
					"pass %d, bmp_initial_pos 0x%llx, "
					"zone_start 0x%llx, zone_end 0x%llx.",
					search_zone, pass,
					(unsigned long long)bmp_initial_pos,
					(unsigned long long)zone_start,
					(unsigned long long)zone_end);
			bmp_pos = zone_start;
			if (zone_start == zone_end) {
				ntfs_debug("Empty zone, going to "
						"done_zones_check.");
				/* Empty zone.  Don't bother searching it. */
				goto done_zones_check;
			}
			ntfs_debug("Continuing outer while loop.");
			continue;
		} /* done_zones == 7 */
		ntfs_debug("All zones are finished.");
		/*
		 * All zones are finished!  If DATA_ZONE, shrink mft zone.  If
		 * MFT_ZONE, we have really run out of space.
		 */
		mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
		ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end "
				"0x%llx, mft_zone_size 0x%llx.",
				(unsigned long long)vol->mft_zone_start,
				(unsigned long long)vol->mft_zone_end,
				(unsigned long long)mft_zone_size);
		if (zone == MFT_ZONE || mft_zone_size <= 0) {
			ntfs_debug("No free clusters left, going to out.");
			/* Really no more space left on device. */
			err = ENOSPC;
			goto out;
		} /* zone == DATA_ZONE && mft_zone_size > 0 */
		/* Halve the mft zone and retry the reclaimed area as data1. */
		ntfs_debug("Shrinking mft zone.");
		zone_end = vol->mft_zone_end;
		mft_zone_size >>= 1;
		if (mft_zone_size > 0)
			vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
		else /* mft zone and data2 zone no longer exist. */
			vol->data2_zone_pos = vol->mft_zone_start =
					vol->mft_zone_end = 0;
		if (vol->mft_zone_pos >= vol->mft_zone_end) {
			vol->mft_zone_pos = vol->mft_lcn;
			if (!vol->mft_zone_end)
				vol->mft_zone_pos = 0;
		}
		bmp_pos = zone_start = bmp_initial_pos =
				vol->data1_zone_pos = vol->mft_zone_end;
		search_zone = 2;
		pass = 2;
		done_zones &= ~2;
		ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, "
				"vol->mft_zone_start 0x%llx, "
				"vol->mft_zone_end 0x%llx, "
				"vol->mft_zone_pos 0x%llx, search_zone 2, "
				"pass 2, dones_zones 0x%x, zone_start 0x%llx, "
				"zone_end 0x%llx, vol->data1_zone_pos 0x%llx, "
				"continuing outer while loop.",
				(unsigned long long)mft_zone_size,
				(unsigned long long)vol->mft_zone_start,
				(unsigned long long)vol->mft_zone_end,
				(unsigned long long)vol->mft_zone_pos,
				done_zones, (unsigned long long)zone_start,
				(unsigned long long)zone_end,
				(unsigned long long)vol->data1_zone_pos);
	}
	ntfs_debug("After outer while loop.");
out:
	ntfs_debug("At out.");
	/*
	 * Add runlist terminator element.  Note @rl != NULL implies
	 * @rlpos >= 1 since @rl is only allocated just before an element is
	 * appended.
	 */
	if (rl) {
		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
		rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
		rl[rlpos].length = 0;
	}
	if (upl) {
		ntfs_page_unmap(lcnbmp_ni, upl, pl, need_writeback);
		if (need_writeback) {
			ntfs_debug("Marking page dirty.");
			need_writeback = FALSE;
		}
	}
	if (!err) {
		/*
		 * We allocated new clusters thus we need to unmap any
		 * underlying i/os that may be inprogress so our new data
		 * cannot get trampled by old data from the previous owner of
		 * the clusters.
		 */
		if (rl) {
			ntfs_rl_element *trl;
			vnode_t dev_vn = vol->dev_vn;
			const u8 cluster_to_block_shift =
					vol->cluster_size_shift -
					vol->sector_size_shift;

			/* Iterate over the runs in the allocated runlist. */
			for (trl = rl; trl->length; trl++) {
				daddr64_t block, end_block;

				/* Skip the terminator/sparse elements. */
				if (trl->lcn < 0)
					continue;
				/* Determine the starting block of this run. */
				block = trl->lcn << cluster_to_block_shift;
				/* Determine the last block of this run. */
				end_block = block + (trl->length <<
						cluster_to_block_shift);
				/*
				 * For each block in this run, invoke
				 * buf_invalblkno() to ensure no i/o against
				 * the block device can happen once we have
				 * allocated the buffers for other purposes.
				 *
				 * FIXME:/TODO: buf_invalblkno() currently
				 * aborts if msleep() returns an error so we
				 * keep calling it until it returns success.
				 */
				for (; block < end_block; block++) {
					do {
						err = buf_invalblkno(dev_vn,
								block,
								BUF_WAIT);
					} while (err);
				}
			}
		}
		lck_rw_unlock_shared(&lcnbmp_ni->lock);
		(void)vnode_put(lcnbmp_ni->vn);
		lck_rw_unlock_exclusive(&vol->lcnbmp_lock);
		/* Hand the allocated runlist to the caller. */
		if (runlist->alloc)
			OSFree(runlist->rl, runlist->alloc, ntfs_malloc_tag);
		runlist->rl = rl;
		runlist->elements = rlpos + 1;
		runlist->alloc = rlsize;
		ntfs_debug("Done.");
		return 0;
	}
	ntfs_error(vol->mp, "Failed to allocate clusters, aborting (error "
			"%d).", err);
	if (rl) {
		errno_t err2;

		if (err == ENOSPC)
			ntfs_debug("Not enough space to complete allocation, "
					"err ENOSPC, first free lcn 0x%llx, "
					"could allocate up to 0x%llx "
					"clusters.",
					(unsigned long long)rl[0].lcn,
					(unsigned long long)(count - clusters));
		/* Deallocate all allocated clusters. */
		ntfs_debug("Attempting rollback...");
		err2 = ntfs_cluster_free_from_rl_nolock(vol, rl, 0, -1, NULL);
		if (err2) {
			ntfs_error(vol->mp, "Failed to rollback (error %d). "
					"Leaving inconsistent metadata! "
					"Unmount and run chkdsk.", err2);
			NVolSetErrors(vol);
		}
		/* Free the runlist. */
		OSFree(rl, rlsize, ntfs_malloc_tag);
	} else if (err == ENOSPC)
		ntfs_debug("No space left at all, err ENOSPC, first free lcn "
				"0x%llx.", (long long)vol->data1_zone_pos);
	lck_rw_unlock_shared(&lcnbmp_ni->lock);
	(void)vnode_put(lcnbmp_ni->vn);
	lck_rw_unlock_exclusive(&vol->lcnbmp_lock);
	return err;
}

/**
 * ntfs_cluster_free_from_rl_nolock - free clusters from runlist
 * @vol:	mounted ntfs volume on which to free the clusters
 * @rl:		runlist describing the clusters to free
 * @start_vcn:	vcn in the runlist @rl at which to start freeing clusters
 * @count:	number of clusters to free or -1 for all clusters
 * @nr_freed:	if not NULL return the number of real clusters freed
 *
 * Free @count clusters starting at the cluster @start_vcn in the runlist @rl
 * on the volume @vol.  If @nr_freed is not NULL, *@nr_freed is set to the
 * number of real clusters (i.e. not counting sparse clusters) that have been
 * freed.
 *
 * If @count is -1, all clusters from @start_vcn to the end of the runlist are
 * deallocated.  Thus, to completely free all clusters in a runlist, use
 * @start_vcn = 0 and @count = -1.
 *
 * Note, ntfs_cluster_free_from_rl_nolock() does not modify the runlist, so you
 * have to remove from the runlist or mark sparse the freed runs later.
 *
 * Return 0 on success and errno on error.  Note that if at least some clusters
 * were freed success is always returned even though not all runs have been
 * freed yet.  This does not matter as it just means that some clusters are
 * lost until chkdsk is next run and as we schedule chkdsk to run at next boot
 * this should happen soon.  We do emit a warning about this happening
 * however.
 *
 * Locking: - The volume lcn bitmap must be locked for writing on entry and is
 *	      left locked on return.
 *	    - The caller must have locked the runlist @rl for reading or
 *	      writing.
 *	    - The caller must have taken an iocount reference on the lcnbmp
 *	      vnode.
 */
static errno_t ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
		ntfs_rl_element *rl, const VCN start_vcn, s64 count,
		s64 *nr_freed)
{
	s64 delta, to_free, real_freed;
	ntfs_inode *lcnbmp_ni = vol->lcnbmp_ni;
	errno_t err;

	ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx.",
			(unsigned long long)start_vcn,
			(unsigned long long)count);
	if (!lcnbmp_ni)
		panic("%s(): !lcnbmp_ni\n", __FUNCTION__);
	if (start_vcn < 0)
		panic("%s(): start_vcn < 0\n", __FUNCTION__);
	if (count < -1)
		panic("%s(): count < -1\n", __FUNCTION__);
	real_freed = 0;
	if (nr_freed)
		*nr_freed = 0;
	/* Locate the runlist element containing @start_vcn. */
	rl = ntfs_rl_find_vcn_nolock(rl, start_vcn);
	if (!rl)
		return 0;
	if (rl->lcn < LCN_HOLE) {
		ntfs_error(vol->mp, "First runlist element has invalid lcn, "
				"aborting.");
		return EIO;
	}
	/* Find the starting cluster inside the run that needs freeing. */
	delta = start_vcn - rl->vcn;
	/* The number of clusters in this run that need freeing. */
	to_free = rl->length - delta;
	if (count >= 0 && to_free > count)
		to_free = count;
	/* Sparse runs (rl->lcn < 0) occupy no bitmap bits, skip them. */
	if (rl->lcn >= 0) {
		/* Do the actual freeing of the clusters in this run. */
		err = ntfs_bitmap_clear_run(lcnbmp_ni, rl->lcn + delta,
				to_free);
		if (err) {
			ntfs_error(vol->mp, "Failed to clear first run "
					"(error %d), aborting.", err);
			return err;
		}
		/* We have freed @to_free real clusters. */
		real_freed = to_free;
		/* Keep the cached free cluster count in sync. */
		vol->nr_free_clusters += to_free;
		if (vol->nr_free_clusters > vol->nr_clusters)
			vol->nr_free_clusters = vol->nr_clusters;
	}
	/* Go to the next run and adjust the number of clusters left to free. */
	++rl;
	if (count >= 0)
		count -= to_free;
	/*
	 * Loop over the remaining runs, using @count as a capping value, and
	 * free them.  Note @count == -1 (free everything) is non-zero and
	 * thus keeps the loop going until the runlist terminator.
	 */
	for (; rl->length && count; ++rl) {
		if (rl->lcn < LCN_HOLE) {
			/*
			 * Invalid element: warn and carry on rather than
			 * fail, per the contract documented above.
			 */
			ntfs_error(vol->mp, "Runlist element has invalid lcn "
					"%lld at vcn 0x%llx, not freeing. "
					"Run chkdsk to recover the lost "
					"space.", (long long)rl->lcn,
					(unsigned long long)rl->vcn);
			NVolSetErrors(vol);
		}
		/* The number of clusters in this run that need freeing. */
		to_free = rl->length;
		if (count >= 0 && to_free > count)
			to_free = count;
		if (rl->lcn >= 0) {
			/* Do the actual freeing of the clusters in the run. */
			err = ntfs_bitmap_clear_run(lcnbmp_ni, rl->lcn,
					to_free);
			if (err) {
				ntfs_warning(vol->mp, "Failed to free "
						"clusters in subsequent run. "
						"Run chkdsk to recover the "
						"lost space.");
				NVolSetErrors(vol);
			} else {
				vol->nr_free_clusters += to_free;
				if (vol->nr_free_clusters > vol->nr_clusters)
					vol->nr_free_clusters =
							vol->nr_clusters;
			}
			/* We have freed @to_free real clusters. */
			real_freed += to_free;
		}
		/* Adjust the number of clusters left to free. */
		if (count >= 0)
			count -= to_free;
	}
	if (count > 0)
		panic("%s(): count > 0\n", __FUNCTION__);
	/* We are done.  Return the number of actually freed clusters. */
	ntfs_debug("Done.");
	if (nr_freed)
		*nr_freed = real_freed;
	return 0;
}

/**
 * ntfs_cluster_free_from_rl - free clusters from runlist
 * @vol:	mounted ntfs volume on which to free the clusters
 * @rl:		runlist describing the clusters to free
 * @start_vcn:	vcn in the runlist @rl at which to start freeing clusters
 * @count:	number of clusters to free or -1 for all clusters
 * @nr_freed:	if not NULL return the number of real clusters freed
 *
 * Free @count clusters starting at the cluster @start_vcn in the runlist @rl
 * on the volume @vol.  If @nr_freed is not NULL, *@nr_freed is set to the
 * number of real clusters (i.e.
not counting sparse clusters) that have been
 * freed.
 *
 * If @count is -1, all clusters from @start_vcn to the end of the runlist are
 * deallocated. Thus, to completely free all clusters in a runlist, use
 * @start_vcn = 0 and @count = -1.
 *
 * Note, ntfs_cluster_free_from_rl_nolock() does not modify the runlist, so you
 * have to remove from the runlist or mark sparse the freed runs later.
 *
 * Return 0 on success and errno on error. Note that if at least some clusters
 * were freed success is always returned even though not all runs have been
 * freed yet. This does not matter as it just means that some clusters are
 * lost until chkdsk is next run and as we schedule chkdsk to run at next boot
 * this should happen soon. We do emit a warning about this happening
 * however.
 *
 * Locking: - This function takes the volume lcn bitmap lock for writing and
 *	      modifies the bitmap contents.
 *	    - The caller must have locked the runlist @rl for reading or
 *	      writing.
1033 */ 1034errno_t ntfs_cluster_free_from_rl(ntfs_volume *vol, ntfs_rl_element *rl, 1035 const VCN start_vcn, s64 count, s64 *nr_freed) 1036{ 1037 ntfs_inode *lcnbmp_ni; 1038 vnode_t lcnbmp_vn; 1039 errno_t err; 1040 1041 lcnbmp_ni = vol->lcnbmp_ni; 1042 lcnbmp_vn = lcnbmp_ni->vn; 1043 lck_rw_lock_exclusive(&vol->lcnbmp_lock); 1044 err = vnode_get(lcnbmp_vn); 1045 if (!err) { 1046 lck_rw_lock_shared(&lcnbmp_ni->lock); 1047 err = ntfs_cluster_free_from_rl_nolock(vol, rl, start_vcn, 1048 count, nr_freed); 1049 lck_rw_unlock_shared(&lcnbmp_ni->lock); 1050 (void)vnode_put(lcnbmp_vn); 1051 lck_rw_unlock_exclusive(&vol->lcnbmp_lock); 1052 return err; 1053 } 1054 lck_rw_unlock_exclusive(&vol->lcnbmp_lock); 1055 ntfs_error(vol->mp, "Failed to get vnode for $Bitmap."); 1056 return err; 1057} 1058 1059/** 1060 * ntfs_cluster_free_nolock - free clusters on an ntfs volume 1061 * @ni: ntfs inode whose runlist describes the clusters to free 1062 * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters 1063 * @count: number of clusters to free or -1 for all clusters 1064 * @ctx: active attribute search context if present or NULL if not 1065 * @nr_freed: if not NULL return the number of real clusters freed 1066 * @is_rollback: true if this is a rollback operation 1067 * 1068 * Free @count clusters starting at the cluster @start_vcn in the runlist 1069 * described by the ntfs inode @ni. If @nr_freed is not NULL, *@nr_freed is 1070 * set to the number of real clusters (i.e. not counting sparse clusters) that 1071 * have been freed. 1072 * 1073 * If @count is -1, all clusters from @start_vcn to the end of the runlist are 1074 * deallocated. Thus, to completely free all clusters in a runlist, use 1075 * @start_vcn = 0 and @count = -1. 1076 * 1077 * If @ctx is specified, it is an active search context of @ni and its base mft 1078 * record. This is needed when ntfs_cluster_free_nolock() encounters unmapped 1079 * runlist fragments and allows their mapping. 
If you do not have the mft
 * record mapped, you can specify @ctx as NULL and ntfs_cluster_free_nolock()
 * will perform the necessary mapping and unmapping.
 *
 * Note, ntfs_cluster_free_nolock() saves the state of @ctx on entry and
 * restores it before returning. Thus, @ctx will be left pointing to the same
 * attribute on return as on entry. However, the actual pointers in @ctx may
 * point to different memory locations on return, so you must remember to reset
 * any cached pointers from the @ctx, i.e. after the call to
 * ntfs_cluster_free_nolock(), you will probably want to do:
 *	m = ctx->m;
 *	a = ctx->a;
 * Assuming you cache ctx->a in a variable @a of type ATTR_RECORD * and that
 * you cache ctx->m in a variable @m of type MFT_RECORD *.
 *
 * @is_rollback should always be false, it is for internal use to rollback
 * errors. You probably want to use ntfs_cluster_free() instead.
 *
 * Note, ntfs_cluster_free_nolock() does not modify the runlist, so you have to
 * remove from the runlist or mark sparse the freed runs later.
 *
 * Return 0 on success and errno on error.
 *
 * WARNING: If @ctx is supplied, regardless of whether success or failure is
 *	    returned, you need to check @ctx->is_error and if 1 the @ctx is no
 *	    longer valid, i.e. you need to either call
 *	    ntfs_attr_search_ctx_reinit() or ntfs_attr_search_ctx_put() on it.
 *	    In that case @ctx->error will give you the error code for why the
 *	    mapping of the old inode failed.
 *	    Also if @ctx is supplied and the current attribute (or the mft
 *	    record it is in) has been modified then the caller must call
 *	    NInoSetMrecNeedsDirtying(ctx->ni); before calling
 *	    ntfs_map_runlist_nolock() or the changes may be lost.
 *
 * Locking: - The volume lcn bitmap must be locked for writing on entry and is
 *	      left locked on return.
 *	    - The runlist described by @ni must be locked for writing on entry
 *	      and is locked on return. Note the runlist may be modified when
 *	      needed runlist fragments need to be mapped.
 *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
 *	      entry and it will be left unmapped on return.
 *	    - If @ctx is not NULL, the base mft record must be mapped on entry
 *	      and it will be left mapped on return.
 *	    - The caller must have taken an iocount reference on the lcnbmp
 *	      vnode.
 */
static errno_t ntfs_cluster_free_nolock(ntfs_inode *ni, const VCN start_vcn,
		s64 count, ntfs_attr_search_ctx *ctx, s64 *nr_freed,
		const BOOL is_rollback)
{
	s64 delta, to_free, total_freed, real_freed;
	ntfs_volume *vol;
	ntfs_inode *lcnbmp_ni;
	ntfs_rl_element *rl;
	errno_t err, err2;

	vol = ni->vol;
	lcnbmp_ni = vol->lcnbmp_ni;
	/* Sanity check the arguments; these indicate driver bugs, not i/o errors. */
	if (!lcnbmp_ni)
		panic("%s(): !lcnbmp_ni\n", __FUNCTION__);
	if (start_vcn < 0)
		panic("%s(): start_vcn < 0\n", __FUNCTION__);
	if (count < -1)
		panic("%s(): count < -1\n", __FUNCTION__);
	total_freed = real_freed = 0;
	if (nr_freed)
		*nr_freed = 0;
	/* Look up (mapping the runlist fragment if needed) the first run. */
	err = ntfs_attr_find_vcn_nolock(ni, start_vcn, &rl, ctx);
	if (err) {
		if (!is_rollback)
			ntfs_error(vol->mp, "Failed to find first runlist "
					"element (error %d), aborting.", err);
		goto err;
	}
	if (rl->lcn < LCN_HOLE) {
		if (!is_rollback)
			ntfs_error(vol->mp, "First runlist element has "
					"invalid lcn, aborting.");
		err = EIO;
		goto err;
	}
	/* Find the starting cluster inside the run that needs freeing. */
	delta = start_vcn - rl->vcn;
	/* The number of clusters in this run that need freeing. */
	to_free = rl->length - delta;
	if (count >= 0 && to_free > count)
		to_free = count;
	/* Only real runs (lcn >= 0) have bitmap bits; holes are skipped. */
	if (rl->lcn >= 0) {
		/*
		 * Do the actual freeing of the clusters in this run: value 0
		 * clears the bitmap bits (free), value 1 sets them again
		 * (re-allocate when rolling back a failed free).
		 */
		err = ntfs_bitmap_set_bits_in_run(lcnbmp_ni, rl->lcn + delta,
				to_free, !is_rollback ? 0 : 1);
		if (err) {
			if (!is_rollback)
				ntfs_error(vol->mp, "Failed to clear first run "
						"(error %d), aborting.", err);
			goto err;
		}
		/* We have freed @to_free real clusters. */
		real_freed = to_free;
		/* Keep the cached free-cluster count consistent, clamped to [0, nr_clusters]. */
		if (is_rollback) {
			vol->nr_free_clusters -= to_free;
			if (vol->nr_free_clusters < 0)
				vol->nr_free_clusters = 0;
		} else {
			vol->nr_free_clusters += to_free;
			if (vol->nr_free_clusters > vol->nr_clusters)
				vol->nr_free_clusters = vol->nr_clusters;
		}
	}
	/* Go to the next run and adjust the number of clusters left to free. */
	++rl;
	if (count >= 0)
		count -= to_free;
	/* Keep track of the total "freed" clusters, including sparse ones. */
	total_freed = to_free;
	/*
	 * Loop over the remaining runs, using @count as a capping value, and
	 * free them.
	 */
	for (; count; ++rl) {
		if (rl->lcn < LCN_HOLE) {
			VCN vcn;

			/*
			 * If we have reached the end of the runlist we are
			 * done. We need this when @count is -1 so that we
			 * detect the end of the runlist.
			 */
			if (rl->lcn == LCN_ENOENT)
				break;
			/* Attempt to map runlist. */
			vcn = rl->vcn;
			err = ntfs_attr_find_vcn_nolock(ni, vcn, &rl, ctx);
			if (err) {
				/*
				 * If we have reached the end of the runlist we
				 * are done. We need this when @count is -1 so
				 * that we detect the end of the runlist.
				 */
				if (err == ENOENT)
					break;
				if (!is_rollback)
					ntfs_error(vol->mp, "Failed to map "
							"runlist fragment or "
							"failed to find "
							"subsequent runlist "
							"element.");
				goto err;
			}
			/* Still invalid after mapping: genuine corruption. */
			if (rl->lcn < LCN_HOLE) {
				if (!is_rollback)
					ntfs_error(vol->mp, "Runlist element "
							"has invalid lcn "
							"(0x%llx).",
							(unsigned long long)
							rl->lcn);
				err = EIO;
				goto err;
			}
		}
		/* The number of clusters in this run that need freeing. */
		to_free = rl->length;
		if (count >= 0 && to_free > count)
			to_free = count;
		if (rl->lcn >= 0) {
			/* Do the actual freeing of the clusters in the run. */
			err = ntfs_bitmap_set_bits_in_run(lcnbmp_ni, rl->lcn,
					to_free, !is_rollback ? 0 : 1);
			if (err) {
				if (!is_rollback)
					ntfs_error(vol->mp, "Failed to clear "
							"subsequent run.");
				goto err;
			}
			/* We have freed @to_free real clusters. */
			real_freed += to_free;
			if (is_rollback) {
				vol->nr_free_clusters -= to_free;
				if (vol->nr_free_clusters < 0)
					vol->nr_free_clusters = 0;
			} else {
				vol->nr_free_clusters += to_free;
				if (vol->nr_free_clusters > vol->nr_clusters)
					vol->nr_free_clusters =
							vol->nr_clusters;
			}
		}
		/* Adjust the number of clusters left to free. */
		if (count >= 0)
			count -= to_free;
		/* Update the total done clusters. */
		total_freed += to_free;
	}
	/* A positive residue means the runlist ended before @count clusters. */
	if (count > 0)
		panic("%s(): count > 0\n", __FUNCTION__);
	/* We are done. Return the number of actually freed clusters. */
	ntfs_debug("Done.");
	if (nr_freed)
		*nr_freed = real_freed;
	return 0;
err:
	/* A failed rollback must not recurse into another rollback. */
	if (is_rollback)
		return err;
	/* If no real clusters were freed, no need to rollback. */
	if (!real_freed)
		return err;
	/*
	 * Attempt to rollback and if that succeeds just return the error code.
	 * If rollback fails, set the volume errors flag, emit an error
	 * message, and return the error code.
	 *
	 * The rollback re-runs this function over the @total_freed clusters
	 * already processed, with is_rollback true so the bitmap bits are set
	 * back instead of cleared.
	 */
	err2 = ntfs_cluster_free_nolock(ni, start_vcn, total_freed, ctx, NULL,
			TRUE);
	if (err2) {
		ntfs_error(vol->mp, "Failed to rollback (error %d). Leaving "
				"inconsistent metadata! Unmount and run "
				"chkdsk.", err2);
		NVolSetErrors(vol);
	}
	ntfs_error(vol->mp, "Aborting (error %d).", err);
	return err;
}

/**
 * ntfs_cluster_free - free clusters on an ntfs volume
 * @ni:		ntfs inode whose runlist describes the clusters to free
 * @start_vcn:	vcn in the runlist of @ni at which to start freeing clusters
 * @count:	number of clusters to free or -1 for all clusters
 * @ctx:	active attribute search context if present or NULL if not
 * @nr_freed:	if not NULL return the number of real clusters freed
 *
 * Free @count clusters starting at the cluster @start_vcn in the runlist
 * described by the ntfs inode @ni. If @nr_freed is not NULL, *@nr_freed is
 * set to the number of real clusters (i.e. not counting sparse clusters) that
 * have been freed.
 *
 * If @count is -1, all clusters from @start_vcn to the end of the runlist are
 * deallocated. Thus, to completely free all clusters in a runlist, use
 * @start_vcn = 0 and @count = -1.
 *
 * If @ctx is specified, it is an active search context of @ni and its base mft
 * record. This is needed when ntfs_cluster_free() encounters unmapped runlist
 * fragments and allows their mapping. If you do not have the mft record
 * mapped, you can specify @ctx as NULL and ntfs_cluster_free() will perform
 * the necessary mapping and unmapping.
 *
 * Note, ntfs_cluster_free() saves the state of @ctx on entry and restores it
 * before returning. Thus, @ctx will be left pointing to the same attribute on
 * return as on entry.
However, the actual pointers in @ctx may point to 1328 * different memory locations on return, so you must remember to reset any 1329 * cached pointers from the @ctx, i.e. after the call to ntfs_cluster_free(), 1330 * you will probably want to do: 1331 * m = ctx->m; 1332 * a = ctx->a; 1333 * Assuming you cache ctx->a in a variable @a of type ATTR_RECORD * and that 1334 * you cache ctx->m in a variable @m of type MFT_RECORD *. 1335 * 1336 * Note, ntfs_cluster_free() does not modify the runlist, so you have to remove 1337 * from the runlist or mark sparse the freed runs later. 1338 * 1339 * Return 0 on success and errno on error. 1340 * 1341 * WARNING: If @ctx is supplied, regardless of whether success or failure is 1342 * returned, you need to check @ctx->is_error and if 1 the @ctx is no 1343 * longer valid, i.e. you need to either call 1344 * ntfs_attr_search_ctx_reinit() or ntfs_attr_search_ctx_put() on it. 1345 * In that case @ctx->error will give you the error code for why the 1346 * mapping of the old inode failed. 1347 * Also if @ctx is supplied and the current attribute (or the mft 1348 * record it is in) has been modified then the caller must call 1349 * NInoSetMrecNeedsDirtying(ctx->ni); before calling 1350 * ntfs_map_runlist_nolock() or the changes may be lost. 1351 * 1352 * Locking: - The runlist described by @ni must be locked for writing on entry 1353 * and is locked on return. Note the runlist may be modified when 1354 * needed runlist fragments need to be mapped. 1355 * - The volume lcn bitmap must be unlocked on entry and is unlocked 1356 * on return. 1357 * - This function takes the volume lcn bitmap lock for writing and 1358 * modifies the bitmap contents. 1359 * - If @ctx is NULL, the base mft record of @ni must not be mapped on 1360 * entry and it will be left unmapped on return. 1361 * - If @ctx is not NULL, the base mft record must be mapped on entry 1362 * and it will be left mapped on return. 
1363 */ 1364errno_t ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count, 1365 ntfs_attr_search_ctx *ctx, s64 *nr_freed) 1366{ 1367 ntfs_volume *vol; 1368 ntfs_inode *lcnbmp_ni; 1369 vnode_t lcnbmp_vn; 1370 errno_t err; 1371 1372 if (!ni) 1373 panic("%s(): !ni\n", __FUNCTION__); 1374 ntfs_debug("Entering for mft_no 0x%llx, start_vcn 0x%llx, count " 1375 "0x%llx.", (unsigned long long)ni->mft_no, 1376 (unsigned long long)start_vcn, 1377 (unsigned long long)count); 1378 vol = ni->vol; 1379 lcnbmp_ni = vol->lcnbmp_ni; 1380 lcnbmp_vn = lcnbmp_ni->vn; 1381 lck_rw_lock_exclusive(&vol->lcnbmp_lock); 1382 err = vnode_get(lcnbmp_vn); 1383 if (!err) { 1384 lck_rw_lock_shared(&lcnbmp_ni->lock); 1385 err = ntfs_cluster_free_nolock(ni, start_vcn, count, ctx, 1386 nr_freed, FALSE); 1387 lck_rw_unlock_shared(&lcnbmp_ni->lock); 1388 (void)vnode_put(lcnbmp_vn); 1389 lck_rw_unlock_exclusive(&vol->lcnbmp_lock); 1390 return err; 1391 } 1392 lck_rw_unlock_exclusive(&vol->lcnbmp_lock); 1393 ntfs_error(vol->mp, "Failed to get vnode for $Bitmap."); 1394 return err; 1395} 1396