1/* 2 * ntfs_compress.c - NTFS kernel compressed attribute operations. 3 * 4 * Copyright (c) 2006-2008 Anton Altaparmakov. All Rights Reserved. 5 * Portions Copyright (c) 2006-2008 Apple Inc. All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 * ALTERNATIVELY, provided that this notice and licensing terms are retained in
 * full, this file may be redistributed and/or modified under the terms of the
 * GNU General Public License (GPL) Version 2, in which case the provisions of
 * that version of the GPL will apply to you instead of the license terms
 * above. You can obtain a copy of the GPL Version 2 at
 * http://developer.apple.com/opensource/licenses/gpl-2.txt.
 */

#include <sys/errno.h>
#include <sys/ucred.h>
#include <sys/ubc.h>
#include <sys/uio.h>
#include <sys/types.h>

#include <mach/memory_object_types.h>

#include <string.h>

#include <libkern/OSMalloc.h>

#include <kern/debug.h>
#include <kern/locks.h>

#include "ntfs.h"
#include "ntfs_attr.h"
#include "ntfs_compress.h"
#include "ntfs_debug.h"
#include "ntfs_inode.h"
#include "ntfs_runlist.h"
#include "ntfs_types.h"
#include "ntfs_volume.h"

/**
 * Compression related constants.
 */
enum {
	/*
	 * Compression block (cb) types. These are negative so they cannot
	 * collide with (positive) errno values returned via the same int.
	 */
	NTFS_CB_SPARSE = -1,
	NTFS_CB_COMPRESSED = -2,
	NTFS_CB_UNCOMPRESSED = -3,

	/* Compression sub-block (sb) constants. */
	NTFS_SB_SIZE_MASK = 0x0fff,	/* Mask for the length field of an sb header. */
	NTFS_SB_SIZE = 0x1000,		/* Uncompressed size of an sb (4kiB). */
	NTFS_SB_IS_COMPRESSED = 0x8000,	/* Set in the sb header if the sb is compressed. */

	/* Token types and access mask. */
	NTFS_SYMBOL_TOKEN = 0,
	NTFS_PHRASE_TOKEN = 1,
	NTFS_TOKEN_MASK = 1,
};

/**
 * ntfs_get_cb_type - determine the type of a compression block
 * @ni:		raw ntfs inode to which the compression block belongs
 * @ofs:	byte offset of start of compression block
 *
 * Determine whether the compression block is sparse, compressed, or
 * uncompressed by looking at the runlist.
 *
 * If the first cluster in the compression block is sparse the whole
 * compression block is sparse. In that case we return NTFS_CB_SPARSE.
 *
 * If the last cluster in the compression block is sparse the compression block
 * is compressed. In that case we return NTFS_CB_COMPRESSED.
 *
 * If the whole compression block is backed by real clusters it is not
 * compressed. In that case we return NTFS_CB_UNCOMPRESSED.
 *
 * Return the compression block type (< 0) and errno (>= 0) on error.
 *
 * Locking: Takes and drops @ni->rl.lock (shared, upgraded to exclusive if the
 *	    runlist needs to be mapped).
 */
static inline int ntfs_get_cb_type(ntfs_inode *ni, s64 ofs)
{
	VCN start_vcn, vcn, end_vcn;
	LCN lcn;
	s64 clusters;
	ntfs_rl_element *rl;
	/*
	 * Default to EIO so that falling through to the "else if (!ret)"
	 * branch below without an explicit error still reports failure.
	 */
	int ret = EIO;
#ifdef DEBUG
	const char *cb_type_str[3] = { "sparse", "compressed", "uncompressed" };
#endif /* DEBUG */
	BOOL is_retry, write_locked;

	ntfs_debug("Entering for compressed file inode 0x%llx, offset 0x%llx.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)ofs);
	vcn = start_vcn = ofs >> ni->vol->cluster_size_shift;
	end_vcn = start_vcn + ni->compression_block_clusters;
	write_locked = is_retry = FALSE;
	lck_rw_lock_shared(&ni->rl.lock);
retry_remap:
	rl = ni->rl.rl;
	if (ni->rl.elements) {
next_vcn:
		/* Seek to element containing target vcn. */
		while (rl->length && rl[1].vcn <= vcn)
			rl++;
		lcn = ntfs_rl_vcn_to_lcn(rl, vcn, &clusters);
		/*
		 * If we found a hole we are done. If we were looking up the
		 * starting vcn the entire compression block must be sparse.
		 * Otherwise only part of the compression block is sparse, thus
		 * it is compressed.
		 */
		if (lcn == LCN_HOLE) {
			ret = NTFS_CB_COMPRESSED;
			if (vcn == start_vcn)
				ret = NTFS_CB_SPARSE;
			goto done;
		}
		/*
		 * If we found a real cluster allocation and it covers the end
		 * of the compression block the compression block is not
		 * compressed. Otherwise we need to look at the last cluster
		 * in the compression block. Note there is no need to look at
		 * the intervening clusters because it is not possible to have
		 * sparse clusters in the middle of a compression block but not
		 * at its end and neither is it possible to have a partial
		 * compression block at the end of the attribute.
		 */
		if (lcn >= 0) {
			if (vcn + clusters >= end_vcn) {
				ret = NTFS_CB_UNCOMPRESSED;
				goto done;
			}
			/* Look up the last cluster of the compression block. */
			vcn = end_vcn - 1;
			is_retry = FALSE;
			goto next_vcn;
		}
	} else
		lcn = LCN_RL_NOT_MAPPED;
	/* The runlist is not mapped or an error occurred. */
	if (!write_locked) {
		write_locked = TRUE;
		/*
		 * On failure lck_rw_lock_shared_to_exclusive() drops the
		 * shared lock, so reacquire exclusively and retry from
		 * scratch as the runlist may have changed in the meantime.
		 */
		if (!lck_rw_lock_shared_to_exclusive(&ni->rl.lock)) {
			lck_rw_lock_exclusive(&ni->rl.lock);
			goto retry_remap;
		}
	}
	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
		ret = ntfs_map_runlist_nolock(ni, vcn, NULL);
		if (!ret) {
			/* Only retry the mapping once to avoid looping. */
			is_retry = TRUE;
			goto retry_remap;
		}
	} else if (!ret)
		ret = EIO;
done:
	if (write_locked)
		lck_rw_unlock_exclusive(&ni->rl.lock);
	else
		lck_rw_unlock_shared(&ni->rl.lock);
	if (ret < 0)
		ntfs_debug("Done (compression block is %s).",
				cb_type_str[-ret - 1]);
	else
		ntfs_error(ni->vol->mp, "Failed (error %d, lcn %lld).", ret,
				(long long)lcn);
	return ret;
}

/**
 * ntfs_decompress - decompress a compression block into a destination buffer
 * @vol:		ntfs volume to which the compressed data belongs
 * @dst_start:		destination buffer to decompress into
 * @dst_ofs_in_cb:	byte offset of @dst_start into the compression block
 * @dst_size:		size of the destination buffer in bytes
 * @cb_start:		compression block to decompress
 * @cb_size:		size of the compression block in bytes
 * @pl:			page list in which @dst_start resides (or NULL)
 * @cur_pg:		index in @pl of the page containing @dst_start
 * @pages_per_cb:	number of pages per compression block
 *
 * Decompress the compression block @cb_start of size @cb_size into the
 * destination buffer @dst_start.
 *
 * If @pl is not NULL, i.e. a page list is present, skip over any valid pages
 * in the destination buffer.
 *
 * Return 0 on success and errno on error.
 *
 * The decompression algorithm:
 *
 * Compressed data is organized in logical "compression" blocks (cb). Each cb
 * has a size (cb_size) of 2^compression_unit clusters. In all versions of
 * Windows, NTFS (NT/2k/XP, NTFS 1.2-3.1), the only valid compression_unit is
 * 4, IOW, each cb is 2^4 = 16 clusters in size.
 *
 * Compression is only supported for cluster sizes between 512 and 4096. Thus a
 * cb can be between 8 and 64kiB in size.
 *
 * Each cb is independent of the other cbs and is thus the minimal unit we have
 * to parse even if we wanted to decompress only one byte.
 *
 * Also, a cb can be totally uncompressed and this would be indicated as a
 * sparse cb in the runlist.
 *
 * Thus, we need to look at the runlist of the compressed data stream, starting
 * at the beginning of the first cb overlapping @page. So we convert the page
 * offset into units of clusters (vcn), and round the vcn down to a multiple of
 * cb_size clusters.
 *
 * We then scan the runlist for the appropriate position. Based on what we find
 * there, we decide how to proceed.
 *
 * If the cb is not compressed at all, we copy the uncompressed data over from
 * the compressed data.
 *
 * If the cb is completely sparse, we just zero out the destination.
 *
 * In all other cases we initiate the decompression engine, but first some more
 * on the compression algorithm.
 *
 * Before compression the data of each cb is further divided into 4kiB blocks,
 * we call them "sub compression" blocks (sb), each including a header
 * specifying its compressed length. So we could just scan the cb for the
 * first sb overlapping page and skip the sbs before that, or we could
 * decompress the whole cb injecting the superfluous decompressed pages into
 * the page cache as a form of read ahead (this is what we do when invoked via
 * VNOP_READ()).
 *
 * In either case, we then need to read and decompress all sbs overlapping the
 * destination, potentially having to decompress one or more other cbs, too.
 *
 * Because the sbs follow each other directly, we need to actually read in the
 * whole cb in order to be able to scan through the cb to find the first sb
 * overlapping the destination.
 *
 * So, we read the whole cb from disk and start at the first sb.
 *
 * As mentioned above, each sb is started with a header. The header is 16 bits
 * of which the lower twelve bits (i.e. bits 0 to 11) are the length (L) - 3 of
 * the sb (including the two bytes for the header itself, or L - 1 not counting
 * the two bytes for the header). The higher four bits are set to 1011 (0xb)
 * by the compressor for a compressed block, or to 0000 for an uncompressed
 * block, but the decompressor only checks the most significant bit taking a 1
 * to signify a compressed block, and a 0 an uncompressed block.
 *
 * So from the header we know how many compressed bytes we need to decompress to
 * obtain the next 4kiB of uncompressed data and if we did not want to
 * decompress this sb we could just seek to the next one using the length
 * read from the header. We could then continue seeking until we reach the
 * first sb overlapping the destination.
 *
 * In either case, we will reach a sb which we want to decompress.
 *
 * Having dealt with the 16-bit header of the sb, we now have length bytes of
 * compressed data to decompress. This compressed stream is further split into
 * tokens which are organized into groups of eight tokens. Each token group
 * (tg) starts with a tag byte, which is an eight bit bitmap, the bits
 * specifying the type of each of the following eight tokens. The least
 * significant bit (LSB) corresponds to the first token and the most
 * significant bit (MSB) corresponds to the last token.
 *
 * The two types of tokens are symbol tokens, specified by a zero bit, and
 * phrase tokens, specified by a set bit.
 *
 * A symbol token (st) is a single byte and is to be taken literally and copied
 * into the sliding window (the decompressed data).
 *
 * A phrase token (pt) is a pointer back into the sliding window (in bytes),
 * together with a length (again in bytes), starting at the byte the back
 * pointer is pointing to. Thus a phrase token defines a sequence of bytes in
 * the sliding window which need to be copied at the current position into the
 * sliding window (the decompressed data stream).
 *
 * Each pt consists of 2 bytes split into the back pointer (p) and the length
 * (l), each of variable bit width (but the sum of the widths of p and l is
 * fixed at 16 bits). p is at least 4 bits and l is at most 12 bits.
 *
 * The most significant bits contain the back pointer (p), while the least
 * significant bits contain the length (l).
 *
 * l is actually stored as the number of bytes minus 3 (unsigned) as anything
 * shorter than that would be at least as long as the 2 bytes needed for the
 * actual pt, so no compression would be achieved.
 *
 * p is stored as the positive number of bytes minus 1 (unsigned) as going zero
 * bytes back is meaningless.
 *
 * Note that decompression has to occur byte by byte, as it is possible that
 * some of the bytes pointed to by the pt will only be generated in the sliding
 * window as the byte sequence pointed to by the pt is being copied into it!
 *
 * To give a concrete example: a block full of the letter A would be compressed
 * by storing the byte A once as a symbol token, followed by a single phrase
 * token with back pointer -1 (p = 0, therefore go back by -(0 + 1) bytes) and
 * length 4095 (l=0xffc, therefore length 0xffc + 3 bytes).
 *
 * The widths of p and l are determined from the current position within the
 * decompressed data (dst). We do not actually care about the widths as such
 * however, but instead we want the mask (l_mask) with which to AND the pt to
 * obtain l, and the number of bits (p_shift) by which to right shift the pt to
 * obtain p. These are determined using the following algorithm:
 *
 * for (i = cur_pos, l_mask = 0xfff, p_shift = 12; i >= 0x10; i >>= 1) {
 *	l_mask >>= 1;
 *	p_shift--;
 * }
 *
 * The above is the conventional algorithm. As an optimization we actually use
 * a different algorithm as this offers O(1) performance instead of O(n) of the
 * above conventional algorithm. Our optimized algorithm first calculates
 * log2(current destination position in sb) and then uses that to derive p and
 * l without having to iterate. We just need an arch-optimized log2() function
 * now to make it really fast as we for now still have a small loop which we
 * need to determine the log2(). See the below code for details.
 *
 * Note, that as usual in NTFS, the sb header, as well as each pt, are stored
 * in little endian format.
 */
static inline errno_t ntfs_decompress(ntfs_volume *vol, u8 *dst_start,
		const int dst_ofs_in_cb, int dst_size, u8 *const cb_start,
		const int cb_size, upl_page_info_t *pl, int cur_pg,
		const int pages_per_cb)
{
	/*
	 * Pointers into the compressed data, i.e. the compression block (cb),
	 * and the therein contained sub-blocks (sb).
	 */
	u8 *cb;			/* Current position in compression block. */
	u8 *cb_end;		/* End of compression block. */
	u8 *cb_sb_start;	/* Beginning of the current sb in the cb. */
	u8 *cb_sb_end;		/* End of current sb / beginning of next sb. */
	/* Variables for uncompressed data / destination buffer. */
	u8 *dst;		/* Current position in destination. */
	u8 *dst_end;		/* End of destination buffer. */
	u8 *dst_sb_start;	/* Start of current sub-block in destination. */
	u8 *dst_sb_end;		/* End of current sub-block in destination. */
	/* Variables for tag and token parsing. */
	u8 tag;			/* Current tag. */
	unsigned token;		/* Loop counter for the eight tokens in tag. */
	unsigned skip_sbs;	/* Number of sub-blocks to skip before @dst_start. */
	BOOL skip_valid_pages;

	ntfs_debug("Entering, compression block size 0x%x bytes.", cb_size);
	/* Do we need to test for and skip valid pages in the destination? */
	skip_valid_pages = FALSE;
	if (pl && pages_per_cb > 1)
		skip_valid_pages = TRUE;
	/*
	 * Do we need to skip any sub-blocks because the destination buffer
	 * does not begin at the beginning of the compression block?
	 */
	skip_sbs = 0;
	if (dst_ofs_in_cb)
		skip_sbs = dst_ofs_in_cb / NTFS_SB_SIZE;
	cb = cb_start;
	cb_end = cb_start + cb_size;
	dst = dst_start;
	dst_end = dst + dst_size;
next_sb:
	ntfs_debug("Beginning sub-block at offset 0x%lx in the compression "
			"block.", (unsigned long)(cb - cb_start));
	/*
	 * Have we reached the end of the compression block or the end of the
	 * decompressed data? The latter can happen for example if the current
	 * position in the compression block is one byte before its end so the
	 * first two checks do not detect it.
	 */
	if (cb == cb_end || !le16_to_cpup((le16*)cb) || dst == dst_end) {
		ntfs_debug("Done.");
		return 0;
	}
	/* Setup offsets for the current sub-block destination. */
	dst_sb_start = dst;
	dst_sb_end = dst + NTFS_SB_SIZE;
	/* Check that we are still within allowed boundaries. */
	if (dst_sb_end > dst_end)
		goto err;
	/* Does the minimum size of a compressed sb overflow valid range? */
	if (cb + 6 > cb_end)
		goto err;
	/* Setup the current sub-block source pointers and validate range. */
	cb_sb_start = cb;
	cb_sb_end = cb + (le16_to_cpup((le16*)cb) & NTFS_SB_SIZE_MASK) + 3;
	if (cb_sb_end > cb_end)
		goto err;
	/*
	 * If the destination buffer does not start at the beginning of the
	 * compression block, skip sub-blocks until we reach the beginning of
	 * the destination buffer.
	 */
	if (skip_sbs) {
		skip_sbs--;
		/* Advance source position to next sub-block. */
		cb = cb_sb_end;
		goto next_sb;
	}
	/*
	 * If the destination page corresponding to this sub-block is valid,
	 * skip the sub-block.
	 */
	if (skip_valid_pages) {
		BOOL skip_sb;

		skip_sb = upl_valid_page(pl, cur_pg);
		/*
		 * Advance current page if the destination pointer is going to
		 * cross a page boundary. Doing this here unconditionally
		 * means we do not need to advance it later on when switching
		 * to the next sub-block thus it saves us one test for
		 * @skip_valid_pages.
		 */
		if (!((dst - dst_start + NTFS_SB_SIZE) & PAGE_MASK))
			cur_pg++;
		if (skip_sb) {
			/* Advance position to next sub-block. */
			cb = cb_sb_end;
			dst = dst_sb_end;
			goto next_sb;
		}
	}
	/* Now, we are ready to process the current sub-block. */
	if (!(le16_to_cpup((le16*)cb) & NTFS_SB_IS_COMPRESSED)) {
		/*
		 * This sb is not compressed, just copy its data into the
		 * destination buffer.
		 */
		ntfs_debug("Found uncompressed sub-block.");
		/* Advance source position to first data byte. */
		cb += 2;
		/* An uncompressed sb must be full size. */
		if (cb_sb_end - cb != NTFS_SB_SIZE)
			goto err;
		/* Copy the sub-block data. */
		memcpy(dst, cb, NTFS_SB_SIZE);
		/* Advance position to next sub-block. */
		cb = cb_sb_end;
		dst = dst_sb_end;
		goto next_sb;
	}
	/* This sb is compressed, decompress it into the destination buffer. */
	ntfs_debug("Found compressed sub-block.");
	/* Forward to the first tag in the sub-block. */
	cb += 2;
next_tag:
	if (cb == cb_sb_end) {
		/* Check if the decompressed sub-block was not full-length. */
		if (dst < dst_sb_end) {
			ntfs_debug("Filling incomplete sub-block with "
					"zeroes.");
			/* Zero remainder and update destination position. */
			bzero(dst, dst_sb_end - dst);
			dst = dst_sb_end;
		}
		/* We have finished the current sub-block. */
		goto next_sb;
	}
	/* Check we are still in range. */
	if (cb > cb_sb_end || dst > dst_sb_end)
		goto err;
	/* Get the next tag and advance to first token. */
	tag = *cb++;
	/* Parse the eight tokens described by the tag. */
	for (token = 0; token < 8; token++, tag >>= 1) {
		unsigned lg, u, pt, length, max_non_overlap;
		u8 *dst_back_addr;

		/*
		 * Check if we are done / still in range.
		 *
		 * NOTE(review): this admits dst == dst_sb_end, so a symbol
		 * token here writes one byte at dst_sb_end. For corrupt data
		 * in the final sub-block (dst_sb_end == dst_end) that would
		 * be one byte past the destination buffer — verify whether
		 * the check should be dst >= dst_sb_end instead.
		 */
		if (cb >= cb_sb_end || dst > dst_sb_end)
			break;
		/* Determine token type and parse appropriately.*/
		if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
			/*
			 * We have a symbol token, copy the symbol across, and
			 * advance the source and destination positions.
			 */
			*dst++ = *cb++;
			/* Continue with the next token. */
			continue;
		}
		/*
		 * We have a phrase token. Make sure it is not the first tag
		 * in the sub-block as this is illegal and would confuse the
		 * code below.
		 */
		if (dst == dst_sb_start)
			goto err;
		/*
		 * Determine the number of bytes to go back (p) and the number
		 * of bytes to copy (l). We use an optimized algorithm in
		 * which we first calculate log2(current destination position
		 * in sb), which allows determination of l and p in O(1) rather
		 * than O(n). We just need an arch-optimized log2() function
		 * now.
		 */
		lg = 0;
		for (u = dst - dst_sb_start - 1; u >= 0x10; u >>= 1)
			lg++;
		/* Get the phrase token. */
		pt = le16_to_cpup((u16*)cb);
		/*
		 * Calculate starting position of the byte sequence in the
		 * destination using the fact that p = (pt >> (12 - lg)) + 1
		 * and make sure we do not go too far back.
		 */
		dst_back_addr = dst - (pt >> (12 - lg)) - 1;
		if (dst_back_addr < dst_sb_start)
			goto err;
		/* Now calculate the length (l) of the byte sequence. */
		length = (pt & (0xfff >> lg)) + 3;
		/* Verify destination is in range. */
		if (dst + length > dst_sb_end)
			goto err;
		/* The number of non-overlapping bytes. */
		max_non_overlap = dst - dst_back_addr;
		if (length <= max_non_overlap) {
			/* The byte sequence does not overlap, just copy it. */
			memcpy(dst, dst_back_addr, length);
			/* Advance destination pointer. */
			dst += length;
		} else {
			/*
			 * The byte sequence does overlap, copy non-overlapping
			 * part and then do a slow byte by byte copy for the
			 * overlapping part. Also, advance the destination
			 * pointer.
			 */
			memcpy(dst, dst_back_addr, max_non_overlap);
			dst += max_non_overlap;
			dst_back_addr += max_non_overlap;
			length -= max_non_overlap;
			while (length--)
				*dst++ = *dst_back_addr++;
		}
		/* Advance source position and continue with the next token. */
		cb += 2;
	}
	/* No tokens left in the current tag. Continue with the next tag. */
	goto next_tag;
err:
	ntfs_error(vol->mp, "Compressed data is corrupt. Run chkdsk.");
	NVolSetErrors(vol);
	return EOVERFLOW;
}

/**
 * ntfs_read_compressed - read and decompress data from a compressed attribute
 * @ni:		non-raw ntfs inode to which the raw inode belongs
 * @raw_ni:	raw compressed ntfs inode to read from
 * @ofs:	byte offset into uncompressed data stream to read from
 * @count:	number of bytes to return in the destination buffer
 * @dst_start:	destination buffer in which to return the data
 * @pl:		page list in which @dst_start resides (or NULL)
 * @ioflags:	flags further describing the read (see ntfs_vnop_read())
 *
 * Read compressed data from the raw inode @raw_ni, decompress it, and return
 * the decompressed data in the destination buffer @dst_start of size @count
 * bytes.
 *
 * If @pl is not NULL, it is the page list in which @dst_start resides with
 * @dst_start beginning at offset zero in the first page of the page list.
 *
 * When @pl is not NULL, we have to check each page in the page list for being
 * valid and if it is we have to skip decompression of its data so that we do
 * not overwrite and dirty but not yet committed data and even in the read-only
 * driver we want to skip decompression in this case as there is no point in
 * decompressing data we already have decompressed and have in cache.
 *
 * This function allocates a temporary buffer to hold a compression block and
 * reads each compression block in sequence into it using cluster_read() which
 * gives us read-ahead on the raw inode. Once it has the data it decompresses
 * it straight into the destination buffer @dst_start and stops when @count
 * bytes have been decompressed (usually this will be the end of the
 * compression block).
 *
 * Return 0 on success and errno on error.
 */
errno_t ntfs_read_compressed(ntfs_inode *ni, ntfs_inode *raw_ni, s64 ofs_start,
		int count, u8 *dst_start, upl_page_info_t *pl, int ioflags)
{
	s64 ofs, init_size, raw_size, size;
	ntfs_volume *vol = ni->vol;
	u8 *dst, *cb;
	uio_t uio;
	int err, io_count, pages_per_cb, cb_size, cur_pg, cur_pg_ofs, last_pg;
	int cb_type, zero_end_ofs, dst_ofs_in_cb;

	ntfs_debug("Entering for compressed file inode 0x%llx, offset 0x%llx, "
			"count 0x%x, ioflags 0x%x.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)ofs_start, count, ioflags);
	ofs = ofs_start;
	dst = dst_start;
	cb = NULL;
	uio = NULL;
	zero_end_ofs = last_pg = cur_pg_ofs = cur_pg = 0;
	/*
	 * We can only read from regular files and named streams that are
	 * compressed and non-resident. We should never be called for anything
	 * else.
	 */
	if (!NInoAttr(raw_ni) || raw_ni->type != AT_DATA ||
			!NInoCompressed(raw_ni) || !NInoNonResident(raw_ni) ||
			!NInoRaw(raw_ni) || NInoEncrypted(raw_ni))
		panic("%s(): Called for incorrect inode type.\n", __FUNCTION__);
	if (ofs & PAGE_MASK || count & PAGE_MASK)
		panic("%s(): Called with offset 0x%llx and count 0x%x at "
				"least one of which is not a multiple of the "
				"system page size 0x%x.\n", __FUNCTION__,
				(unsigned long long)ofs, count, PAGE_SIZE);
	cb_size = raw_ni->compression_block_size;
	/*
	 * Doing this means that we can assume that @pages_per_cb <= 1 implies
	 * that @pl is not NULL in the below code.
	 */
	pages_per_cb = 0xffff;
	if (pl) {
		last_pg = count >> PAGE_SHIFT;
		/* Zero if a compression block is smaller than a page. */
		pages_per_cb = cb_size >> PAGE_SHIFT;
	}
	/*
	 * Zero any completely uninitialized pages thus we are guaranteed only
	 * to have a partial uninitialized page which makes the decompression
	 * code simpler.
	 *
	 * NOTE(review): sizes are read from @ni, the non-raw inode —
	 * presumably kept in sync with @raw_ni by the caller; verify.
	 */
	lck_spin_lock(&ni->size_lock);
	init_size = ni->initialized_size;
	raw_size = ni->allocated_size;
	if (ofs > ni->data_size)
		panic("%s(): Called with offset 0x%llx which is beyond the "
				"data size 0x%llx.\n", __FUNCTION__,
				(unsigned long long)ofs,
				(unsigned long long)ni->data_size);
	lck_spin_unlock(&ni->size_lock);
	/* Initialized size rounded up to the next page boundary. */
	size = (init_size + PAGE_MASK) & ~PAGE_MASK_64;
	/* @ofs is page aligned and @count is at least one page in size. */
	if (ofs + count > init_size) {
		int start_count;

		start_count = count;
		/* Do not zero a partial final page yet. */
		count = size - ofs;
		/*
		 * If the beginning of the i/o exceeds the initialized size we
		 * just need to zero the destination and are done.
		 */
		if (ofs > init_size)
			count = 0;
		if (!pl)
			bzero(dst + count, start_count - count);
		else {
			/* Only zero complete, invalid pages. */
			for (cur_pg = count >> PAGE_SHIFT; cur_pg < last_pg;
					cur_pg++) {
				if (!upl_valid_page(pl, cur_pg))
					bzero(dst + (cur_pg << PAGE_SHIFT),
							PAGE_SIZE);
			}
			last_pg = count >> PAGE_SHIFT;
			cur_pg = 0;
		}
		if (!count) {
			ntfs_debug("Done (request in uninitialized region).");
			return 0;
		}
		/* Remember where the partial final page starts, if any. */
		if (init_size < size)
			zero_end_ofs = init_size - ofs;
	}
	dst_ofs_in_cb = ofs & (cb_size - 1);
do_next_cb:
	/* Truncate the final request if it is partial. */
	io_count = cb_size - dst_ofs_in_cb;
	if (io_count > count)
		io_count = count;
	/*
	 * If a page list is present and a page is larger than or equal to a
	 * compression block and the current page is valid, skip this whole
	 * page.
	 */
	if (pages_per_cb <= 1 && upl_valid_page(pl, cur_pg)) {
		io_count = PAGE_SIZE - cur_pg_ofs;
		cur_pg_ofs = 0;
		cur_pg++;
		goto next_cb;
	}
	/* Determine the type of the current compression block. */
	cb_type = ntfs_get_cb_type(raw_ni, ofs - dst_ofs_in_cb);
	/* A non-negative return value is an errno, not a cb type. */
	if (cb_type >= 0) {
		err = cb_type;
		goto err;
	}
	/*
	 * If the compression block is sparse, bzero() the destination buffer
	 * (skipping any valid pages) and go to the next compression block.
	 */
	if (cb_type == NTFS_CB_SPARSE) {
		int stop_pg;

		ntfs_debug("Found sparse compression block.");
		/* Only zero invalid pages. */
		if (!pl || pages_per_cb <= 1) {
			bzero(dst, io_count);
			goto pl_next_cb;
		}
		stop_pg = cur_pg + pages_per_cb;
		if (stop_pg > last_pg)
			stop_pg = last_pg;
		for (; cur_pg < stop_pg; cur_pg++) {
			if (!upl_valid_page(pl, cur_pg))
				bzero(dst_start + (cur_pg << PAGE_SHIFT),
						PAGE_SIZE);
		}
		goto next_cb;
	}
	/*
	 * Create a uio or reset the already created one and point it at the
	 * current attribute offset.
	 */
	if (!uio) {
		uio = uio_create(1, ofs, UIO_SYSSPACE32, UIO_READ);
		if (!uio) {
			ntfs_error(vol->mp, "Not enough memory to allocate "
					"uio.");
			err = ENOMEM;
			goto err;
		}
	}
	/*
	 * If the compression block is not compressed use cluster_read() to
	 * read the uncompressed data straight into our destination buffer.
	 */
	if (cb_type == NTFS_CB_UNCOMPRESSED) {
		int stop_pg;

		ntfs_debug("Found uncompressed compression block.");
		/*
		 * If no page list is present or the only page is invalid use
		 * cluster_read() to read the uncompressed data straight into
		 * our buffer.
		 */
		if (!pl || pages_per_cb <= 1) {
			/*
			 * Add our destination buffer to the uio so we can read
			 * into it using cluster_read().
			 */
			uio_reset(uio, ofs, UIO_SYSSPACE32, UIO_READ);
			err = uio_addiov(uio, CAST_USER_ADDR_T(dst), io_count);
			if (err)
				panic("%s(): uio_addiov() failed.\n",
						__FUNCTION__);
			err = cluster_read(raw_ni->vn, uio, raw_size, ioflags);
			if (err || uio_resid(uio))
				goto cl_err;
			goto pl_next_cb;
		}
		/*
		 * Page list present and multiple pages per compression block.
		 * Iterate over all the pages reading in all invalid page
		 * ranges straight into our buffer.
		 */
		stop_pg = cur_pg + pages_per_cb;
		if (stop_pg > last_pg)
			stop_pg = last_pg;
		for (; cur_pg < stop_pg; cur_pg++) {
			int start_pg, start_ofs;

			/* Skip over valid page ranges. */
			if (upl_valid_page(pl, cur_pg))
				continue;
			/*
			 * We found an invalid page, determine how many
			 * sequential pages are invalid.
			 */
			start_pg = cur_pg;
			while (cur_pg + 1 < stop_pg) {
				if (upl_valid_page(pl, cur_pg + 1))
					break;
				cur_pg++;
			}
			/*
			 * Add our destination buffer to the uio so we can read
			 * into it using cluster_read().
			 */
			start_ofs = start_pg << PAGE_SHIFT;
			uio_reset(uio, ofs_start + start_ofs, UIO_SYSSPACE32,
					UIO_READ);
			err = uio_addiov(uio, CAST_USER_ADDR_T(
					dst_start + start_ofs),
					(cur_pg + 1 - start_pg) << PAGE_SHIFT);
			if (err)
				panic("%s(): uio_addiov() failed.\n",
						__FUNCTION__);
			err = cluster_read(raw_ni->vn, uio, raw_size, ioflags);
			if (err || uio_resid(uio))
				goto cl_err;
		}
		goto next_cb;
	}
	/*
	 * The compression block is compressed. Read the compressed data into
	 * our temporary buffer, allocating it if we have not done so yet.
	 */
	ntfs_debug("Found compressed compression block.");
	if (!cb) {
		cb = OSMalloc(cb_size, ntfs_malloc_tag);
		if (!cb) {
			ntfs_error(vol->mp, "Not enough memory to allocate "
					"temporary buffer.");
			err = ENOMEM;
			goto err;
		}
	}
	/*
	 * FIXME: As an optimization we could only read the actual allocated
	 * clusters so cluster_read() does not waste time zeroing the sparse
	 * clusters when there are whole pages worth of them. Probably not
	 * worth the effort as that may mess around with the read-ahead
	 * streaming detection and having read-ahead messed up is likely to
	 * cause a performance hit outweighing the benefit gained from not
	 * doing the zeroing. Especially so since we would incur overhead in
	 * determining the number of non-sparse clusters.
	 */
	uio_reset(uio, ofs - dst_ofs_in_cb, UIO_SYSSPACE32, UIO_READ);
	err = uio_addiov(uio, CAST_USER_ADDR_T(cb), cb_size);
	if (err)
		panic("%s(): uio_addiov() failed.\n", __FUNCTION__);
	err = cluster_read(raw_ni->vn, uio, raw_size, ioflags);
	if (err || uio_resid(uio))
		goto cl_err;
	/*
	 * We now have the compressed data. Decompress it into the destination
	 * buffer skipping any valid pages if a page list is present.
	 */
	err = ntfs_decompress(vol, dst, dst_ofs_in_cb, io_count, cb, cb_size,
			pl, cur_pg, pages_per_cb);
	if (err) {
		ntfs_error(vol->mp, "Failed to decompress data (error %d).",
				err);
		goto err;
	}
pl_next_cb:
	if (pl) {
		cur_pg += pages_per_cb;
		if (!pages_per_cb) {
			/*
			 * A compression block is smaller than a page; only
			 * advance the page index once a whole page's worth of
			 * compression blocks has been consumed.
			 */
			cur_pg_ofs += io_count;
			if (cur_pg_ofs >= PAGE_SIZE) {
				cur_pg_ofs = 0;
				cur_pg++;
			}
		}
	}
next_cb:
	ofs += io_count;
	dst += io_count;
	count -= io_count;
	/* Only the first compression block can have a non-zero offset. */
	dst_ofs_in_cb = 0;
	/* Are we done yet? */
	if (count > 0)
		goto do_next_cb;
	/*
	 * Check if the last page is partially outside the initialized size and
	 * if so zero its uninitialized tail.
	 */
	if (zero_end_ofs) {
		count = PAGE_SIZE - (zero_end_ofs & PAGE_MASK);
		if (!pl || !upl_valid_page(pl, zero_end_ofs >> PAGE_SHIFT))
			bzero(dst_start + zero_end_ofs, count);
	}
	if (uio)
		uio_free(uio);
	if (cb)
		OSFree(cb, cb_size, ntfs_malloc_tag);
	ntfs_debug("Done.");
	return 0;
cl_err:
	if (err)
		ntfs_error(vol->mp, "Failed to read %scompressed compression "
				"block using cluster_read() (error %d).",
				cb_type == NTFS_CB_COMPRESSED ? "" : "un",
				err);
	else {
		ntfs_error(vol->mp, "Partial read when reading %scompressed "
				"compression block using cluster_read().",
				cb_type == NTFS_CB_COMPRESSED ? "" : "un");
		err = EIO;
	}
err:
	if (uio)
		uio_free(uio);
	if (cb)
		OSFree(cb, cb_size, ntfs_malloc_tag);
	ntfs_error(vol->mp, "Failed (error %d).", err);
	return err;
}