1/**
2 * compress.c - Compressed attribute handling code.  Originated from the Linux-NTFS
3 *		project.
4 *
5 * Copyright (c) 2004-2005 Anton Altaparmakov
6 * Copyright (c) 2004-2006 Szabolcs Szakacsits
7 * Copyright (c)      2005 Yura Pakhuchiy
8 *
9 * This program/include file is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as published
11 * by the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program/include file is distributed in the hope that it will be
15 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
16 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program (in the main directory of the NTFS-3G
21 * distribution in the file COPYING); if not, write to the Free Software
22 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24
25#ifdef HAVE_CONFIG_H
26#include "config.h"
27#endif
28
29#ifdef HAVE_STDIO_H
30#include <stdio.h>
31#endif
32#ifdef HAVE_STRING_H
33#include <string.h>
34#endif
35#ifdef HAVE_STDLIB_H
36#include <stdlib.h>
37#endif
38#ifdef HAVE_ERRNO_H
39#include <errno.h>
40#endif
41
42#include "attrib.h"
43#include "debug.h"
44#include "volume.h"
45#include "types.h"
46#include "layout.h"
47#include "runlist.h"
48#include "compress.h"
49#include "logging.h"
50#include "misc.h"
51
52/**
53 * enum ntfs_compression_constants - constants used in the compression code
54 */
55typedef enum {
56	/* Token types and access mask. */
57	NTFS_SYMBOL_TOKEN	=	0,
58	NTFS_PHRASE_TOKEN	=	1,
59	NTFS_TOKEN_MASK		=	1,
60
61	/* Compression sub-block constants. */
62	NTFS_SB_SIZE_MASK	=	0x0fff,
63	NTFS_SB_SIZE		=	0x1000,
64	NTFS_SB_IS_COMPRESSED	=	0x8000,
65} ntfs_compression_constants;
66
67/**
68 * ntfs_decompress - decompress a compression block into an array of pages
69 * @dest:	buffer to which to write the decompressed data
70 * @dest_size:	size of buffer @dest in bytes
71 * @cb_start:	compression block to decompress
72 * @cb_size:	size of compression block @cb_start in bytes
73 *
74 * This decompresses the compression block @cb_start into the destination
75 * buffer @dest.
76 *
77 * @cb_start is a pointer to the compression block which needs decompressing
78 * and @cb_size is the size of @cb_start in bytes (8-64kiB).
79 *
80 * Return 0 if success or -EOVERFLOW on error in the compressed stream.
81 */
82static int ntfs_decompress(u8 *dest, const u32 dest_size,
83		u8 *const cb_start, const u32 cb_size)
84{
85	/*
86	 * Pointers into the compressed data, i.e. the compression block (cb),
87	 * and the therein contained sub-blocks (sb).
88	 */
89	u8 *cb_end = cb_start + cb_size; /* End of cb. */
90	u8 *cb = cb_start;	/* Current position in cb. */
91	u8 *cb_sb_start = cb;	/* Beginning of the current sb in the cb. */
92	u8 *cb_sb_end;		/* End of current sb / beginning of next sb. */
93	/* Variables for uncompressed data / destination. */
94	u8 *dest_end = dest + dest_size;	/* End of dest buffer. */
95	u8 *dest_sb_start;	/* Start of current sub-block in dest. */
96	u8 *dest_sb_end;	/* End of current sb in dest. */
97	/* Variables for tag and token parsing. */
98	u8 tag;			/* Current tag. */
99	int token;		/* Loop counter for the eight tokens in tag. */
100
101	ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size);
102do_next_sb:
103	ntfs_log_debug("Beginning sub-block at offset = %d in the cb.\n",
104			(int)(cb - cb_start));
105	/*
106	 * Have we reached the end of the compression block or the end of the
107	 * decompressed data?  The latter can happen for example if the current
108	 * position in the compression block is one byte before its end so the
109	 * first two checks do not detect it.
110	 */
111	if (cb == cb_end || !le16_to_cpup((u16*)cb) || dest == dest_end) {
112		ntfs_log_debug("Completed. Returning success (0).\n");
113		return 0;
114	}
115	/* Setup offset for the current sub-block destination. */
116	dest_sb_start = dest;
117	dest_sb_end = dest + NTFS_SB_SIZE;
118	/* Check that we are still within allowed boundaries. */
119	if (dest_sb_end > dest_end)
120		goto return_overflow;
121	/* Does the minimum size of a compressed sb overflow valid range? */
122	if (cb + 6 > cb_end)
123		goto return_overflow;
124	/* Setup the current sub-block source pointers and validate range. */
125	cb_sb_start = cb;
126	cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK)
127			+ 3;
128	if (cb_sb_end > cb_end)
129		goto return_overflow;
130	/* Now, we are ready to process the current sub-block (sb). */
131	if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) {
132		ntfs_log_debug("Found uncompressed sub-block.\n");
133		/* This sb is not compressed, just copy it into destination. */
134		/* Advance source position to first data byte. */
135		cb += 2;
136		/* An uncompressed sb must be full size. */
137		if (cb_sb_end - cb != NTFS_SB_SIZE)
138			goto return_overflow;
139		/* Copy the block and advance the source position. */
140		memcpy(dest, cb, NTFS_SB_SIZE);
141		cb += NTFS_SB_SIZE;
142		/* Advance destination position to next sub-block. */
143		dest += NTFS_SB_SIZE;
144		goto do_next_sb;
145	}
146	ntfs_log_debug("Found compressed sub-block.\n");
147	/* This sb is compressed, decompress it into destination. */
148	/* Forward to the first tag in the sub-block. */
149	cb += 2;
150do_next_tag:
151	if (cb == cb_sb_end) {
152		/* Check if the decompressed sub-block was not full-length. */
153		if (dest < dest_sb_end) {
154			int nr_bytes = dest_sb_end - dest;
155
156			ntfs_log_debug("Filling incomplete sub-block with zeroes.\n");
157			/* Zero remainder and update destination position. */
158			memset(dest, 0, nr_bytes);
159			dest += nr_bytes;
160		}
161		/* We have finished the current sub-block. */
162		goto do_next_sb;
163	}
164	/* Check we are still in range. */
165	if (cb > cb_sb_end || dest > dest_sb_end)
166		goto return_overflow;
167	/* Get the next tag and advance to first token. */
168	tag = *cb++;
169	/* Parse the eight tokens described by the tag. */
170	for (token = 0; token < 8; token++, tag >>= 1) {
171		u16 lg, pt, length, max_non_overlap;
172		register u16 i;
173		u8 *dest_back_addr;
174
175		/* Check if we are done / still in range. */
176		if (cb >= cb_sb_end || dest > dest_sb_end)
177			break;
178		/* Determine token type and parse appropriately.*/
179		if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
180			/*
181			 * We have a symbol token, copy the symbol across, and
182			 * advance the source and destination positions.
183			 */
184			*dest++ = *cb++;
185			/* Continue with the next token. */
186			continue;
187		}
188		/*
189		 * We have a phrase token. Make sure it is not the first tag in
190		 * the sb as this is illegal and would confuse the code below.
191		 */
192		if (dest == dest_sb_start)
193			goto return_overflow;
194		/*
195		 * Determine the number of bytes to go back (p) and the number
196		 * of bytes to copy (l). We use an optimized algorithm in which
197		 * we first calculate log2(current destination position in sb),
198		 * which allows determination of l and p in O(1) rather than
199		 * O(n). We just need an arch-optimized log2() function now.
200		 */
201		lg = 0;
202		for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1)
203			lg++;
204		/* Get the phrase token into i. */
205		pt = le16_to_cpup((u16*)cb);
206		/*
207		 * Calculate starting position of the byte sequence in
208		 * the destination using the fact that p = (pt >> (12 - lg)) + 1
209		 * and make sure we don't go too far back.
210		 */
211		dest_back_addr = dest - (pt >> (12 - lg)) - 1;
212		if (dest_back_addr < dest_sb_start)
213			goto return_overflow;
214		/* Now calculate the length of the byte sequence. */
215		length = (pt & (0xfff >> lg)) + 3;
216		/* Verify destination is in range. */
217		if (dest + length > dest_sb_end)
218			goto return_overflow;
219		/* The number of non-overlapping bytes. */
220		max_non_overlap = dest - dest_back_addr;
221		if (length <= max_non_overlap) {
222			/* The byte sequence doesn't overlap, just copy it. */
223			memcpy(dest, dest_back_addr, length);
224			/* Advance destination pointer. */
225			dest += length;
226		} else {
227			/*
228			 * The byte sequence does overlap, copy non-overlapping
229			 * part and then do a slow byte by byte copy for the
230			 * overlapping part. Also, advance the destination
231			 * pointer.
232			 */
233			memcpy(dest, dest_back_addr, max_non_overlap);
234			dest += max_non_overlap;
235			dest_back_addr += max_non_overlap;
236			length -= max_non_overlap;
237			while (length--)
238				*dest++ = *dest_back_addr++;
239		}
240		/* Advance source position and continue with the next token. */
241		cb += 2;
242	}
243	/* No tokens left in the current tag. Continue with the next tag. */
244	goto do_next_tag;
245return_overflow:
246	errno = EOVERFLOW;
247	ntfs_log_perror("Failed to decompress file");
248	return -1;
249}
250
251/**
252 * ntfs_is_cb_compressed - internal function, do not use
253 *
254 * This is a very specialised function determining if a cb is compressed or
255 * uncompressed.  It is assumed that checking for a sparse cb has already been
256 * performed and that the cb is not sparse.  It makes all sorts of other
257 * assumptions as well and hence it is not useful anywhere other than where it
258 * is used at the moment.  Please, do not make this function available for use
259 * outside of compress.c as it is bound to confuse people and not do what they
260 * want.
261 *
262 * Return TRUE on errors so that the error will be detected later on in the
263 * code.  Might be a bit confusing to debug but there really should never be
264 * errors coming from here.
265 */
266static BOOL ntfs_is_cb_compressed(ntfs_attr *na, runlist_element *rl,
267				  VCN cb_start_vcn, int cb_clusters)
268{
269	/*
270	 * The simplest case: the run starting at @cb_start_vcn contains
271	 * @cb_clusters clusters which are all not sparse, thus the cb is not
272	 * compressed.
273	 */
274restart:
275	cb_clusters -= rl->length - (cb_start_vcn - rl->vcn);
276	while (cb_clusters > 0) {
277		/* Go to the next run. */
278		rl++;
279		/* Map the next runlist fragment if it is not mapped. */
280		if (rl->lcn < LCN_HOLE || !rl->length) {
281			cb_start_vcn = rl->vcn;
282			rl = ntfs_attr_find_vcn(na, rl->vcn);
283			if (!rl || rl->lcn < LCN_HOLE || !rl->length)
284				return TRUE;
285			/*
286			 * If the runs were merged need to deal with the
287			 * resulting partial run so simply restart.
288			 */
289			if (rl->vcn < cb_start_vcn)
290				goto restart;
291		}
292		/* If the current run is sparse, the cb is compressed. */
293		if (rl->lcn == LCN_HOLE)
294			return TRUE;
295		/* If the whole cb is not sparse, it is not compressed. */
296		if (rl->length >= cb_clusters)
297			return FALSE;
298		cb_clusters -= rl->length;
299	};
300	/* All cb_clusters were not sparse thus the cb is not compressed. */
301	return FALSE;
302}
303
304/**
305 * ntfs_compressed_attr_pread - read from a compressed attribute
306 * @na:		ntfs attribute to read from
307 * @pos:	byte position in the attribute to begin reading from
308 * @count:	number of bytes to read
309 * @b:		output data buffer
310 *
311 * NOTE:  You probably want to be using attrib.c::ntfs_attr_pread() instead.
312 *
313 * This function will read @count bytes starting at offset @pos from the
314 * compressed ntfs attribute @na into the data buffer @b.
315 *
316 * On success, return the number of successfully read bytes.  If this number
317 * is lower than @count this means that the read reached end of file or that
318 * an error was encountered during the read so that the read is partial.
319 * 0 means end of file or nothing was read (also return 0 when @count is 0).
320 *
321 * On error and nothing has been read, return -1 with errno set appropriately
322 * to the return code of ntfs_pread(), or to EINVAL in case of invalid
323 * arguments.
324 */
325s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b)
326{
327	s64 br, to_read, ofs, total, total2;
328	u64 cb_size_mask;
329	VCN start_vcn, vcn, end_vcn;
330	ntfs_volume *vol;
331	runlist_element *rl;
332	u8 *dest, *cb, *cb_pos, *cb_end;
333	u32 cb_size;
334	int err;
335	unsigned int nr_cbs, cb_clusters;
336
337	ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n",
338			(unsigned long long)na->ni->mft_no, na->type,
339			(long long)pos, (long long)count);
340	if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b ||
341			pos < 0 || count < 0) {
342		errno = EINVAL;
343		return -1;
344	}
345	/*
346	 * Encrypted attributes are not supported.  We return access denied,
347	 * which is what Windows NT4 does, too.
348	 */
349	if (NAttrEncrypted(na)) {
350		errno = EACCES;
351		return -1;
352	}
353	if (!count)
354		return 0;
355	/* Truncate reads beyond end of attribute. */
356	if (pos + count > na->data_size) {
357		if (pos >= na->data_size) {
358			return 0;
359		}
360		count = na->data_size - pos;
361	}
362	/* If it is a resident attribute, simply use ntfs_attr_pread(). */
363	if (!NAttrNonResident(na))
364		return ntfs_attr_pread(na, pos, count, b);
365	total = total2 = 0;
366	/* Zero out reads beyond initialized size. */
367	if (pos + count > na->initialized_size) {
368		if (pos >= na->initialized_size) {
369			memset(b, 0, count);
370			return count;
371		}
372		total2 = pos + count - na->initialized_size;
373		count -= total2;
374		memset((u8*)b + count, 0, total2);
375	}
376	vol = na->ni->vol;
377	cb_size = na->compression_block_size;
378	cb_size_mask = cb_size - 1UL;
379	cb_clusters = na->compression_block_clusters;
380
381	/* Need a temporary buffer for each loaded compression block. */
382	cb = ntfs_malloc(cb_size);
383	if (!cb)
384		return -1;
385
386	/* Need a temporary buffer for each uncompressed block. */
387	dest = ntfs_malloc(cb_size);
388	if (!dest) {
389		free(cb);
390		return -1;
391	}
392	/*
393	 * The first vcn in the first compression block (cb) which we need to
394	 * decompress.
395	 */
396	start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits;
397	/* Offset in the uncompressed cb at which to start reading data. */
398	ofs = pos & cb_size_mask;
399	/*
400	 * The first vcn in the cb after the last cb which we need to
401	 * decompress.
402	 */
403	end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >>
404			vol->cluster_size_bits;
405	/* Number of compression blocks (cbs) in the wanted vcn range. */
406	nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >>
407			na->compression_block_size_bits;
408	cb_end = cb + cb_size;
409do_next_cb:
410	nr_cbs--;
411	cb_pos = cb;
412	vcn = start_vcn;
413	start_vcn += cb_clusters;
414
415	/* Check whether the compression block is sparse. */
416	rl = ntfs_attr_find_vcn(na, vcn);
417	if (!rl || rl->lcn < LCN_HOLE) {
418		free(cb);
419		free(dest);
420		if (total)
421			return total;
422		/* FIXME: Do we want EIO or the error code? (AIA) */
423		errno = EIO;
424		return -1;
425	}
426	if (rl->lcn == LCN_HOLE) {
427		/* Sparse cb, zero out destination range overlapping the cb. */
428		ntfs_log_debug("Found sparse compression block.\n");
429		to_read = min(count, cb_size - ofs);
430		memset(b, 0, to_read);
431		ofs = 0;
432		total += to_read;
433		count -= to_read;
434		b = (u8*)b + to_read;
435	} else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) {
436		s64 tdata_size, tinitialized_size;
437		/*
438		 * Uncompressed cb, read it straight into the destination range
439		 * overlapping the cb.
440		 */
441		ntfs_log_debug("Found uncompressed compression block.\n");
442		/*
443		 * Read the uncompressed data into the destination buffer.
444		 * NOTE: We cheat a little bit here by marking the attribute as
445		 * not compressed in the ntfs_attr structure so that we can
446		 * read the data by simply using ntfs_attr_pread().  (-8
447		 * NOTE: we have to modify data_size and initialized_size
448		 * temporarily as well...
449		 */
450		to_read = min(count, cb_size - ofs);
451		ofs += vcn << vol->cluster_size_bits;
452		NAttrClearCompressed(na);
453		tdata_size = na->data_size;
454		tinitialized_size = na->initialized_size;
455		na->data_size = na->initialized_size = na->allocated_size;
456		do {
457			br = ntfs_attr_pread(na, ofs, to_read, b);
458			if (br < 0) {
459				err = errno;
460				na->data_size = tdata_size;
461				na->initialized_size = tinitialized_size;
462				NAttrSetCompressed(na);
463				free(cb);
464				free(dest);
465				if (total)
466					return total;
467				errno = err;
468				return br;
469			}
470			total += br;
471			count -= br;
472			b = (u8*)b + br;
473			to_read -= br;
474			ofs += br;
475		} while (to_read > 0);
476		na->data_size = tdata_size;
477		na->initialized_size = tinitialized_size;
478		NAttrSetCompressed(na);
479		ofs = 0;
480	} else {
481		s64 tdata_size, tinitialized_size;
482
483		/*
484		 * Compressed cb, decompress it into the temporary buffer, then
485		 * copy the data to the destination range overlapping the cb.
486		 */
487		ntfs_log_debug("Found compressed compression block.\n");
488		/*
489		 * Read the compressed data into the temporary buffer.
490		 * NOTE: We cheat a little bit here by marking the attribute as
491		 * not compressed in the ntfs_attr structure so that we can
492		 * read the raw, compressed data by simply using
493		 * ntfs_attr_pread().  (-8
494		 * NOTE: We have to modify data_size and initialized_size
495		 * temporarily as well...
496		 */
497		to_read = cb_size;
498		NAttrClearCompressed(na);
499		tdata_size = na->data_size;
500		tinitialized_size = na->initialized_size;
501		na->data_size = na->initialized_size = na->allocated_size;
502		do {
503			br = ntfs_attr_pread(na,
504					(vcn << vol->cluster_size_bits) +
505					(cb_pos - cb), to_read, cb_pos);
506			if (br < 0) {
507				err = errno;
508				na->data_size = tdata_size;
509				na->initialized_size = tinitialized_size;
510				NAttrSetCompressed(na);
511				free(cb);
512				free(dest);
513				if (total)
514					return total;
515				errno = err;
516				return br;
517			}
518			cb_pos += br;
519			to_read -= br;
520		} while (to_read > 0);
521		na->data_size = tdata_size;
522		na->initialized_size = tinitialized_size;
523		NAttrSetCompressed(na);
524		/* Just a precaution. */
525		if (cb_pos + 2 <= cb_end)
526			*(u16*)cb_pos = 0;
527		ntfs_log_debug("Successfully read the compression block.\n");
528		if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) {
529			err = errno;
530			free(cb);
531			free(dest);
532			if (total)
533				return total;
534			errno = err;
535			return -1;
536		}
537		to_read = min(count, cb_size - ofs);
538		memcpy(b, dest + ofs, to_read);
539		total += to_read;
540		count -= to_read;
541		b = (u8*)b + to_read;
542		ofs = 0;
543	}
544	/* Do we have more work to do? */
545	if (nr_cbs)
546		goto do_next_cb;
547	/* We no longer need the buffers. */
548	free(cb);
549	free(dest);
550	/* Return number of bytes read. */
551	return total + total2;
552}
553