/**
 * compress.c - Compressed attribute handling code.  Originated from the Linux-NTFS
 *		project.
 *
 * Copyright (c) 2004-2005 Anton Altaparmakov
 * Copyright (c) 2004-2006 Szabolcs Szakacsits
 * Copyright (c)      2005 Yura Pakhuchiy
 * Copyright (c) 2009-2014 Jean-Pierre Andre
 * Copyright (c)      2014 Eric Biggers
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the NTFS-3G
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

#include "attrib.h"
#include "debug.h"
#include "volume.h"
#include "types.h"
#include "layout.h"
#include "runlist.h"
#include "compress.h"
#include "lcnalloc.h"
#include "logging.h"
#include "misc.h"

#undef le16_to_cpup
/* the standard le16_to_cpup() crashes for unaligned data on some processors */
#define le16_to_cpup(p) (*(u8*)(p) + (((u8*)(p))[1] << 8))

/**
 * enum ntfs_compression_constants - constants used in the compression code
 */
typedef enum {
	/* Token types and access mask. */
	NTFS_SYMBOL_TOKEN	=	0,
	NTFS_PHRASE_TOKEN	=	1,
	NTFS_TOKEN_MASK		=	1,

	/* Compression sub-block constants. */
	NTFS_SB_SIZE_MASK	=	0x0fff,
	NTFS_SB_SIZE		=	0x1000,
	NTFS_SB_IS_COMPRESSED	=	0x8000,
} ntfs_compression_constants;
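
/*
 * Worked example : each sub-block begins with a little-endian 16-bit header
 * whose low 12 bits store (total sub-block size, including the two header
 * bytes, minus 3), and whose 0x8000 bit flags compressed data.  The header
 * bytes 0xff 0x3f decode to 0x3fff : bit 0x8000 clear means uncompressed,
 * and (0x3fff & NTFS_SB_SIZE_MASK) + 3 = 0x1002 bytes in all, i.e. two
 * header bytes followed by a full 0x1000 bytes of literal data.  This is
 * exactly the header ntfs_compress_block() emits for an incompressible
 * sub-block.
 */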

/* Match length at or above which ntfs_best_match() will stop searching for
 * longer matches.  */
#define NICE_MATCH_LEN 18

/* Maximum number of potential matches that ntfs_best_match() will consider at
 * each position.  */
#define MAX_SEARCH_DEPTH 24

/* Log base 2 of the number of entries in the hash table for match-finding.  */
#define HASH_SHIFT 14

/* Constant for the multiplicative hash function.  */
#define HASH_MULTIPLIER 0x1E35A7BD

struct COMPRESS_CONTEXT {
	const unsigned char *inbuf;
	int bufsize;
	int size;
	int rel;
	int mxsz;
	s16 head[1 << HASH_SHIFT];
	s16 prev[NTFS_SB_SIZE];
};
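
/*
 * Sizing note : with HASH_SHIFT 14 and NTFS_SB_SIZE 0x1000, 'head' holds
 * 16384 s16 entries (32 KiB) and 'prev' holds 4096 s16 entries (8 KiB),
 * so each compressed sub-block allocates roughly 40 KiB of match-finding
 * state in ntfs_compress_block().
 */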

/*
 *		Hash the next 3-byte sequence in the input buffer
 */
static inline unsigned int ntfs_hash(const u8 *p)
{
	u32 str;
	u32 hash;

#if defined(__i386__) || defined(__x86_64__)
	/* Unaligned access allowed, and little endian CPU.
	 * Callers ensure that at least 4 (not 3) bytes are remaining.  */
	str = *(const u32 *)p & 0xFFFFFF;
#else
	str = ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
#endif

	hash = str * HASH_MULTIPLIER;

	/* High bits are more random than the low bits.  */
	return hash >> (32 - HASH_SHIFT);
}
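
/*
 * Illustration : for input bytes p[0] = 0x61, p[1] = 0x62, p[2] = 0x63
 * ("abc"), both paths above build str = 0x00636261, and the return value is
 * the top HASH_SHIFT (14) bits of the 32-bit product str * HASH_MULTIPLIER.
 * Positions whose next three bytes are identical always land in the same
 * hash chain; unrelated triples may still collide and are weeded out by the
 * byte comparisons in ntfs_best_match().
 */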

/*
 *		Search for the longest sequence matching current position
 *
 *	A hash table, each entry of which points to a chain of sequence
 *	positions sharing the corresponding hash code, is maintained to speed up
 *	searching for matches.  To maintain the hash table, either
 *	ntfs_best_match() or ntfs_skip_position() has to be called for each
 *	consecutive position.
 *
 *	This function is heavily used; it has to be optimized carefully.
 *
 *	This function sets pctx->size and pctx->rel to the length and offset,
 *	respectively, of the longest match found.
 *
 *	The minimum match length is assumed to be 3, and the maximum match
 *	length is assumed to be pctx->mxsz.  If this function produces
 *	pctx->size < 3, then no match was found.
 *
 *	Note: for the following reasons, this function is not guaranteed to find
 *	*the* longest match up to pctx->mxsz:
 *
 *	(1) If this function finds a match of NICE_MATCH_LEN bytes or greater,
 *	    it ends early because a match this long is good enough and it's not
 *	    worth spending more time searching.
 *
 *	(2) If this function considers MAX_SEARCH_DEPTH matches at a single
 *	    position, it ends early and returns the longest match found so far.
 *	    This saves a lot of time on degenerate inputs.
 */
static void ntfs_best_match(struct COMPRESS_CONTEXT *pctx, const int i,
			    int best_len)
{
	const u8 * const inbuf = pctx->inbuf;
	const u8 * const strptr = &inbuf[i]; /* String we're matching against */
	s16 * const prev = pctx->prev;
	const int max_len = min(pctx->bufsize - i, pctx->mxsz);
	const int nice_len = min(NICE_MATCH_LEN, max_len);
	int depth_remaining = MAX_SEARCH_DEPTH;
	const u8 *best_matchptr = strptr;
	unsigned int hash;
	s16 cur_match;
	const u8 *matchptr;
	int len;

	if (max_len < 4)
		goto out;

	/* Insert the current sequence into the appropriate hash chain.  */
	hash = ntfs_hash(strptr);
	cur_match = pctx->head[hash];
	prev[i] = cur_match;
	pctx->head[hash] = i;

	if (best_len >= max_len) {
		/* Lazy match is being attempted, but there aren't enough length
		 * bits remaining to code a longer match.  */
		goto out;
	}

	/* Search the appropriate hash chain for matches.  */

	for (; cur_match >= 0 && depth_remaining--;
		cur_match = prev[cur_match])
	{

		matchptr = &inbuf[cur_match];

		/* Considering the potential match at 'matchptr':  is it longer
		 * than 'best_len'?
		 *
		 * The bytes at index 'best_len' are the most likely to differ,
		 * so check them first.
		 *
		 * The bytes at indices 'best_len - 1' and '0' are less
		 * important to check separately.  But doing so still gives a
		 * slight performance improvement, at least on x86_64, probably
		 * because they create separate branches for the CPU to predict
		 * independently of the branches in the main comparison loops.
		 */
		if (matchptr[best_len] != strptr[best_len] ||
		    matchptr[best_len - 1] != strptr[best_len - 1] ||
		    matchptr[0] != strptr[0])
			goto next_match;

		for (len = 1; len < best_len - 1; len++)
			if (matchptr[len] != strptr[len])
				goto next_match;

		/* The match is the longest found so far ---
		 * at least 'best_len' + 1 bytes.  Continue extending it.  */

		best_matchptr = matchptr;

		do {
			if (++best_len >= nice_len) {
				/* 'nice_len' reached; don't waste time
				 * searching for longer matches.  Extend the
				 * match as far as possible and terminate the
				 * search.  */
				while (best_len < max_len &&
					(best_matchptr[best_len] ==
						strptr[best_len]))
				{
					best_len++;
				}
				goto out;
			}
		} while (best_matchptr[best_len] == strptr[best_len]);

		/* Found a longer match, but 'nice_len' not yet reached.  */

	next_match:
		/* Continue to next match in the chain.  */
		;
	}

	/* Reached end of chain, or ended early due to reaching the maximum
	 * search depth.  */

out:
	/* Return the longest match we were able to find.  */
	pctx->size = best_len;
	pctx->rel = best_matchptr - strptr; /* given as a negative number! */
}

/*
 *		Advance the match-finder, but don't search for matches.
 */
static void ntfs_skip_position(struct COMPRESS_CONTEXT *pctx, const int i)
{
	unsigned int hash;

	if (pctx->bufsize - i < 4)
		return;

	/* Insert the current sequence into the appropriate hash chain.  */
	hash = ntfs_hash(pctx->inbuf + i);
	pctx->prev[i] = pctx->head[hash];
	pctx->head[hash] = i;
}

/*
 *		Compress a 4096-byte block
 *
 *	Returns a header of two bytes followed by the compressed data.
 *	If compression is not effective, the header and an uncompressed
 *	block are returned.
 *
 *	Note : two bytes may be output before output buffer overflow
 *	is detected, so a 4100-byte output buffer must be reserved.
 *
 *	Returns the size of the compressed block, including the
 *			header (minimal size is 2, maximum size is 4098)
 *		or 0 if an error occurred.
 */

static unsigned int ntfs_compress_block(const char *inbuf, const int bufsize,
				char *outbuf)
{
	struct COMPRESS_CONTEXT *pctx;
	int i; /* current position */
	int j; /* end of best match from current position */
	int k; /* end of best match from next position */
	int offs; /* offset to best match */
	int bp; /* bits to store offset */
	int bp_cur; /* saved bits to store offset at current position */
	int mxoff; /* max match offset : 1 << bp */
	unsigned int xout;
	unsigned int q; /* aggregated offset and size */
	int have_match; /* do we have a match at the current position? */
	char *ptag; /* location reserved for a tag */
	int tag;    /* current value of tag */
	int ntag;   /* count of bits still undefined in tag */

	pctx = ntfs_malloc(sizeof(struct COMPRESS_CONTEXT));
	if (!pctx) {
		errno = ENOMEM;
		return 0;
	}

	/* All hash chains start as empty.  The special value '-1' indicates the
	 * end of each hash chain.  */
	memset(pctx->head, 0xFF, sizeof(pctx->head));

	pctx->inbuf = (const unsigned char*)inbuf;
	pctx->bufsize = bufsize;
	xout = 2;
	i = 0;
	bp = 4;
	mxoff = 1 << bp;
	pctx->mxsz = (1 << (16 - bp)) + 2;
	have_match = 0;
	tag = 0;
	ntag = 8;
	ptag = &outbuf[xout++];

	while ((i < bufsize) && (xout < (NTFS_SB_SIZE + 2))) {

		/* This implementation uses "lazy" parsing: it always chooses
		 * the longest match, unless the match at the next position is
		 * longer.  This is the same strategy used by the high
		 * compression modes of zlib.  */

		if (!have_match) {
			/* Find the longest match at the current position.  But
			 * first adjust the maximum match length if needed.
			 * (This loop might need to run more than one time in
			 * the case that we just output a long match.)  */
			while (mxoff < i) {
				bp++;
				mxoff <<= 1;
				pctx->mxsz = (pctx->mxsz + 2) >> 1;
			}
			ntfs_best_match(pctx, i, 2);
		}

		if (pctx->size >= 3) {

			/* Found a match at the current position.  */

			j = i + pctx->size;
			bp_cur = bp;
			offs = pctx->rel;

			if (pctx->size >= NICE_MATCH_LEN) {

				/* Choose long matches immediately.  */

				q = (~offs << (16 - bp_cur)) + (j - i - 3);
				outbuf[xout++] = q & 255;
				outbuf[xout++] = (q >> 8) & 255;
				tag |= (1 << (8 - ntag));

				if (j == bufsize) {
					/* Shortcut if the match extends to the
					 * end of the buffer.  */
					i = j;
					--ntag;
					break;
				}
				i += 1;
				do {
					ntfs_skip_position(pctx, i);
				} while (++i != j);
				have_match = 0;
			} else {
				/* Check for a longer match at the next
				 * position.  */

				/* Doesn't need to be while() since we just
				 * adjusted the maximum match length at the
				 * previous position.  */
				if (mxoff < i + 1) {
					bp++;
					mxoff <<= 1;
					pctx->mxsz = (pctx->mxsz + 2) >> 1;
				}
				ntfs_best_match(pctx, i + 1, pctx->size);
				k = i + 1 + pctx->size;

				if (k > (j + 1)) {
					/* Next match is longer.
					 * Output a literal.  */
					outbuf[xout++] = inbuf[i++];
					have_match = 1;
				} else {
					/* Next match isn't longer.
					 * Output the current match.  */
					q = (~offs << (16 - bp_cur)) +
							(j - i - 3);
					outbuf[xout++] = q & 255;
					outbuf[xout++] = (q >> 8) & 255;
					tag |= (1 << (8 - ntag));

					/* The minimum match length is 3, and
					 * we've run two bytes through the
					 * matchfinder already.  So the minimum
					 * number of positions we need to skip
					 * is 1.  */
					i += 2;
					do {
						ntfs_skip_position(pctx, i);
					} while (++i != j);
					have_match = 0;
				}
			}
		} else {
			/* No match at current position.  Output a literal.  */
			outbuf[xout++] = inbuf[i++];
			have_match = 0;
		}

		/* Store the tag if fully used.  */
		if (!--ntag) {
			*ptag = tag;
			ntag = 8;
			ptag = &outbuf[xout++];
			tag = 0;
		}
	}

	/* Store the last tag if partially used.  */
	if (ntag == 8)
		xout--;
	else
		*ptag = tag;

	/* Determine whether to store the data compressed or uncompressed.  */

	if ((i >= bufsize) && (xout < (NTFS_SB_SIZE + 2))) {
		/* Compressed.  */
		outbuf[0] = (xout - 3) & 255;
		outbuf[1] = 0xb0 + (((xout - 3) >> 8) & 15);
	} else {
		/* Uncompressed.  */
		memcpy(&outbuf[2], inbuf, bufsize);
		if (bufsize < NTFS_SB_SIZE)
			memset(&outbuf[bufsize + 2], 0, NTFS_SB_SIZE - bufsize);
		outbuf[0] = 0xff;
		outbuf[1] = 0x3f;
		xout = NTFS_SB_SIZE + 2;
	}

	/* Free the compression context and return the total number of bytes
	 * written to 'outbuf'.  */
	free(pctx);
	return (xout);
}
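
/*
 * Worked example : compressing the one-byte input { 0x00 } reserves two
 * header bytes, emits one tag byte and one literal, and ends with xout = 4.
 * The tag is 0x00 (literal token) and the header stores xout - 3 = 1 with
 * the 0xb0 marker, so the output is { 0x01, 0xb0, 0x00, 0x00 } -- the same
 * bytes as the 'onezero' pattern which ntfs_comp_set() below compares
 * against to detect all-zero blocks.
 */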

/**
 * ntfs_decompress - decompress a compression block into an array of pages
 * @dest:	buffer to which to write the decompressed data
 * @dest_size:	size of buffer @dest in bytes
 * @cb_start:	compression block to decompress
 * @cb_size:	size of compression block @cb_start in bytes
 *
 * This decompresses the compression block @cb_start into the destination
 * buffer @dest.
 *
 * @cb_start is a pointer to the compression block which needs decompressing
 * and @cb_size is the size of @cb_start in bytes (8-64kiB).
 *
 * Returns 0 on success, or -1 with errno set to EOVERFLOW on an error in
 * the compressed stream.
 */
static int ntfs_decompress(u8 *dest, const u32 dest_size,
		u8 *const cb_start, const u32 cb_size)
{
	/*
	 * Pointers into the compressed data, i.e. the compression block (cb),
	 * and the therein contained sub-blocks (sb).
	 */
	u8 *cb_end = cb_start + cb_size; /* End of cb. */
	u8 *cb = cb_start;	/* Current position in cb. */
	u8 *cb_sb_start = cb;	/* Beginning of the current sb in the cb. */
	u8 *cb_sb_end;		/* End of current sb / beginning of next sb. */
	/* Variables for uncompressed data / destination. */
	u8 *dest_end = dest + dest_size;	/* End of dest buffer. */
	u8 *dest_sb_start;	/* Start of current sub-block in dest. */
	u8 *dest_sb_end;	/* End of current sb in dest. */
	/* Variables for tag and token parsing. */
	u8 tag;			/* Current tag. */
	int token;		/* Loop counter for the eight tokens in tag. */

	ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size);
do_next_sb:
	ntfs_log_debug("Beginning sub-block at offset = %d in the cb.\n",
			(int)(cb - cb_start));
	/*
	 * Have we reached the end of the compression block or the end of the
	 * decompressed data?  The latter can happen for example if the current
	 * position in the compression block is one byte before its end so the
	 * first two checks do not detect it.
	 */
	if (cb == cb_end || !le16_to_cpup((le16*)cb) || dest == dest_end) {
		if (dest_end > dest)
			memset(dest, 0, dest_end - dest);
		ntfs_log_debug("Completed. Returning success (0).\n");
		return 0;
	}
	/* Setup offset for the current sub-block destination. */
	dest_sb_start = dest;
	dest_sb_end = dest + NTFS_SB_SIZE;
	/* Check that we are still within allowed boundaries. */
	if (dest_sb_end > dest_end)
		goto return_overflow;
	/* Does the minimum size of a compressed sb overflow valid range? */
	if (cb + 6 > cb_end)
		goto return_overflow;
	/* Setup the current sub-block source pointers and validate range. */
	cb_sb_start = cb;
	cb_sb_end = cb_sb_start + (le16_to_cpup((le16*)cb) & NTFS_SB_SIZE_MASK)
			+ 3;
	if (cb_sb_end > cb_end)
		goto return_overflow;
	/* Now, we are ready to process the current sub-block (sb). */
	if (!(le16_to_cpup((le16*)cb) & NTFS_SB_IS_COMPRESSED)) {
		ntfs_log_debug("Found uncompressed sub-block.\n");
		/* This sb is not compressed, just copy it into destination. */
		/* Advance source position to first data byte. */
		cb += 2;
		/* An uncompressed sb must be full size. */
		if (cb_sb_end - cb != NTFS_SB_SIZE)
			goto return_overflow;
		/* Copy the block and advance the source position. */
		memcpy(dest, cb, NTFS_SB_SIZE);
		cb += NTFS_SB_SIZE;
		/* Advance destination position to next sub-block. */
		dest += NTFS_SB_SIZE;
		goto do_next_sb;
	}
	ntfs_log_debug("Found compressed sub-block.\n");
	/* This sb is compressed, decompress it into destination. */
	/* Forward to the first tag in the sub-block. */
	cb += 2;
do_next_tag:
	if (cb == cb_sb_end) {
		/* Check if the decompressed sub-block was not full-length. */
		if (dest < dest_sb_end) {
			int nr_bytes = dest_sb_end - dest;

			ntfs_log_debug("Filling incomplete sub-block with zeroes.\n");
			/* Zero remainder and update destination position. */
			memset(dest, 0, nr_bytes);
			dest += nr_bytes;
		}
		/* We have finished the current sub-block. */
		goto do_next_sb;
	}
	/* Check we are still in range. */
	if (cb > cb_sb_end || dest > dest_sb_end)
		goto return_overflow;
	/* Get the next tag and advance to first token. */
	tag = *cb++;
	/* Parse the eight tokens described by the tag. */
	for (token = 0; token < 8; token++, tag >>= 1) {
		u16 lg, pt, length, max_non_overlap;
		register u16 i;
		u8 *dest_back_addr;

		/* Check if we are done / still in range. */
		if (cb >= cb_sb_end || dest > dest_sb_end)
			break;
		/* Determine token type and parse appropriately. */
		if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
			/*
			 * We have a symbol token, copy the symbol across, and
			 * advance the source and destination positions.
			 */
			*dest++ = *cb++;
			/* Continue with the next token. */
			continue;
		}
		/*
		 * We have a phrase token. Make sure it is not the first tag in
		 * the sb as this is illegal and would confuse the code below.
		 */
		if (dest == dest_sb_start)
			goto return_overflow;
		/*
		 * Determine the number of bytes to go back (p) and the number
		 * of bytes to copy (l). We use an optimized algorithm in which
		 * we first calculate log2(current destination position in sb),
		 * which allows determination of l and p in O(1) rather than
		 * O(n). We just need an arch-optimized log2() function now.
		 */
		lg = 0;
		for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1)
			lg++;
		/* Get the phrase token into pt. */
		pt = le16_to_cpup((le16*)cb);
		/*
		 * Calculate starting position of the byte sequence in
		 * the destination using the fact that p = (pt >> (12 - lg)) + 1
		 * and make sure we don't go too far back.
		 */
		dest_back_addr = dest - (pt >> (12 - lg)) - 1;
		if (dest_back_addr < dest_sb_start)
			goto return_overflow;
		/* Now calculate the length of the byte sequence. */
		length = (pt & (0xfff >> lg)) + 3;
		/* Verify destination is in range. */
		if (dest + length > dest_sb_end)
			goto return_overflow;
		/* The number of non-overlapping bytes. */
		max_non_overlap = dest - dest_back_addr;
		if (length <= max_non_overlap) {
			/* The byte sequence doesn't overlap, just copy it. */
			memcpy(dest, dest_back_addr, length);
			/* Advance destination pointer. */
			dest += length;
		} else {
			/*
			 * The byte sequence does overlap, copy non-overlapping
			 * part and then do a slow byte by byte copy for the
			 * overlapping part. Also, advance the destination
			 * pointer.
			 */
			memcpy(dest, dest_back_addr, max_non_overlap);
			dest += max_non_overlap;
			dest_back_addr += max_non_overlap;
			length -= max_non_overlap;
			while (length--)
				*dest++ = *dest_back_addr++;
		}
		/* Advance source position and continue with the next token. */
		cb += 2;
	}
	/* No tokens left in the current tag. Continue with the next tag. */
	goto do_next_tag;
return_overflow:
	errno = EOVERFLOW;
	ntfs_log_perror("Failed to decompress file");
	return -1;
}
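
/*
 * Worked example : suppose 0x20 bytes have already been produced in the
 * current sub-block, so dest - dest_sb_start = 0x20.  The shift loop above
 * starts from i = 0x1f and stops when i < 0x10, giving lg = 1.  A phrase
 * token pt = 0x0802 then splits into a back-offset p = (pt >> (12 - lg)) + 1
 * = (0x0802 >> 11) + 1 = 2 and a length (pt & (0xfff >> lg)) + 3 =
 * (0x0802 & 0x7ff) + 3 = 5 : copy five bytes starting two bytes back, the
 * overlapping case handled byte by byte at the end of the token loop.
 */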

/**
 * ntfs_is_cb_compressed - internal function, do not use
 *
 * This is a very specialised function determining if a cb is compressed or
 * uncompressed.  It is assumed that checking for a sparse cb has already been
 * performed and that the cb is not sparse.  It makes all sorts of other
 * assumptions as well and hence it is not useful anywhere other than where it
 * is used at the moment.  Please, do not make this function available for use
 * outside of compress.c as it is bound to confuse people and not do what they
 * want.
 *
 * Return TRUE on errors so that the error will be detected later on in the
 * code.  Might be a bit confusing to debug but there really should never be
 * errors coming from here.
 */
static BOOL ntfs_is_cb_compressed(ntfs_attr *na, runlist_element *rl,
				  VCN cb_start_vcn, int cb_clusters)
{
	/*
	 * The simplest case: the run starting at @cb_start_vcn contains
	 * @cb_clusters clusters which are all not sparse, thus the cb is not
	 * compressed.
	 */
restart:
	cb_clusters -= rl->length - (cb_start_vcn - rl->vcn);
	while (cb_clusters > 0) {
		/* Go to the next run. */
		rl++;
		/* Map the next runlist fragment if it is not mapped. */
		if (rl->lcn < LCN_HOLE || !rl->length) {
			cb_start_vcn = rl->vcn;
			rl = ntfs_attr_find_vcn(na, rl->vcn);
			if (!rl || rl->lcn < LCN_HOLE || !rl->length)
				return TRUE;
			/*
			 * If the runs were merged need to deal with the
			 * resulting partial run so simply restart.
			 */
			if (rl->vcn < cb_start_vcn)
				goto restart;
		}
		/* If the current run is sparse, the cb is compressed. */
		if (rl->lcn == LCN_HOLE)
			return TRUE;
		/* If the whole cb is not sparse, it is not compressed. */
		if (rl->length >= cb_clusters)
			return FALSE;
		cb_clusters -= rl->length;
	}
	/* None of the cb clusters were sparse, thus the cb is not compressed. */
	return FALSE;
}
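
/*
 * Example : with a 16-cluster compression block, a run covering 10 of the
 * clusters followed by a hole makes the function return TRUE (compressed),
 * whereas if the run at @cb_start_vcn already covers all 16 clusters the
 * loop is never entered and FALSE is returned.
 */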

/**
 * ntfs_compressed_attr_pread - read from a compressed attribute
 * @na:		ntfs attribute to read from
 * @pos:	byte position in the attribute to begin reading from
 * @count:	number of bytes to read
 * @b:		output data buffer
 *
 * NOTE:  You probably want to be using attrib.c::ntfs_attr_pread() instead.
 *
 * This function will read @count bytes starting at offset @pos from the
 * compressed ntfs attribute @na into the data buffer @b.
 *
 * On success, return the number of successfully read bytes.  If this number
 * is lower than @count this means that the read reached end of file or that
 * an error was encountered during the read so that the read is partial.
 * 0 means end of file or nothing was read (also return 0 when @count is 0).
 *
 * On error and nothing has been read, return -1 with errno set appropriately
 * to the return code of ntfs_pread(), or to EINVAL in case of invalid
 * arguments.
 */
s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b)
{
	s64 br, to_read, ofs, total, total2;
	u64 cb_size_mask;
	VCN start_vcn, vcn, end_vcn;
	ntfs_volume *vol;
	runlist_element *rl;
	u8 *dest, *cb, *cb_pos, *cb_end;
	u32 cb_size;
	int err;
	ATTR_FLAGS data_flags;
	FILE_ATTR_FLAGS compression;
	unsigned int nr_cbs, cb_clusters;

	/* Validate the arguments before dereferencing them. */
	if (!na || !na->ni || !na->ni->vol || !b
			|| ((na->data_flags & ATTR_COMPRESSION_MASK)
				!= ATTR_IS_COMPRESSED)
			|| pos < 0 || count < 0) {
		errno = EINVAL;
		return -1;
	}
	ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n",
			(unsigned long long)na->ni->mft_no, le32_to_cpu(na->type),
			(long long)pos, (long long)count);
	data_flags = na->data_flags;
	compression = na->ni->flags & FILE_ATTR_COMPRESSED;
	/*
	 * Encrypted attributes are not supported.  We return access denied,
	 * which is what Windows NT4 does, too.
	 */
	if (NAttrEncrypted(na)) {
		errno = EACCES;
		return -1;
	}
	if (!count)
		return 0;
	/* Truncate reads beyond end of attribute. */
	if (pos + count > na->data_size) {
		if (pos >= na->data_size) {
			return 0;
		}
		count = na->data_size - pos;
	}
	/* If it is a resident attribute, simply use ntfs_attr_pread(). */
	if (!NAttrNonResident(na))
		return ntfs_attr_pread(na, pos, count, b);
	if (na->compression_block_size < NTFS_SB_SIZE) {
		ntfs_log_error("Unsupported compression block size %ld\n",
				(long)na->compression_block_size);
		errno = EOVERFLOW;
		return (-1);
	}
	total = total2 = 0;
	/* Zero out reads beyond initialized size. */
	if (pos + count > na->initialized_size) {
		if (pos >= na->initialized_size) {
			memset(b, 0, count);
			return count;
		}
		total2 = pos + count - na->initialized_size;
		count -= total2;
		memset((u8*)b + count, 0, total2);
	}
	vol = na->ni->vol;
	cb_size = na->compression_block_size;
	cb_size_mask = cb_size - 1UL;
	cb_clusters = na->compression_block_clusters;

	/* Need a temporary buffer for each loaded compression block. */
	cb = (u8*)ntfs_malloc(cb_size);
	if (!cb)
		return -1;

	/* Need a temporary buffer for each uncompressed block. */
	dest = (u8*)ntfs_malloc(cb_size);
	if (!dest) {
		free(cb);
		return -1;
	}
	/*
	 * The first vcn in the first compression block (cb) which we need to
	 * decompress.
	 */
	start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits;
	/* Offset in the uncompressed cb at which to start reading data. */
	ofs = pos & cb_size_mask;
	/*
	 * The first vcn in the cb after the last cb which we need to
	 * decompress.
	 */
	end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >>
			vol->cluster_size_bits;
	/* Number of compression blocks (cbs) in the wanted vcn range. */
	nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >>
			na->compression_block_size_bits;
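	/*
	 * Example with 4096-byte clusters and 16-cluster (64KiB) compression
	 * blocks : pos = 70000 and count = 1000 give start_vcn = 65536 >> 12
	 * = 16, ofs = 70000 - 65536 = 4464, end_vcn = 131072 >> 12 = 32, and
	 * nr_cbs = ((32 - 16) << 12) >> 16 = 1, so a single cb is loaded.
	 */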
	cb_end = cb + cb_size;
do_next_cb:
	nr_cbs--;
	cb_pos = cb;
	vcn = start_vcn;
	start_vcn += cb_clusters;

	/* Check whether the compression block is sparse. */
	rl = ntfs_attr_find_vcn(na, vcn);
	if (!rl || rl->lcn < LCN_HOLE) {
		free(cb);
		free(dest);
		if (total)
			return total;
		/* FIXME: Do we want EIO or the error code? (AIA) */
		errno = EIO;
		return -1;
	}
	if (rl->lcn == LCN_HOLE) {
		/* Sparse cb, zero out destination range overlapping the cb. */
		ntfs_log_debug("Found sparse compression block.\n");
		to_read = min(count, cb_size - ofs);
		memset(b, 0, to_read);
		ofs = 0;
		total += to_read;
		count -= to_read;
		b = (u8*)b + to_read;
	} else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) {
		s64 tdata_size, tinitialized_size;
		/*
		 * Uncompressed cb, read it straight into the destination range
		 * overlapping the cb.
		 */
		ntfs_log_debug("Found uncompressed compression block.\n");
		/*
		 * Read the uncompressed data into the destination buffer.
		 * NOTE: We cheat a little bit here by marking the attribute as
		 * not compressed in the ntfs_attr structure so that we can
		 * read the data by simply using ntfs_attr_pread().  (-8
		 * NOTE: we have to modify data_size and initialized_size
		 * temporarily as well...
		 */
		to_read = min(count, cb_size - ofs);
		ofs += vcn << vol->cluster_size_bits;
		NAttrClearCompressed(na);
		na->data_flags &= ~ATTR_COMPRESSION_MASK;
		tdata_size = na->data_size;
		tinitialized_size = na->initialized_size;
		na->data_size = na->initialized_size = na->allocated_size;
		do {
			br = ntfs_attr_pread(na, ofs, to_read, b);
			if (br <= 0) {
				if (!br) {
					ntfs_log_error("Failed to read an"
						" uncompressed cluster,"
						" inode %lld offs 0x%llx\n",
						(long long)na->ni->mft_no,
						(long long)ofs);
					errno = EIO;
				}
				err = errno;
				na->data_size = tdata_size;
				na->initialized_size = tinitialized_size;
				na->ni->flags |= compression;
				na->data_flags = data_flags;
				free(cb);
				free(dest);
				if (total)
					return total;
				errno = err;
				return br;
			}
			total += br;
			count -= br;
			b = (u8*)b + br;
			to_read -= br;
			ofs += br;
		} while (to_read > 0);
		na->data_size = tdata_size;
		na->initialized_size = tinitialized_size;
		na->ni->flags |= compression;
		na->data_flags = data_flags;
		ofs = 0;
	} else {
		s64 tdata_size, tinitialized_size;
		u32 decompsz;

		/*
		 * Compressed cb, decompress it into the temporary buffer, then
		 * copy the data to the destination range overlapping the cb.
		 */
		ntfs_log_debug("Found compressed compression block.\n");
		/*
		 * Read the compressed data into the temporary buffer.
		 * NOTE: We cheat a little bit here by marking the attribute as
		 * not compressed in the ntfs_attr structure so that we can
		 * read the raw, compressed data by simply using
		 * ntfs_attr_pread().  (-8
		 * NOTE: We have to modify data_size and initialized_size
		 * temporarily as well...
		 */
		to_read = cb_size;
		NAttrClearCompressed(na);
		na->data_flags &= ~ATTR_COMPRESSION_MASK;
		tdata_size = na->data_size;
		tinitialized_size = na->initialized_size;
		na->data_size = na->initialized_size = na->allocated_size;
		do {
			br = ntfs_attr_pread(na,
					(vcn << vol->cluster_size_bits) +
					(cb_pos - cb), to_read, cb_pos);
			if (br <= 0) {
				if (!br) {
					ntfs_log_error("Failed to read a"
						" compressed cluster,"
						" inode %lld offs 0x%llx\n",
						(long long)na->ni->mft_no,
						(long long)(vcn << vol->cluster_size_bits));
					errno = EIO;
				}
				err = errno;
				na->data_size = tdata_size;
				na->initialized_size = tinitialized_size;
				na->ni->flags |= compression;
				na->data_flags = data_flags;
				free(cb);
				free(dest);
				if (total)
					return total;
				errno = err;
				return br;
			}
			cb_pos += br;
			to_read -= br;
		} while (to_read > 0);
		na->data_size = tdata_size;
		na->initialized_size = tinitialized_size;
		na->ni->flags |= compression;
		na->data_flags = data_flags;
		/* Just a precaution. */
		if (cb_pos + 2 <= cb_end)
			*(u16*)cb_pos = 0;
		ntfs_log_debug("Successfully read the compression block.\n");
		/* Do not decompress beyond the requested block */
		to_read = min(count, cb_size - ofs);
		decompsz = ((ofs + to_read - 1) | (NTFS_SB_SIZE - 1)) + 1;
		if (ntfs_decompress(dest, decompsz, cb, cb_size) < 0) {
			err = errno;
			free(cb);
			free(dest);
			if (total)
				return total;
			errno = err;
			return -1;
		}
		memcpy(b, dest + ofs, to_read);
		total += to_read;
		count -= to_read;
		b = (u8*)b + to_read;
		ofs = 0;
	}
	/* Do we have more work to do? */
	if (nr_cbs)
		goto do_next_cb;
	/* We no longer need the buffers. */
	free(cb);
	free(dest);
	/* Return number of bytes read. */
	return total + total2;
}

/*
 *		Read data from a set of clusters
 *
 *	Returns the amount of data read
 */

static u32 read_clusters(ntfs_volume *vol, const runlist_element *rl,
			s64 offs, u32 to_read, char *inbuf)
{
	u32 count;
	int xgot;
	u32 got;
	s64 xpos;
	BOOL first;
	char *xinbuf;
	const runlist_element *xrl;

	got = 0;
	xrl = rl;
	xinbuf = inbuf;
	first = TRUE;
	do {
		count = xrl->length << vol->cluster_size_bits;
		xpos = xrl->lcn << vol->cluster_size_bits;
		if (first) {
			count -= offs;
			xpos += offs;
		}
		if ((to_read - got) < count)
			count = to_read - got;
		xgot = ntfs_pread(vol->dev, xpos, count, xinbuf);
		if (xgot == (int)count) {
			got += count;
			xpos += count;
			xinbuf += count;
			xrl++;
		}
		first = FALSE;
	} while ((xgot == (int)count) && (got < to_read));
	return (got);
}

/*
 *		Write data to a set of clusters
 *
 *	Returns the amount of data written
 */

static s32 write_clusters(ntfs_volume *vol, const runlist_element *rl,
			s64 offs, s32 to_write, const char *outbuf)
{
	s32 count;
	s32 put, xput;
	s64 xpos;
	BOOL first;
	const char *xoutbuf;
	const runlist_element *xrl;

	put = 0;
	xrl = rl;
	xoutbuf = outbuf;
	first = TRUE;
	do {
		count = xrl->length << vol->cluster_size_bits;
		xpos = xrl->lcn << vol->cluster_size_bits;
		if (first) {
			count -= offs;
			xpos += offs;
		}
		if ((to_write - put) < count)
			count = to_write - put;
		xput = ntfs_pwrite(vol->dev, xpos, count, xoutbuf);
		if (xput == count) {
			put += count;
			xpos += count;
			xoutbuf += count;
			xrl++;
		}
		first = FALSE;
	} while ((xput == count) && (put < to_write));
	return (put);
}


/*
 *		Compress and write a set of blocks
 *
 *	returns the size actually written (rounded to a full cluster)
 *		or 0 if all zeroes (nothing is written)
 *		or -1 if it could not compress (nothing is written)
 *		or -2 if there was an irrecoverable error (errno set)
 */

static s32 ntfs_comp_set(ntfs_attr *na, runlist_element *rl,
			s64 offs, u32 insz, const char *inbuf)
{
	ntfs_volume *vol;
	char *outbuf;
	char *pbuf;
	u32 compsz;
	s32 written;
	s32 rounded;
	unsigned int clsz;
	u32 p;
	unsigned int sz;
	unsigned int bsz;
	BOOL fail;
	BOOL allzeroes;
		/* a single compressed zero */
	static char onezero[] = { 0x01, 0xb0, 0x00, 0x00 };
		/* a couple of compressed zeroes */
	static char twozeroes[] = { 0x02, 0xb0, 0x00, 0x00, 0x00 };
		/* more compressed zeroes, to be followed by some count */
	static char morezeroes[] = { 0x03, 0xb0, 0x02, 0x00 };

	vol = na->ni->vol;
	written = -1; /* default return */
	clsz = 1 << vol->cluster_size_bits;
		/* may need 2 extra bytes per block and 2 more bytes */
	outbuf = (char*)ntfs_malloc(na->compression_block_size
			+ 2*(na->compression_block_size/NTFS_SB_SIZE)
			+ 2);
	if (outbuf) {
		fail = FALSE;
		compsz = 0;
		allzeroes = TRUE;
		for (p=0; (p<insz) && !fail; p+=NTFS_SB_SIZE) {
			if ((p + NTFS_SB_SIZE) < insz)
				bsz = NTFS_SB_SIZE;
			else
				bsz = insz - p;
			pbuf = &outbuf[compsz];
			sz = ntfs_compress_block(&inbuf[p],bsz,pbuf);
			/* fail if all the clusters (or more) are needed */
			if (!sz || ((compsz + sz + clsz + 2)
					 > na->compression_block_size))
				fail = TRUE;
			else {
				if (allzeroes) {
				/* check whether this is all zeroes */
					switch (sz) {
					case 4 :
						allzeroes = !memcmp(
							pbuf,onezero,4);
						break;
					case 5 :
						allzeroes = !memcmp(
							pbuf,twozeroes,5);
						break;
					case 6 :
						allzeroes = !memcmp(
							pbuf,morezeroes,4);
						break;
					default :
						allzeroes = FALSE;
						break;
					}
				}
				compsz += sz;
			}
		}
		if (!fail && !allzeroes) {
			/* add a couple of null bytes, space has been checked */
			outbuf[compsz++] = 0;
			outbuf[compsz++] = 0;
			/* write a full cluster, to avoid partial reading */
			rounded = ((compsz - 1) | (clsz - 1)) + 1;
			memset(&outbuf[compsz], 0, rounded - compsz);
			written = write_clusters(vol, rl, offs, rounded, outbuf);
			if (written != rounded) {
				/*
				 * TODO : previously written data has been
				 * spoilt, should return a specific error
				 */
				ntfs_log_error("error writing compressed data\n");
				errno = EIO;
				written = -2;
			}
		} else
			if (!fail)
				written = 0;
		free(outbuf);
	}
	return (written);
}
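
/*
 * Buffer bound : ntfs_compress_block() writes at most NTFS_SB_SIZE + 2
 * bytes per 4096-byte sub-block, so a 64KiB compression block (16
 * sub-blocks) produces at most 65536 + 2*16 bytes, plus the two trailing
 * zero bytes appended above -- exactly the size allocated for 'outbuf'
 * at the top of ntfs_comp_set().
 */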

/*
 *		Check the validity of a compressed runlist
 *	The check starts at the beginning of the current run and ends
 *	at the end of the runlist
 *	errno is set if the runlist is not valid
 */

static BOOL valid_compressed_run(ntfs_attr *na, runlist_element *rl,
			BOOL fullcheck, const char *text)
{
	runlist_element *xrl;
	const char *err;
	BOOL ok = TRUE;

	xrl = rl;
	while (xrl->vcn & (na->compression_block_clusters - 1))
		xrl--;
	err = (const char*)NULL;
	while (xrl->length) {
		if ((xrl->vcn + xrl->length) != xrl[1].vcn)
			err = "Runs not adjacent";
		if (xrl->lcn == LCN_HOLE) {
			if ((xrl->vcn + xrl->length)
			    & (na->compression_block_clusters - 1)) {
				err = "Invalid hole";
			}
			if (fullcheck && (xrl[1].lcn == LCN_HOLE)) {
				err = "Adjacent holes";
			}
		}
		if (err) {
			ntfs_log_error("%s at %s index %ld inode %lld\n",
				err, text, (long)(xrl - na->rl),
				(long long)na->ni->mft_no);
			errno = EIO;
			ok = FALSE;
			err = (const char*)NULL;
		}
		xrl++;
	}
	return (ok);
}

/*
 *		Free unneeded clusters after overwriting compressed data
 *
 *	This generally requires one or two empty slots at the end of runlist,
 *	but we do not want to reallocate the runlist here because
 *	there are many pointers to it.
 *	So the empty slots have to be reserved beforehand.
 *
 *	Returns zero unless some error occurred (described by errno)
 *
 *         +======= start of block =====+
 *      0  |A     chunk may overflow    | <-- rl         usedcnt : A + B
 *         |A     on previous block     |                        then B
 *         |A                           |
 *         +-- end of allocated chunk --+                freelength : C
 *         |B                           |                      (incl overflow)
 *         +== end of compressed data ==+
 *         |C                           | <-- freerl     freecnt : C + D
 *         |C     chunk may overflow    |
 *         |C     on next block         |
 *         +-- end of allocated chunk --+
 *         |D                           |
 *         |D     chunk may overflow    |
 *     15  |D     on next block         |
 *         +======== end of block ======+
 *
 */

static int ntfs_compress_overwr_free(ntfs_attr *na, runlist_element *rl,
			s32 usedcnt, s32 freecnt, VCN *update_from)
{
	BOOL beginhole;
	BOOL mergeholes;
	s32 oldlength;
	s32 freelength;
	s64 freelcn;
	s64 freevcn;
	runlist_element *freerl;
	ntfs_volume *vol;
	s32 carry;
	int res;

	vol = na->ni->vol;
	res = 0;
	freelcn = rl->lcn + usedcnt;
	freevcn = rl->vcn + usedcnt;
	freelength = rl->length - usedcnt;
	beginhole = !usedcnt && !rl->vcn;
		/* can merge with hole before ? */
	mergeholes = !usedcnt
			&& rl[0].vcn
			&& (rl[-1].lcn == LCN_HOLE);
		/* truncate current run, carry to subsequent hole */
	carry = freelength;
	oldlength = rl->length;
	if (mergeholes) {
			/* merging with a hole before */
		freerl = rl;
	} else {
		rl->length -= freelength; /* warning : can be zero */
		freerl = ++rl;
	}
	if (!mergeholes && (usedcnt || beginhole)) {
		s32 freed;
		runlist_element *frl;
		runlist_element *erl;
		int holes = 0;
		BOOL threeparts;

		/* free the unneeded clusters from initial run, then freerl */
		threeparts = (freelength > freecnt);
		freed = 0;
		frl = freerl;
		if (freelength) {
			res = ntfs_cluster_free_basic(vol,freelcn,
				(threeparts ? freecnt : freelength));
			if (!res)
				freed += (threeparts ? freecnt : freelength);
			if (!usedcnt) {
				holes++;
				freerl--;
				freerl->length += (threeparts
						? freecnt : freelength);
				if (freerl->vcn < *update_from)
					*update_from = freerl->vcn;
			}
		}
		while (!res && frl->length && (freed < freecnt)) {
			if (frl->length <= (freecnt - freed)) {
				res = ntfs_cluster_free_basic(vol, frl->lcn,
						frl->length);
				if (!res) {
					freed += frl->length;
					frl->lcn = LCN_HOLE;
					frl->length += carry;
					carry = 0;
					holes++;
				}
			} else {
				res = ntfs_cluster_free_basic(vol, frl->lcn,
						freecnt - freed);
				if (!res) {
					frl->lcn += freecnt - freed;
					frl->vcn += freecnt - freed;
					frl->length -= freecnt - freed;
					freed = freecnt;
				}
			}
			frl++;
		}
		na->compressed_size -= freed << vol->cluster_size_bits;
		switch (holes) {
		case 0 :
			/* there is no hole, must insert one */
			/* space for hole has been prereserved */
			if (freerl->lcn == LCN_HOLE) {
				if (threeparts) {
					erl = freerl;
					while (erl->length)
						erl++;
					do {
						erl[2] = *erl;
					} while (erl-- != freerl);

					freerl[1].length = freelength - freecnt;
					freerl->length = freecnt;
					freerl[1].lcn = freelcn + freecnt;
					freerl[1].vcn = freevcn + freecnt;
					freerl[2].lcn = LCN_HOLE;
					freerl[2].vcn = freerl[1].vcn
							+ freerl[1].length;
					freerl->vcn = freevcn;
				} else {
					freerl->vcn = freevcn;
					freerl->length += freelength;
				}
			} else {
				erl = freerl;
				while (erl->length)
					erl++;
				if (threeparts) {
					do {
						erl[2] = *erl;
					} while (erl-- != freerl);
					freerl[1].lcn = freelcn + freecnt;
					freerl[1].vcn = freevcn + freecnt;
					freerl[1].length = oldlength - usedcnt - freecnt;
				} else {
					do {
						erl[1] = *erl;
					} while (erl-- != freerl);
				}
				freerl->lcn = LCN_HOLE;
				freerl->vcn = freevcn;
				freerl->length = freecnt;
			}
			break;
		case 1 :
			/* there is a single hole, may have to merge */
			freerl->vcn = freevcn;
			freerl->length = freecnt;
			if (freerl[1].lcn == LCN_HOLE) {
				freerl->length += freerl[1].length;
				erl = freerl;
				do {
					erl++;
					*erl = erl[1];
				} while (erl->length);
			}
			break;
		default :
			/* there were several holes, must merge them */
			freerl->lcn = LCN_HOLE;
			freerl->vcn = freevcn;
			freerl->length = freecnt;
			if (freerl[holes].lcn == LCN_HOLE) {
				freerl->length += freerl[holes].length;
				holes++;
			}
			erl = freerl;
			do {
				erl++;
				*erl = erl[holes - 1];
			} while (erl->length);
			break;
		}
	} else {
		s32 freed;
		runlist_element *frl;
		runlist_element *xrl;

		freed = 0;
		frl = freerl--;
		if (freerl->vcn < *update_from)
			*update_from = freerl->vcn;
		while (!res && frl->length && (freed < freecnt)) {
			if (frl->length <= (freecnt - freed)) {
				freerl->length += frl->length;
				freed += frl->length;
				res = ntfs_cluster_free_basic(vol, frl->lcn,
						frl->length);
				frl++;
			} else {
				freerl->length += freecnt - freed;
				res = ntfs_cluster_free_basic(vol, frl->lcn,
						freecnt - freed);
				frl->lcn += freecnt - freed;
				frl->vcn += freecnt - freed;
				frl->length -= freecnt - freed;
				freed = freecnt;
			}
		}
			/* remove unneeded runlist entries */
		xrl = freerl;
			/* group with next run if also a hole */
		if (frl->length && (frl->lcn == LCN_HOLE)) {
			xrl->length += frl->length;
			frl++;
		}
		while (frl->length) {
			*++xrl = *frl++;
		}
		*++xrl = *frl; /* terminator */
		na->compressed_size -= freed << vol->cluster_size_bits;
	}
	return (res);
}


/*
 *		Free unneeded clusters after compression
 *
 *	This generally requires one or two empty slots at the end of runlist,
 *	but we do not want to reallocate the runlist here because
 *	there are many pointers to it.
 *	So the empty slots have to be reserved beforehand.
 *
 *	Returns zero unless some error occurred (described by errno)
 */

static int ntfs_compress_free(ntfs_attr *na, runlist_element *rl,
				s64 used, s64 reserved, BOOL appending,
				VCN *update_from)
{
	s32 freecnt;
	s32 usedcnt;
	int res;
	s64 freelcn;
	s64 freevcn;
	s32 freelength;
	BOOL mergeholes;
	BOOL beginhole;
	ntfs_volume *vol;
	runlist_element *freerl;

	res = -1; /* default return */
	vol = na->ni->vol;
	freecnt = (reserved - used) >> vol->cluster_size_bits;
	usedcnt = (reserved >> vol->cluster_size_bits) - freecnt;
	if (rl->vcn < *update_from)
		*update_from = rl->vcn;
		/* skip entries fully used, if any */
	while (rl->length && (rl->length < usedcnt)) {
		usedcnt -= rl->length; /* must be > 0 */
		rl++;
	}
	if (rl->length) {
		/*
		 * Splitting the current allocation block requires
		 * an extra runlist element to create the hole.
		 * The required entry has been prereserved when
		 * mapping the runlist.
		 */
			/* get the free part in initial run */
		freelcn = rl->lcn + usedcnt;
		freevcn = rl->vcn + usedcnt;
			/* new count of allocated clusters */
		if (!((freevcn + freecnt)
			    & (na->compression_block_clusters - 1))) {
			if (!appending)
				res = ntfs_compress_overwr_free(na,rl,
						usedcnt,freecnt,update_from);
			else {
				freelength = rl->length - usedcnt;
				beginhole = !usedcnt && !rl->vcn;
				mergeholes = !usedcnt
						&& rl[0].vcn
						&& (rl[-1].lcn == LCN_HOLE);
				if (mergeholes) {
					s32 carry;

				/* shorten the runs which have free space */
					carry = freecnt;
					freerl = rl;
					while (freerl->length < carry) {
						carry -= freerl->length;
						freerl++;
					}
					freerl->length = carry;
					freerl = rl;
				} else {
					rl->length = usedcnt; /* can be zero ? */
					freerl = ++rl;
				}
				if ((freelength > 0)
				    && !mergeholes
				    && (usedcnt || beginhole)) {
				/*
				 * move the unused part to the end. Doing so,
				 * the vcn will be out of order. This does
				 * not harm, the vcn are meaningless now, and
				 * only the lcn are meaningful for freeing.
				 */
					/* locate current end */
					while (rl->length)
						rl++;
					/* new terminator relocated */
					rl[1].vcn = rl->vcn;
					rl[1].lcn = LCN_ENOENT;
					rl[1].length = 0;
					/* hole, currently allocated */
					rl->vcn = freevcn;
					rl->lcn = freelcn;
					rl->length = freelength;
				} else {
	/* why is this different from the begin hole case ? */
					if ((freelength > 0)
					    && !mergeholes
					    && !usedcnt) {
						freerl--;
						freerl->length = freelength;
						if (freerl->vcn < *update_from)
							*update_from
								= freerl->vcn;
					}
				}
				/* free the hole */
				res = ntfs_cluster_free_from_rl(vol,freerl);
				if (!res) {
					na->compressed_size -= freecnt
						<< vol->cluster_size_bits;
					if (mergeholes) {
						/* merge with adjacent hole */
						freerl--;
						freerl->length += freecnt;
					} else {
						if (beginhole)
							freerl--;
						/* mark hole as free */
						freerl->lcn = LCN_HOLE;
						freerl->vcn = freevcn;
						freerl->length = freecnt;
					}
					if (freerl->vcn < *update_from)
						*update_from = freerl->vcn;
						/* and set up the new end */
					freerl[1].lcn = LCN_ENOENT;
					freerl[1].vcn = freevcn + freecnt;
					freerl[1].length = 0;
				}
			}
		} else {
			ntfs_log_error("Bad end of a compression block set\n");
			errno = EIO;
		}
	} else {
		ntfs_log_error("No cluster to free after compression\n");
		errno = EIO;
	}
	NAttrSetRunlistDirty(na);
	return (res);
}
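
/*
 * Example : with 4096-byte clusters and a 16-cluster compression block,
 * compressing a block down to used = 18000 bytes within reserved = 65536
 * bytes gives freecnt = (65536 - 18000) >> 12 = 11 clusters to free and
 * usedcnt = 16 - 11 = 5 clusters to keep, the kept part being rounded up
 * to a whole cluster (5 * 4096 = 20480 >= 18000).
 */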

/*
 *		Read existing data, decompress and append buffer
 *	Do nothing if something fails
 */

static int ntfs_read_append(ntfs_attr *na, const runlist_element *rl,
			s64 offs, u32 compsz, s32 pos, BOOL appending,
			char *outbuf, s64 to_write, const void *b)
{
	int fail = 1;
	char *compbuf;
	u32 decompsz;
	u32 got;

	if (compsz == na->compression_block_size) {
			/* if the full block was requested, it was a hole */
		memset(outbuf,0,compsz);
		memcpy(&outbuf[pos],b,to_write);
		fail = 0;
	} else {
		compbuf = (char*)ntfs_malloc(compsz);
		if (compbuf) {
			/* must align to full block for decompression */
			if (appending)
				decompsz = ((pos - 1) | (NTFS_SB_SIZE - 1)) + 1;
			else
				decompsz = na->compression_block_size;
			got = read_clusters(na->ni->vol, rl, offs,
					compsz, compbuf);
			if ((got == compsz)
			    && !ntfs_decompress((u8*)outbuf,decompsz,
					(u8*)compbuf,compsz)) {
				memcpy(&outbuf[pos],b,to_write);
				fail = 0;
			}
			free(compbuf);
		}
	}
	return (fail);
}

/*
 *		Flush a full compression block
 *
 *	returns the size actually written (rounded to a full cluster)
 *		or 0 if it could not compress (data is then written uncompressed)
 *		or -1 if there was an irrecoverable error (errno set)
 */

static s32 ntfs_flush(ntfs_attr *na, runlist_element *rl, s64 offs,
			char *outbuf, s32 count, BOOL compress,
			BOOL appending, VCN *update_from)
{
	s32 rounded;
	s32 written;
	int clsz;

	if (compress) {
		written = ntfs_comp_set(na, rl, offs, count, outbuf);
		if (written == -1)
			compress = FALSE;
		if ((written >= 0)
		   && ntfs_compress_free(na,rl,offs + written,
				offs + na->compression_block_size, appending,
				update_from))
			written = -1;
	} else
		written = 0;
	if (!compress) {
		clsz = 1 << na->ni->vol->cluster_size_bits;
		rounded = ((count - 1) | (clsz - 1)) + 1;
		if (rounded > count)
			memset(&outbuf[count], 0, rounded - count);
		written = write_clusters(na->ni->vol, rl,
				offs, rounded, outbuf);
		if (written != rounded)
			written = -1;
	}
	return (written);
}

/*
 *		Write some data to be compressed.
 *	Compression only occurs when a few clusters (usually 16) are
 *	full. When this occurs an extra runlist slot may be needed, so
 *	it has to be reserved beforehand.
 *
 *	Returns the size of uncompressed data written,
 *		or negative if an error occurred.
 *	When the returned size is less than requested, new clusters have
 *	to be allocated before the function is called again.
 */

s64 ntfs_compressed_pwrite(ntfs_attr *na, runlist_element *wrl, s64 wpos,
				s64 offs, s64 to_write, s64 rounded,
				const void *b, int compressed_part,
				VCN *update_from)
{
	ntfs_volume *vol;
	runlist_element *brl; /* entry containing the beginning of block */
	int compression_length;
	s64 written;
	s64 to_read;
	s64 to_flush;
	s64 roffs;
	s64 got;
	s64 start_vcn;
	s64 nextblock;
	s64 endwrite;
	u32 compsz;
	char *inbuf;
	char *outbuf;
	BOOL fail;
	BOOL done;
	BOOL compress;
	BOOL appending;

	if (!valid_compressed_run(na,wrl,FALSE,"begin compressed write")) {
		return (-1);
	}
	if ((*update_from < 0)
	    || (compressed_part < 0)
	    || (compressed_part > (int)na->compression_block_clusters)) {
		ntfs_log_error("Bad update vcn or compressed_part %d for compressed write\n",
			compressed_part);
		errno = EIO;
		return (-1);
	}
		/* make sure there are two unused entries in runlist */
	if (na->unused_runs < 2) {
		ntfs_log_error("No unused runs for compressed write\n");
		errno = EIO;
		return (-1);
	}
	if (na->compression_block_size < NTFS_SB_SIZE) {
		ntfs_log_error("Unsupported compression block size %ld\n",
				(long)na->compression_block_size);
		errno = EOVERFLOW;
		return (-1);
	}
	if (wrl->vcn < *update_from)
		*update_from = wrl->vcn;
	written = -1; /* default return */
	vol = na->ni->vol;
	compression_length = na->compression_block_clusters;
	compress = FALSE;
	done = FALSE;
		/*
		 * Cannot accept writing beyond the current compression set
		 * because when compression occurs, clusters are freed
		 * and have to be reallocated.
		 * (cannot happen with standard fuse 4K buffers)
		 * Caller has to avoid this situation, or face consequences.
		 */
	nextblock = ((offs + (wrl->vcn << vol->cluster_size_bits))
			| (na->compression_block_size - 1)) + 1;
		/* determine whether we are appending to file */
	endwrite = offs + to_write + (wrl->vcn << vol->cluster_size_bits);
	appending = endwrite >= na->initialized_size;
	if (endwrite >= nextblock) {
			/* it is time to compress */
		compress = TRUE;
			/* only process what we can */
		to_write = rounded = nextblock
			- (offs + (wrl->vcn << vol->cluster_size_bits));
	}
	start_vcn = 0;
	fail = FALSE;
	brl = wrl;
	roffs = 0;
		/*
		 * If we are about to compress or we need to decompress
		 * existing data, we have to process a full set of blocks.
		 * So relocate the parameters to the beginning of allocation
		 * containing the first byte of the set of blocks.
		 */
	if (compress || compressed_part) {
		/* find the beginning of block */
		start_vcn = (wrl->vcn + (offs >> vol->cluster_size_bits))
				& -compression_length;
		if (start_vcn < *update_from)
			*update_from = start_vcn;
		while (brl->vcn && (brl->vcn > start_vcn)) {
			/* jumping back a hole means big trouble */
			if (brl->lcn == (LCN)LCN_HOLE) {
				ntfs_log_error("jump back over a hole when appending\n");
				fail = TRUE;
				errno = EIO;
			}
			brl--;
			offs += brl->length << vol->cluster_size_bits;
		}
		roffs = (start_vcn - brl->vcn) << vol->cluster_size_bits;
	}
	if (compressed_part && !fail) {
		/*
		 * The set of compression blocks contains compressed data
		 * (we are reopening an existing file to append to it)
		 * Decompress the data and append
		 */
		compsz = (s32)compressed_part << vol->cluster_size_bits;
		outbuf = (char*)ntfs_malloc(na->compression_block_size);
		if (outbuf) {
			if (appending) {
				to_read = offs - roffs;
				to_flush = to_read + to_write;
			} else {
				to_read = na->data_size
					- (brl->vcn << vol->cluster_size_bits);
				if (to_read > na->compression_block_size)
					to_read = na->compression_block_size;
				to_flush = to_read;
			}
			if (!ntfs_read_append(na, brl, roffs, compsz,
					(s32)(offs - roffs), appending,
					outbuf, to_write, b)) {
				written = ntfs_flush(na, brl, roffs,
					outbuf, to_flush, compress, appending,
					update_from);
				if (written >= 0) {
					written = to_write;
					done = TRUE;
				}
			}
			free(outbuf);
		}
	} else {
		if (compress && !fail) {
			/*
			 * we are filling up a block, read the full set
			 * of blocks and compress it
			 */
			inbuf = (char*)ntfs_malloc(na->compression_block_size);
			if (inbuf) {
				to_read = offs - roffs;
				if (to_read)
					got = read_clusters(vol, brl, roffs,
							to_read, inbuf);
				else
					got = 0;
				if (got == to_read) {
					memcpy(&inbuf[to_read],b,to_write);
					written = ntfs_comp_set(na, brl, roffs,
						to_read + to_write, inbuf);
				/*
				 * if compression was not successful,
				 * only write the part which was requested
				 */
					if ((written >= 0)
						/* free the unused clusters */
					    && !ntfs_compress_free(na,brl,
						    written + roffs,
						    na->compression_block_size
							 + roffs,
						    appending, update_from)) {
						done = TRUE;
						written = to_write;
					}
				}
				free(inbuf);
			}
		}
		if (!done) {
			/*
			 * if the compression block is not full, or
			 * if compression failed for whatever reason,
			 * write uncompressed
			 */
			/* check we are not overflowing current allocation */
			if ((wpos + rounded)
			    > ((wrl->lcn + wrl->length)
				 << vol->cluster_size_bits)) {
				ntfs_log_error("writing on unallocated clusters\n");
				errno = EIO;
			} else {
				written = ntfs_pwrite(vol->dev, wpos,
					rounded, b);
				if (written == rounded)
					written = to_write;
			}
		}
	}
	if ((written >= 0)
	    && !valid_compressed_run(na,wrl,TRUE,"end compressed write"))
		written = -1;
	return (written);
}
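
/*
 * Example of the compression trigger above : with 4096-byte clusters and
 * 64KiB compression blocks, a write at wrl->vcn = 16 and offs = 61440
 * starts at absolute byte 126976, so nextblock = (126976 | 65535) + 1 =
 * 131072.  Requesting to_write = 8192 gives endwrite = 135168 >= nextblock,
 * so compress is set and to_write is clipped to 131072 - 126976 = 4096 :
 * the caller must allocate new clusters and call again for the rest, as
 * stated in the function comment.
 */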

/*
 *		Close a file written compressed.
 *	This compresses the last partial compression block of the file.
 *	Two empty runlist slots have to be reserved beforehand.
 *
 *	Returns zero if closing is successful.
 */

int ntfs_compressed_close(ntfs_attr *na, runlist_element *wrl, s64 offs,
			VCN *update_from)
{
	ntfs_volume *vol;
	runlist_element *brl; /* entry containing the beginning of block */
	int compression_length;
	s64 written;
	s64 to_read;
	s64 roffs;
	s64 got;
	s64 start_vcn;
	char *inbuf;
	BOOL fail;
	BOOL done;

	if (na->unused_runs < 2) {
		ntfs_log_error("No unused runs for compressed close\n");
		errno = EIO;
		return (-1);
	}
	if (*update_from < 0) {
		ntfs_log_error("Bad update vcn for compressed close\n");
		errno = EIO;
		return (-1);
	}
	if (na->compression_block_size < NTFS_SB_SIZE) {
		ntfs_log_error("Unsupported compression block size %ld\n",
				(long)na->compression_block_size);
		errno = EOVERFLOW;
		return (-1);
	}
	if (wrl->vcn < *update_from)
		*update_from = wrl->vcn;
	vol = na->ni->vol;
	compression_length = na->compression_block_clusters;
	done = FALSE;
		/*
		 * There generally is an uncompressed block at end of file,
		 * read the full block and compress it
		 */
	inbuf = (char*)ntfs_malloc(na->compression_block_size);
	if (inbuf) {
		start_vcn = (wrl->vcn + (offs >> vol->cluster_size_bits))
				& -compression_length;
		if (start_vcn < *update_from)
			*update_from = start_vcn;
		to_read = offs + ((wrl->vcn - start_vcn)
					<< vol->cluster_size_bits);
		brl = wrl;
		fail = FALSE;
		while (brl->vcn && (brl->vcn > start_vcn)) {
			if (brl->lcn == (LCN)LCN_HOLE) {
				ntfs_log_error("jump back over a hole when closing\n");
				fail = TRUE;
				errno = EIO;
			}
			brl--;
		}
		if (!fail) {
			/* roffs can be an offset from another uncomp block */
			roffs = (start_vcn - brl->vcn)
						<< vol->cluster_size_bits;
			if (to_read) {
				got = read_clusters(vol, brl, roffs, to_read,
						 inbuf);
				if (got == to_read) {
					written = ntfs_comp_set(na, brl, roffs,
							to_read, inbuf);
					if ((written >= 0)
					/* free the unused clusters */
					    && !ntfs_compress_free(na,brl,
							written + roffs,
							na->compression_block_size + roffs,
							TRUE, update_from)) {
						done = TRUE;
					} else
				/* if compression failed, leave uncompressed */
						if (written == -1)
							done = TRUE;
				}
			} else
				done = TRUE;
		}
		/* free the buffer unconditionally, even when fail was set */
		free(inbuf);
	}
	if (done && !valid_compressed_run(na,wrl,TRUE,"end compressed close"))
		done = FALSE;
	return (!done);
}