lznt.c revision 195c52bd
1// SPDX-License-Identifier: GPL-2.0
2/*
3 *
4 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
5 *
6 */
7#include <linux/blkdev.h>
8#include <linux/buffer_head.h>
9#include <linux/fs.h>
10#include <linux/nls.h>
11
12#include "debug.h"
13#include "ntfs.h"
14#include "ntfs_fs.h"
15
16// clang-format off
17/* src buffer is zero */
18#define LZNT_ERROR_ALL_ZEROS	1
19#define LZNT_CHUNK_SIZE		0x1000
20// clang-format on
21
22struct lznt_hash {
23	const u8 *p1;
24	const u8 *p2;
25};
26
27struct lznt {
28	const u8 *unc;
29	const u8 *unc_end;
30	const u8 *best_match;
31	size_t max_len;
32	bool std;
33
34	struct lznt_hash hash[LZNT_CHUNK_SIZE];
35};
36
37static inline size_t get_match_len(const u8 *ptr, const u8 *end, const u8 *prev,
38				   size_t max_len)
39{
40	size_t len = 0;
41
42	while (ptr + len < end && ptr[len] == prev[len] && ++len < max_len)
43		;
44	return len;
45}
46
47static size_t longest_match_std(const u8 *src, struct lznt *ctx)
48{
49	size_t hash_index;
50	size_t len1 = 0, len2 = 0;
51	const u8 **hash;
52
53	hash_index =
54		((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) &
55		(LZNT_CHUNK_SIZE - 1);
56
57	hash = &(ctx->hash[hash_index].p1);
58
59	if (hash[0] >= ctx->unc && hash[0] < src && hash[0][0] == src[0] &&
60	    hash[0][1] == src[1] && hash[0][2] == src[2]) {
61		len1 = 3;
62		if (ctx->max_len > 3)
63			len1 += get_match_len(src + 3, ctx->unc_end,
64					      hash[0] + 3, ctx->max_len - 3);
65	}
66
67	if (hash[1] >= ctx->unc && hash[1] < src && hash[1][0] == src[0] &&
68	    hash[1][1] == src[1] && hash[1][2] == src[2]) {
69		len2 = 3;
70		if (ctx->max_len > 3)
71			len2 += get_match_len(src + 3, ctx->unc_end,
72					      hash[1] + 3, ctx->max_len - 3);
73	}
74
75	/* Compare two matches and select the best one */
76	if (len1 < len2) {
77		ctx->best_match = hash[1];
78		len1 = len2;
79	} else {
80		ctx->best_match = hash[0];
81	}
82
83	hash[1] = hash[0];
84	hash[0] = src;
85	return len1;
86}
87
88static size_t longest_match_best(const u8 *src, struct lznt *ctx)
89{
90	size_t max_len;
91	const u8 *ptr;
92
93	if (ctx->unc >= src || !ctx->max_len)
94		return 0;
95
96	max_len = 0;
97	for (ptr = ctx->unc; ptr < src; ++ptr) {
98		size_t len =
99			get_match_len(src, ctx->unc_end, ptr, ctx->max_len);
100		if (len >= max_len) {
101			max_len = len;
102			ctx->best_match = ptr;
103		}
104	}
105
106	return max_len >= 3 ? max_len : 0;
107}
108
109static const size_t s_max_len[] = {
110	0x1002, 0x802, 0x402, 0x202, 0x102, 0x82, 0x42, 0x22, 0x12,
111};
112
113static const size_t s_max_off[] = {
114	0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
115};
116
117static inline u16 make_pair(size_t offset, size_t len, size_t index)
118{
119	return ((offset - 1) << (12 - index)) |
120	       ((len - 3) & (((1 << (12 - index)) - 1)));
121}
122
123static inline size_t parse_pair(u16 pair, size_t *offset, size_t index)
124{
125	*offset = 1 + (pair >> (12 - index));
126	return 3 + (pair & ((1 << (12 - index)) - 1));
127}
128
129/*
130 * compress_chunk
131 *
132 * returns one of the three values:
133 * 0 - ok, 'cmpr' contains 'cmpr_chunk_size' bytes of compressed data
134 * 1 - input buffer is full zero
135 * -2 - the compressed buffer is too small to hold the compressed data
136 */
137static inline int compress_chunk(size_t (*match)(const u8 *, struct lznt *),
138				 const u8 *unc, const u8 *unc_end, u8 *cmpr,
139				 u8 *cmpr_end, size_t *cmpr_chunk_size,
140				 struct lznt *ctx)
141{
142	size_t cnt = 0;
143	size_t idx = 0;
144	const u8 *up = unc;
145	u8 *cp = cmpr + 3;
146	u8 *cp2 = cmpr + 2;
147	u8 not_zero = 0;
148	/* Control byte of 8-bit values: ( 0 - means byte as is, 1 - short pair ) */
149	u8 ohdr = 0;
150	u8 *last;
151	u16 t16;
152
153	if (unc + LZNT_CHUNK_SIZE < unc_end)
154		unc_end = unc + LZNT_CHUNK_SIZE;
155
156	last = min(cmpr + LZNT_CHUNK_SIZE + sizeof(short), cmpr_end);
157
158	ctx->unc = unc;
159	ctx->unc_end = unc_end;
160	ctx->max_len = s_max_len[0];
161
162	while (up < unc_end) {
163		size_t max_len;
164
165		while (unc + s_max_off[idx] < up)
166			ctx->max_len = s_max_len[++idx];
167
168		// Find match
169		max_len = up + 3 <= unc_end ? (*match)(up, ctx) : 0;
170
171		if (!max_len) {
172			if (cp >= last)
173				goto NotCompressed;
174			not_zero |= *cp++ = *up++;
175		} else if (cp + 1 >= last) {
176			goto NotCompressed;
177		} else {
178			t16 = make_pair(up - ctx->best_match, max_len, idx);
179			*cp++ = t16;
180			*cp++ = t16 >> 8;
181
182			ohdr |= 1 << cnt;
183			up += max_len;
184		}
185
186		cnt = (cnt + 1) & 7;
187		if (!cnt) {
188			*cp2 = ohdr;
189			ohdr = 0;
190			cp2 = cp;
191			cp += 1;
192		}
193	}
194
195	if (cp2 < last)
196		*cp2 = ohdr;
197	else
198		cp -= 1;
199
200	*cmpr_chunk_size = cp - cmpr;
201
202	t16 = (*cmpr_chunk_size - 3) | 0xB000;
203	cmpr[0] = t16;
204	cmpr[1] = t16 >> 8;
205
206	return not_zero ? 0 : LZNT_ERROR_ALL_ZEROS;
207
208NotCompressed:
209
210	if ((cmpr + LZNT_CHUNK_SIZE + sizeof(short)) > last)
211		return -2;
212
213	/*
214	 * Copy non cmpr data
215	 * 0x3FFF == ((LZNT_CHUNK_SIZE + 2 - 3) | 0x3000)
216	 */
217	cmpr[0] = 0xff;
218	cmpr[1] = 0x3f;
219
220	memcpy(cmpr + sizeof(short), unc, LZNT_CHUNK_SIZE);
221	*cmpr_chunk_size = LZNT_CHUNK_SIZE + sizeof(short);
222
223	return 0;
224}
225
226static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr,
227				       const u8 *cmpr_end)
228{
229	u8 *up = unc;
230	u8 ch = *cmpr++;
231	size_t bit = 0;
232	size_t index = 0;
233	u16 pair;
234	size_t offset, length;
235
236	/* Do decompression until pointers are inside range */
237	while (up < unc_end && cmpr < cmpr_end) {
238		/* Correct index */
239		while (unc + s_max_off[index] < up)
240			index += 1;
241
242		/* Check the current flag for zero */
243		if (!(ch & (1 << bit))) {
244			/* Just copy byte */
245			*up++ = *cmpr++;
246			goto next;
247		}
248
249		/* Check for boundary */
250		if (cmpr + 1 >= cmpr_end)
251			return -EINVAL;
252
253		/* Read a short from little endian stream */
254		pair = cmpr[1];
255		pair <<= 8;
256		pair |= cmpr[0];
257
258		cmpr += 2;
259
260		/* Translate packed information into offset and length */
261		length = parse_pair(pair, &offset, index);
262
263		/* Check offset for boundary */
264		if (unc + offset > up)
265			return -EINVAL;
266
267		/* Truncate the length if necessary */
268		if (up + length >= unc_end)
269			length = unc_end - up;
270
271		/* Now we copy bytes. This is the heart of LZ algorithm. */
272		for (; length > 0; length--, up++)
273			*up = *(up - offset);
274
275next:
276		/* Advance flag bit value */
277		bit = (bit + 1) & 7;
278
279		if (!bit) {
280			if (cmpr >= cmpr_end)
281				break;
282
283			ch = *cmpr++;
284		}
285	}
286
287	/* return the size of uncompressed data */
288	return up - unc;
289}
290
291/*
292 * 0 - standard compression
293 * !0 - best compression, requires a lot of cpu
294 */
295struct lznt *get_lznt_ctx(int level)
296{
297	struct lznt *r = kzalloc(level ? offsetof(struct lznt, hash) :
298					 sizeof(struct lznt), GFP_NOFS);
299
300	if (r)
301		r->std = !level;
302	return r;
303}
304
305/*
306 * compress_lznt
307 *
308 * Compresses "unc" into "cmpr"
309 * +x - ok, 'cmpr' contains 'final_compressed_size' bytes of compressed data
310 * 0 - input buffer is full zero
311 */
312size_t compress_lznt(const void *unc, size_t unc_size, void *cmpr,
313		     size_t cmpr_size, struct lznt *ctx)
314{
315	int err;
316	size_t (*match)(const u8 *src, struct lznt *ctx);
317	u8 *p = cmpr;
318	u8 *end = p + cmpr_size;
319	const u8 *unc_chunk = unc;
320	const u8 *unc_end = unc_chunk + unc_size;
321	bool is_zero = true;
322
323	if (ctx->std) {
324		match = &longest_match_std;
325		memset(ctx->hash, 0, sizeof(ctx->hash));
326	} else {
327		match = &longest_match_best;
328	}
329
330	/* compression cycle */
331	for (; unc_chunk < unc_end; unc_chunk += LZNT_CHUNK_SIZE) {
332		cmpr_size = 0;
333		err = compress_chunk(match, unc_chunk, unc_end, p, end,
334				     &cmpr_size, ctx);
335		if (err < 0)
336			return unc_size;
337
338		if (is_zero && err != LZNT_ERROR_ALL_ZEROS)
339			is_zero = false;
340
341		p += cmpr_size;
342	}
343
344	if (p <= end - 2)
345		p[0] = p[1] = 0;
346
347	return is_zero ? 0 : PtrOffset(cmpr, p);
348}
349
350/*
351 * decompress_lznt
352 *
353 * decompresses "cmpr" into "unc"
354 */
355ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc,
356			size_t unc_size)
357{
358	const u8 *cmpr_chunk = cmpr;
359	const u8 *cmpr_end = cmpr_chunk + cmpr_size;
360	u8 *unc_chunk = unc;
361	u8 *unc_end = unc_chunk + unc_size;
362	u16 chunk_hdr;
363
364	if (cmpr_size < sizeof(short))
365		return -EINVAL;
366
367	/* read chunk header */
368	chunk_hdr = cmpr_chunk[1];
369	chunk_hdr <<= 8;
370	chunk_hdr |= cmpr_chunk[0];
371
372	/* loop through decompressing chunks */
373	for (;;) {
374		size_t chunk_size_saved;
375		size_t unc_use;
376		size_t cmpr_use = 3 + (chunk_hdr & (LZNT_CHUNK_SIZE - 1));
377
378		/* Check that the chunk actually fits the supplied buffer */
379		if (cmpr_chunk + cmpr_use > cmpr_end)
380			return -EINVAL;
381
382		/* First make sure the chunk contains compressed data */
383		if (chunk_hdr & 0x8000) {
384			/* Decompress a chunk and return if we get an error */
385			ssize_t err =
386				decompress_chunk(unc_chunk, unc_end,
387						 cmpr_chunk + sizeof(chunk_hdr),
388						 cmpr_chunk + cmpr_use);
389			if (err < 0)
390				return err;
391			unc_use = err;
392		} else {
393			/* This chunk does not contain compressed data */
394			unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end
395					  ? unc_end - unc_chunk
396					  : LZNT_CHUNK_SIZE;
397
398			if (cmpr_chunk + sizeof(chunk_hdr) + unc_use >
399			    cmpr_end) {
400				return -EINVAL;
401			}
402
403			memcpy(unc_chunk, cmpr_chunk + sizeof(chunk_hdr),
404			       unc_use);
405		}
406
407		/* Advance pointers */
408		cmpr_chunk += cmpr_use;
409		unc_chunk += unc_use;
410
411		/* Check for the end of unc buffer */
412		if (unc_chunk >= unc_end)
413			break;
414
415		/* Proceed the next chunk */
416		if (cmpr_chunk > cmpr_end - 2)
417			break;
418
419		chunk_size_saved = LZNT_CHUNK_SIZE;
420
421		/* read chunk header */
422		chunk_hdr = cmpr_chunk[1];
423		chunk_hdr <<= 8;
424		chunk_hdr |= cmpr_chunk[0];
425
426		if (!chunk_hdr)
427			break;
428
429		/* Check the size of unc buffer */
430		if (unc_use < chunk_size_saved) {
431			size_t t1 = chunk_size_saved - unc_use;
432			u8 *t2 = unc_chunk + t1;
433
434			/* 'Zero' memory */
435			if (t2 >= unc_end)
436				break;
437
438			memset(unc_chunk, 0, t1);
439			unc_chunk = t2;
440		}
441	}
442
443	/* Check compression boundary */
444	if (cmpr_chunk > cmpr_end)
445		return -EINVAL;
446
447	/*
448	 * The unc size is just a difference between current
449	 * pointer and original one
450	 */
451	return PtrOffset(unc, unc_chunk);
452}
453