1219354Spjd/*
2219354Spjd * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
3219354Spjd *
4219354Spjd * Redistribution and use in source and binary forms, with or without modifica-
5219354Spjd * tion, are permitted provided that the following conditions are met:
6219354Spjd *
7219354Spjd *   1.  Redistributions of source code must retain the above copyright notice,
8219354Spjd *       this list of conditions and the following disclaimer.
9219354Spjd *
10219354Spjd *   2.  Redistributions in binary form must reproduce the above copyright
11219354Spjd *       notice, this list of conditions and the following disclaimer in the
12219354Spjd *       documentation and/or other materials provided with the distribution.
13219354Spjd *
14219354Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
15219354Spjd * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
16219354Spjd * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
17219354Spjd * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
18219354Spjd * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19219354Spjd * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20219354Spjd * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21219354Spjd * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
22219354Spjd * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
23219354Spjd * OF THE POSSIBILITY OF SUCH DAMAGE.
24219354Spjd *
25219354Spjd * Alternatively, the contents of this file may be used under the terms of
26219354Spjd * the GNU General Public License ("GPL") version 2 or any later version,
27219354Spjd * in which case the provisions of the GPL are applicable instead of
28219354Spjd * the above. If you wish to allow the use of your version of this file
29219354Spjd * only under the terms of the GPL and not to allow others to use your
30219354Spjd * version of this file under the BSD license, indicate your decision
31219354Spjd * by deleting the provisions above and replace them with the notice
32219354Spjd * and other provisions required by the GPL. If you do not delete the
33219354Spjd * provisions above, a recipient may use your version of this file under
34219354Spjd * either the BSD or the GPL.
35219354Spjd */
36219354Spjd
37219354Spjd#include "lzf.h"
38219354Spjd
39219354Spjd#define HSIZE (1 << (HLOG))
40219354Spjd
41219354Spjd/*
42219354Spjd * don't play with this unless you benchmark!
43219354Spjd * decompression is not dependent on the hash function
44219354Spjd * the hashing function might seem strange, just believe me
45219354Spjd * it works ;)
46219354Spjd */
47219354Spjd#ifndef FRST
48219354Spjd# define FRST(p) (((p[0]) << 8) | p[1])
49219354Spjd# define NEXT(v,p) (((v) << 8) | p[2])
50219354Spjd# if ULTRA_FAST
51219354Spjd#  define IDX(h) ((( h             >> (3*8 - HLOG)) - h  ) & (HSIZE - 1))
52219354Spjd# elif VERY_FAST
53219354Spjd#  define IDX(h) ((( h             >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
54219354Spjd# else
55219354Spjd#  define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
56219354Spjd# endif
57219354Spjd#endif
58219354Spjd/*
59219354Spjd * IDX works because it is very similar to a multiplicative hash, e.g.
60219354Spjd * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
61219354Spjd * the latter is also quite fast on newer CPUs, and compresses similarly.
62219354Spjd *
63219354Spjd * the next one is also quite good, albeit slow ;)
64219354Spjd * (int)(cos(h & 0xffffff) * 1e6)
65219354Spjd */
66219354Spjd
67219354Spjd#if 0
68219354Spjd/* original lzv-like hash function, much worse and thus slower */
69219354Spjd# define FRST(p) (p[0] << 5) ^ p[1]
70219354Spjd# define NEXT(v,p) ((v) << 5) ^ p[2]
71219354Spjd# define IDX(h) ((h) & (HSIZE - 1))
72219354Spjd#endif
73219354Spjd
74219354Spjd#define        MAX_LIT        (1 <<  5)
75219354Spjd#define        MAX_OFF        (1 << 13)
76219354Spjd#define        MAX_REF        ((1 << 8) + (1 << 3))
77219354Spjd
78219354Spjd#if __GNUC__ >= 3
79219354Spjd# define expect(expr,value)         __builtin_expect ((expr),(value))
80219354Spjd# define inline                     inline
81219354Spjd#else
82219354Spjd# define expect(expr,value)         (expr)
83219354Spjd# define inline                     static
84219354Spjd#endif
85219354Spjd
86219354Spjd#define expect_false(expr) expect ((expr) != 0, 0)
87219354Spjd#define expect_true(expr)  expect ((expr) != 0, 1)
88219354Spjd
89219354Spjd/*
90219354Spjd * compressed format
91219354Spjd *
92219354Spjd * 000LLLLL <L+1>    ; literal
93219354Spjd * LLLooooo oooooooo ; backref L
94219354Spjd * 111ooooo LLLLLLLL oooooooo ; backref L+7
95219354Spjd *
96219354Spjd */
97219354Spjd
98219354Spjdunsigned int
99219354Spjdlzf_compress (const void *const in_data, unsigned int in_len,
100219354Spjd	      void *out_data, unsigned int out_len
101219354Spjd#if LZF_STATE_ARG
102219354Spjd              , LZF_STATE htab
103219354Spjd#endif
104219354Spjd              )
105219354Spjd{
106219354Spjd#if !LZF_STATE_ARG
107219354Spjd  LZF_STATE htab;
108219354Spjd#endif
109219354Spjd  const u8 **hslot;
110219354Spjd  const u8 *ip = (const u8 *)in_data;
111219354Spjd        u8 *op = (u8 *)out_data;
112219354Spjd  const u8 *in_end  = ip + in_len;
113219354Spjd        u8 *out_end = op + out_len;
114219354Spjd  const u8 *ref;
115219354Spjd
116219354Spjd  /* off requires a type wide enough to hold a general pointer difference.
117219354Spjd   * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
118219354Spjd   * works for differences within a single object). We also assume that no
119219354Spjd   * no bit pattern traps. Since the only platform that is both non-POSIX
120219354Spjd   * and fails to support both assumptions is windows 64 bit, we make a
121219354Spjd   * special workaround for it.
122219354Spjd   */
123219354Spjd#if defined (WIN32) && defined (_M_X64)
124219354Spjd  unsigned _int64 off; /* workaround for missing POSIX compliance */
125219354Spjd#else
126219354Spjd  unsigned long off;
127219354Spjd#endif
128219354Spjd  unsigned int hval;
129219354Spjd  int lit;
130219354Spjd
131219354Spjd  if (!in_len || !out_len)
132219354Spjd    return 0;
133219354Spjd
134219354Spjd#if INIT_HTAB
135219354Spjd  memset (htab, 0, sizeof (htab));
136219354Spjd# if 0
137219354Spjd  for (hslot = htab; hslot < htab + HSIZE; hslot++)
138219354Spjd    *hslot++ = ip;
139219354Spjd# endif
140219354Spjd#endif
141219354Spjd
142219354Spjd  lit = 0; op++; /* start run */
143219354Spjd
144219354Spjd  hval = FRST (ip);
145219354Spjd  while (ip < in_end - 2)
146219354Spjd    {
147219354Spjd      hval = NEXT (hval, ip);
148219354Spjd      hslot = htab + IDX (hval);
149219354Spjd      ref = *hslot; *hslot = ip;
150219354Spjd
151219354Spjd      if (1
152219354Spjd#if INIT_HTAB
153219354Spjd          && ref < ip /* the next test will actually take care of this, but this is faster */
154219354Spjd#endif
155219354Spjd          && (off = ip - ref - 1) < MAX_OFF
156219354Spjd          && ip + 4 < in_end
157219354Spjd          && ref > (const u8 *)in_data
158219354Spjd#if STRICT_ALIGN
159219354Spjd          && ref[0] == ip[0]
160219354Spjd          && ref[1] == ip[1]
161219354Spjd          && ref[2] == ip[2]
162219354Spjd#else
163219354Spjd          && *(const u16 *)ref == *(const u16 *)ip
164219354Spjd          && ref[2] == ip[2]
165219354Spjd#endif
166219354Spjd        )
167219354Spjd        {
168219354Spjd          /* match found at *ref++ */
169219354Spjd          unsigned int len = 2;
170219354Spjd          unsigned int maxlen = in_end - ip - len;
171219354Spjd          maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
172219354Spjd
173219354Spjd          if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
174219354Spjd            if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
175219354Spjd              return 0;
176219354Spjd
177219354Spjd          op [- lit - 1] = lit - 1; /* stop run */
178219354Spjd          op -= !lit; /* undo run if length is zero */
179219354Spjd
180219354Spjd          for (;;)
181219354Spjd            {
182219354Spjd              if (expect_true (maxlen > 16))
183219354Spjd                {
184219354Spjd                  len++; if (ref [len] != ip [len]) break;
185219354Spjd                  len++; if (ref [len] != ip [len]) break;
186219354Spjd                  len++; if (ref [len] != ip [len]) break;
187219354Spjd                  len++; if (ref [len] != ip [len]) break;
188219354Spjd
189219354Spjd                  len++; if (ref [len] != ip [len]) break;
190219354Spjd                  len++; if (ref [len] != ip [len]) break;
191219354Spjd                  len++; if (ref [len] != ip [len]) break;
192219354Spjd                  len++; if (ref [len] != ip [len]) break;
193219354Spjd
194219354Spjd                  len++; if (ref [len] != ip [len]) break;
195219354Spjd                  len++; if (ref [len] != ip [len]) break;
196219354Spjd                  len++; if (ref [len] != ip [len]) break;
197219354Spjd                  len++; if (ref [len] != ip [len]) break;
198219354Spjd
199219354Spjd                  len++; if (ref [len] != ip [len]) break;
200219354Spjd                  len++; if (ref [len] != ip [len]) break;
201219354Spjd                  len++; if (ref [len] != ip [len]) break;
202219354Spjd                  len++; if (ref [len] != ip [len]) break;
203219354Spjd                }
204219354Spjd
205219354Spjd              do
206219354Spjd                len++;
207219354Spjd              while (len < maxlen && ref[len] == ip[len]);
208219354Spjd
209219354Spjd              break;
210219354Spjd            }
211219354Spjd
212219354Spjd          len -= 2; /* len is now #octets - 1 */
213219354Spjd          ip++;
214219354Spjd
215219354Spjd          if (len < 7)
216219354Spjd            {
217219354Spjd              *op++ = (off >> 8) + (len << 5);
218219354Spjd            }
219219354Spjd          else
220219354Spjd            {
221219354Spjd              *op++ = (off >> 8) + (  7 << 5);
222219354Spjd              *op++ = len - 7;
223219354Spjd            }
224219354Spjd
225219354Spjd          *op++ = off;
226219354Spjd          lit = 0; op++; /* start run */
227219354Spjd
228219354Spjd          ip += len + 1;
229219354Spjd
230219354Spjd          if (expect_false (ip >= in_end - 2))
231219354Spjd            break;
232219354Spjd
233219354Spjd#if ULTRA_FAST || VERY_FAST
234219354Spjd          --ip;
235219354Spjd# if VERY_FAST && !ULTRA_FAST
236219354Spjd          --ip;
237219354Spjd# endif
238219354Spjd          hval = FRST (ip);
239219354Spjd
240219354Spjd          hval = NEXT (hval, ip);
241219354Spjd          htab[IDX (hval)] = ip;
242219354Spjd          ip++;
243219354Spjd
244219354Spjd# if VERY_FAST && !ULTRA_FAST
245219354Spjd          hval = NEXT (hval, ip);
246219354Spjd          htab[IDX (hval)] = ip;
247219354Spjd          ip++;
248219354Spjd# endif
249219354Spjd#else
250219354Spjd          ip -= len + 1;
251219354Spjd
252219354Spjd          do
253219354Spjd            {
254219354Spjd              hval = NEXT (hval, ip);
255219354Spjd              htab[IDX (hval)] = ip;
256219354Spjd              ip++;
257219354Spjd            }
258219354Spjd          while (len--);
259219354Spjd#endif
260219354Spjd        }
261219354Spjd      else
262219354Spjd        {
263219354Spjd          /* one more literal byte we must copy */
264219354Spjd          if (expect_false (op >= out_end))
265219354Spjd            return 0;
266219354Spjd
267219354Spjd          lit++; *op++ = *ip++;
268219354Spjd
269219354Spjd          if (expect_false (lit == MAX_LIT))
270219354Spjd            {
271219354Spjd              op [- lit - 1] = lit - 1; /* stop run */
272219354Spjd              lit = 0; op++; /* start run */
273219354Spjd            }
274219354Spjd        }
275219354Spjd    }
276219354Spjd
277219354Spjd  if (op + 3 > out_end) /* at most 3 bytes can be missing here */
278219354Spjd    return 0;
279219354Spjd
280219354Spjd  while (ip < in_end)
281219354Spjd    {
282219354Spjd      lit++; *op++ = *ip++;
283219354Spjd
284219354Spjd      if (expect_false (lit == MAX_LIT))
285219354Spjd        {
286219354Spjd          op [- lit - 1] = lit - 1; /* stop run */
287219354Spjd          lit = 0; op++; /* start run */
288219354Spjd        }
289219354Spjd    }
290219354Spjd
291219354Spjd  op [- lit - 1] = lit - 1; /* end run */
292219354Spjd  op -= !lit; /* undo run if length is zero */
293219354Spjd
294219354Spjd  return op - (u8 *)out_data;
295219354Spjd}
296219354Spjd
297219354Spjd#if AVOID_ERRNO
298219354Spjd# define SET_ERRNO(n)
299219354Spjd#else
300219354Spjd# include <errno.h>
301219354Spjd# define SET_ERRNO(n) errno = (n)
302219354Spjd#endif
303219354Spjd
304219354Spjd#if (__i386 || __amd64) && __GNUC__ >= 3
305219354Spjd# define lzf_movsb(dst, src, len)                \
306219354Spjd   asm ("rep movsb"                              \
307219354Spjd        : "=D" (dst), "=S" (src), "=c" (len)     \
308219354Spjd        :  "0" (dst),  "1" (src),  "2" (len));
309219354Spjd#endif
310219354Spjd
311219354Spjdunsigned int
312219354Spjdlzf_decompress (const void *const in_data,  unsigned int in_len,
313219354Spjd                void             *out_data, unsigned int out_len)
314219354Spjd{
315219354Spjd  u8 const *ip = (const u8 *)in_data;
316219354Spjd  u8       *op = (u8 *)out_data;
317219354Spjd  u8 const *const in_end  = ip + in_len;
318219354Spjd  u8       *const out_end = op + out_len;
319219354Spjd
320219354Spjd  do
321219354Spjd    {
322219354Spjd      unsigned int ctrl = *ip++;
323219354Spjd
324219354Spjd      if (ctrl < (1 << 5)) /* literal run */
325219354Spjd        {
326219354Spjd          ctrl++;
327219354Spjd
328219354Spjd          if (op + ctrl > out_end)
329219354Spjd            {
330219354Spjd              SET_ERRNO (E2BIG);
331219354Spjd              return 0;
332219354Spjd            }
333219354Spjd
334219354Spjd#if CHECK_INPUT
335219354Spjd          if (ip + ctrl > in_end)
336219354Spjd            {
337219354Spjd              SET_ERRNO (EINVAL);
338219354Spjd              return 0;
339219354Spjd            }
340219354Spjd#endif
341219354Spjd
342219354Spjd#ifdef lzf_movsb
343219354Spjd          lzf_movsb (op, ip, ctrl);
344219354Spjd#else
345219354Spjd          do
346219354Spjd            *op++ = *ip++;
347219354Spjd          while (--ctrl);
348219354Spjd#endif
349219354Spjd        }
350219354Spjd      else /* back reference */
351219354Spjd        {
352219354Spjd          unsigned int len = ctrl >> 5;
353219354Spjd
354219354Spjd          u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;
355219354Spjd
356219354Spjd#if CHECK_INPUT
357219354Spjd          if (ip >= in_end)
358219354Spjd            {
359219354Spjd              SET_ERRNO (EINVAL);
360219354Spjd              return 0;
361219354Spjd            }
362219354Spjd#endif
363219354Spjd          if (len == 7)
364219354Spjd            {
365219354Spjd              len += *ip++;
366219354Spjd#if CHECK_INPUT
367219354Spjd              if (ip >= in_end)
368219354Spjd                {
369219354Spjd                  SET_ERRNO (EINVAL);
370219354Spjd                  return 0;
371219354Spjd                }
372219354Spjd#endif
373219354Spjd            }
374219354Spjd
375219354Spjd          ref -= *ip++;
376219354Spjd
377219354Spjd          if (op + len + 2 > out_end)
378219354Spjd            {
379219354Spjd              SET_ERRNO (E2BIG);
380219354Spjd              return 0;
381219354Spjd            }
382219354Spjd
383219354Spjd          if (ref < (u8 *)out_data)
384219354Spjd            {
385219354Spjd              SET_ERRNO (EINVAL);
386219354Spjd              return 0;
387219354Spjd            }
388219354Spjd
389219354Spjd#ifdef lzf_movsb
390219354Spjd          len += 2;
391219354Spjd          lzf_movsb (op, ref, len);
392219354Spjd#else
393219354Spjd          *op++ = *ref++;
394219354Spjd          *op++ = *ref++;
395219354Spjd
396219354Spjd          do
397219354Spjd            *op++ = *ref++;
398219354Spjd          while (--len);
399219354Spjd#endif
400219354Spjd        }
401219354Spjd    }
402219354Spjd  while (ip < in_end);
403219354Spjd
404219354Spjd  return op - (u8 *)out_data;
405219354Spjd}
406219354Spjd
407