1/* lzo1.c -- implementation of the LZO1 algorithm
2
3   This file is part of the LZO real-time data compression library.
4
5   Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer
6   Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer
7   Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer
8   Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer
9   Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer
10   Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer
11   Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
12   Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
13   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
14   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
15   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
16   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
17   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
18   All Rights Reserved.
19
20   The LZO library is free software; you can redistribute it and/or
21   modify it under the terms of the GNU General Public License as
22   published by the Free Software Foundation; either version 2 of
23   the License, or (at your option) any later version.
24
25   The LZO library is distributed in the hope that it will be useful,
26   but WITHOUT ANY WARRANTY; without even the implied warranty of
27   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28   GNU General Public License for more details.
29
30   You should have received a copy of the GNU General Public License
31   along with the LZO library; see the file COPYING.
32   If not, write to the Free Software Foundation, Inc.,
33   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
34
35   Markus F.X.J. Oberhumer
36   <markus@oberhumer.com>
37   http://www.oberhumer.com/opensource/lzo/
38 */
39
40
41#include "lzo_conf.h"
42#include "lzo/lzo1.h"
43
44
45/***********************************************************************
46// The next two defines can be changed to customize LZO1.
47// The default version is LZO1-5/1.
48************************************************************************/
49
50/* run bits (3 - 5) - the compressor and the decompressor
51 * must use the same value. */
52#if !defined(RBITS)
53#  define RBITS     5
54#endif
55
56/* compression level (1 - 9) - this only affects the compressor.
57 * 1 is fastest, 9 is best compression ratio */
58#if !defined(CLEVEL)
59#  define CLEVEL    1           /* fastest by default */
60#endif
61
62
63/* check configuration */
64#if (RBITS < 3 || RBITS > 5)
65#  error "invalid RBITS"
66#endif
67#if (CLEVEL < 1 || CLEVEL > 9)
68#  error "invalid CLEVEL"
69#endif
70
71
72/***********************************************************************
73// You should not have to change anything below this line.
74************************************************************************/
75
76/*
77     Format of the marker byte
78
79
80     76543210
81     --------
82     00000000   a long run (a 'R0' run) - there are short and long R0 runs
83     000rrrrr   a short run with len r
84     mmmooooo   a short match (len = 2+m, o = offset low bits)
85     111ooooo   a long match (o = offset low bits)
86*/
87
88
89#define RSIZE   (1 << RBITS)
90#define RMASK   (RSIZE - 1)
91
92#define OBITS   RBITS               /* offset and run-length use same bits */
93#define OSIZE   (1 << OBITS)
94#define OMASK   (OSIZE - 1)
95
96#define MBITS   (8 - OBITS)
97#define MSIZE   (1 << MBITS)
98#define MMASK   (MSIZE - 1)
99
100
101/* sanity checks */
102#if (OBITS < 3 || OBITS > 5)
103#  error "invalid OBITS"
104#endif
105#if (MBITS < 3 || MBITS > 5)
106#  error "invalid MBITS"
107#endif
108
109
110/***********************************************************************
111// some macros to improve readability
112************************************************************************/
113
114/* Minimum len of a match */
115#define MIN_MATCH           3
116#define THRESHOLD           (MIN_MATCH - 1)
117
118/* Minimum len of match coded in 2 bytes */
119#define MIN_MATCH_SHORT     MIN_MATCH
120
121/* Maximum len of match coded in 2 bytes */
122#define MAX_MATCH_SHORT     (THRESHOLD + (MSIZE - 2))
123/* MSIZE - 2: 0 is used to indicate runs,
124 *            MSIZE-1 is used to indicate a long match */
125
126/* Minimum len of match coded in 3 bytes */
127#define MIN_MATCH_LONG      (MAX_MATCH_SHORT + 1)
128
129/* Maximum len of match coded in 3 bytes */
130#define MAX_MATCH_LONG      (MIN_MATCH_LONG + 255)
131
132/* Maximum offset of a match */
133#define MAX_OFFSET          (1 << (8 + OBITS))
134
135
136/*
137
138RBITS | MBITS  MIN  THR.  MSIZE  MAXS  MINL  MAXL   MAXO  R0MAX R0FAST
139======+===============================================================
140  3   |   5      3    2     32    32    33    288   2048    263   256
141  4   |   4      3    2     16    16    17    272   4096    271   264
142  5   |   3      3    2      8     8     9    264   8192    287   280
143
144 */
145
146
147/***********************************************************************
148// internal configuration
149// all of these affect compression only
150************************************************************************/
151
152/* choose the hashing strategy */
153#ifndef LZO_HASH
154#define LZO_HASH    LZO_HASH_LZO_INCREMENTAL_A
155#endif
156#define D_INDEX1(d,p)       d = DM(DMUL(0x21,DX2(p,5,5)) >> 5)
157#define D_INDEX2(d,p)       d = d ^ D_MASK
158
159#define DBITS       (8 + RBITS)
160#include "lzo_dict.h"
161#define DVAL_LEN    DVAL_LOOKAHEAD
162
163
164/***********************************************************************
165// get algorithm info, return memory required for compression
166************************************************************************/
167
168LZO_EXTERN(lzo_uint) lzo1_info ( int *rbits, int *clevel );
169
170LZO_PUBLIC(lzo_uint)
171lzo1_info ( int *rbits, int *clevel )
172{
173    if (rbits)
174        *rbits = RBITS;
175    if (clevel)
176        *clevel = CLEVEL;
177    return D_SIZE * lzo_sizeof(lzo_bytep);
178}
179
180
181/***********************************************************************
182// decode a R0 literal run (a long run)
183************************************************************************/
184
185#define R0MIN   (RSIZE)             /* Minimum len of R0 run of literals */
186#define R0MAX   (R0MIN + 255)       /* Maximum len of R0 run of literals */
187#define R0FAST  (R0MAX & ~7u)       /* R0MAX aligned to 8 byte boundary */
188
189#if (R0MAX - R0FAST != 7) || ((R0FAST & 7) != 0)
190#  error "something went wrong"
191#endif
192
193/* 7 special codes from R0FAST+1 .. R0MAX
194 * these codes mean long R0 runs with lengths
195 * 512, 1024, 2048, 4096, 8192, 16384, 32768 */
196
197
198/***********************************************************************
199// LZO1 decompress a block of data.
200//
201// Could be easily translated into assembly code.
202************************************************************************/
203
204LZO_PUBLIC(int)
205lzo1_decompress  ( const lzo_bytep in , lzo_uint  in_len,
206                         lzo_bytep out, lzo_uintp out_len,
207                         lzo_voidp wrkmem )
208{
209    lzo_bytep op;
210    const lzo_bytep ip;
211    const lzo_bytep const ip_end = in + in_len;
212    lzo_uint t;
213
214    LZO_UNUSED(wrkmem);
215
216    op = out;
217    ip = in;
218    while (ip < ip_end)
219    {
220        t = *ip++;  /* get marker */
221
222        if (t < R0MIN)          /* a literal run */
223        {
224            if (t == 0)             /* a R0 literal run */
225            {
226                t = *ip++;
227                if (t >= R0FAST - R0MIN)            /* a long R0 run */
228                {
229                    t -= R0FAST - R0MIN;
230                    if (t == 0)
231                        t = R0FAST;
232                    else
233                    {
234#if 0
235                        t = 256u << ((unsigned) t);
236#else
237                        /* help the optimizer */
238                        lzo_uint tt = 256;
239                        do tt <<= 1; while (--t > 0);
240                        t = tt;
241#endif
242                    }
243                    MEMCPY8_DS(op,ip,t);
244                    continue;
245                }
246                t += R0MIN;
247            }
248            MEMCPY_DS(op,ip,t);
249        }
250        else                    /* a match */
251        {
252            lzo_uint tt;
253            /* get match offset */
254            const lzo_bytep m_pos = op - 1;
255            m_pos -= (lzo_uint)(t & OMASK) | (((lzo_uint) *ip++) << OBITS);
256
257            /* get match len */
258            if (t >= ((MSIZE - 1) << OBITS))                /* all m-bits set */
259                tt = (MIN_MATCH_LONG - THRESHOLD) + *ip++;  /* a long match */
260            else
261                tt = t >> OBITS;                            /* a short match */
262
263            assert(m_pos >= out);
264            assert(m_pos <  op);
265            /* a half unrolled loop */
266            *op++ = *m_pos++;
267            *op++ = *m_pos++;
268            MEMCPY_DS(op,m_pos,tt);
269        }
270    }
271
272    *out_len = pd(op, out);
273
274    /* the next line is the only check in the decompressor ! */
275    return (ip == ip_end ? LZO_E_OK :
276           (ip < ip_end  ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
277}
278
279
280/***********************************************************************
281// code a literal run
282************************************************************************/
283
284static
285#if LZO_ARCH_AVR
286__lzo_noinline
287#endif
288lzo_bytep
289store_run(lzo_bytep op, const lzo_bytep ii, lzo_uint r_len)
290{
291    assert(r_len > 0);
292
293    /* code a long R0 run */
294    if (r_len >= 512)
295    {
296        unsigned r_bits = 7;        /* 256 << 7 == 32768 */
297        do {
298            while (r_len >= (256u << r_bits))
299            {
300                r_len -= (256u << r_bits);
301                *op++ = 0; *op++ = LZO_BYTE((R0FAST - R0MIN) + r_bits);
302                MEMCPY8_DS(op, ii, (256u << r_bits));
303            }
304        } while (--r_bits > 0);
305    }
306    while (r_len >= R0FAST)
307    {
308        r_len -= R0FAST;
309        *op++ = 0; *op++ = R0FAST - R0MIN;
310        MEMCPY8_DS(op, ii, R0FAST);
311    }
312
313    if (r_len >= R0MIN)
314    {
315        /* code a short R0 run */
316        *op++ = 0; *op++ = LZO_BYTE(r_len - R0MIN);
317        MEMCPY_DS(op, ii, r_len);
318    }
319    else if (r_len > 0)
320    {
321        /* code a 'normal' run */
322        *op++ = LZO_BYTE(r_len);
323        MEMCPY_DS(op, ii, r_len);
324    }
325
326    assert(r_len == 0);
327    return op;
328}
329
330
331
332/***********************************************************************
333// LZO1 compress a block of data.
334//
335// Could be translated into assembly code without too much effort.
336//
337// I apologize for the spaghetti code, but it really helps the optimizer.
338************************************************************************/
339
340static int
341do_compress    ( const lzo_bytep in , lzo_uint  in_len,
342                       lzo_bytep out, lzo_uintp out_len,
343                       lzo_voidp wrkmem )
344{
345    const lzo_bytep ip;
346#if defined(__LZO_HASH_INCREMENTAL)
347    lzo_xint dv;
348#endif
349    lzo_bytep op;
350    const lzo_bytep m_pos;
351    const lzo_bytep const ip_end = in+in_len - DVAL_LEN - MIN_MATCH_LONG;
352    const lzo_bytep const in_end = in+in_len - DVAL_LEN;
353    const lzo_bytep ii;
354    lzo_dict_p const dict = (lzo_dict_p) wrkmem;
355
356#if !defined(NDEBUG)
357    const lzo_bytep m_pos_sav;
358#endif
359
360    op = out;
361    ip = in;
362    ii = ip;                /* point to start of literal run */
363    if (in_len <= MIN_MATCH_LONG + DVAL_LEN + 1)
364        goto the_end;
365
366    /* init dictionary */
367#if defined(LZO_DETERMINISTIC)
368    BZERO8_PTR(wrkmem,sizeof(lzo_dict_t),D_SIZE);
369#endif
370
371    DVAL_FIRST(dv,ip);
372    UPDATE_D(dict,0,dv,ip,in);
373    ip++;
374    DVAL_NEXT(dv,ip);
375
376    do {
377        lzo_uint m_off;
378        lzo_uint dindex;
379
380        DINDEX1(dindex,ip);
381        GINDEX(m_pos,m_off,dict,dindex,in);
382        if (LZO_CHECK_MPOS(m_pos,m_off,in,ip,MAX_OFFSET))
383            goto literal;
384        if (m_pos[0] == ip[0] && m_pos[1] == ip[1] && m_pos[2] == ip[2])
385            goto match;
386        DINDEX2(dindex,ip);
387        GINDEX(m_pos,m_off,dict,dindex,in);
388        if (LZO_CHECK_MPOS(m_pos,m_off,in,ip,MAX_OFFSET))
389            goto literal;
390        if (m_pos[0] == ip[0] && m_pos[1] == ip[1] && m_pos[2] == ip[2])
391            goto match;
392        goto literal;
393
394
395literal:
396        UPDATE_I(dict,0,dindex,ip,in);
397        if (++ip >= ip_end)
398            break;
399        continue;
400
401match:
402        UPDATE_I(dict,0,dindex,ip,in);
403#if !defined(NDEBUG) && defined(LZO_DICT_USE_PTR)
404        m_pos_sav = m_pos;
405#endif
406        m_pos += 3;
407        {
408    /* we have found a match (of at least length 3) */
409#if !defined(NDEBUG) && !defined(LZO_DICT_USE_PTR)
410            assert((m_pos_sav = ip - m_off) == (m_pos - 3));
411#endif
412            /* 1) store the current literal run */
413            if (pd(ip,ii) > 0)
414            {
415                lzo_uint t = pd(ip,ii);
416#if 1
417                /* OPTIMIZED: inline the copying of a short run */
418                if (t < R0MIN)
419                {
420                    *op++ = LZO_BYTE(t);
421                    MEMCPY_DS(op, ii, t);
422                }
423                else
424#endif
425                    op = store_run(op,ii,t);
426            }
427
428            /* 2a) compute match len */
429            ii = ip;        /* point to start of current match */
430
431            /* we already matched MIN_MATCH bytes,
432             * m_pos also already advanced MIN_MATCH bytes */
433            ip += MIN_MATCH;
434            assert(m_pos < ip);
435
436            /* try to match another MIN_MATCH_LONG - MIN_MATCH bytes
437             * to see if we get a long match */
438
439#define PS  *m_pos++ != *ip++
440
441#if (MIN_MATCH_LONG - MIN_MATCH == 2)                   /* MBITS == 2 */
442            if (PS || PS)
443#elif (MIN_MATCH_LONG - MIN_MATCH == 6)                 /* MBITS == 3 */
444            if (PS || PS || PS || PS || PS || PS)
445#elif (MIN_MATCH_LONG - MIN_MATCH == 14)                /* MBITS == 4 */
446            if (PS || PS || PS || PS || PS || PS || PS ||
447                PS || PS || PS || PS || PS || PS || PS)
448#elif (MIN_MATCH_LONG - MIN_MATCH == 30)                /* MBITS == 5 */
449            if (PS || PS || PS || PS || PS || PS || PS || PS ||
450                PS || PS || PS || PS || PS || PS || PS || PS ||
451                PS || PS || PS || PS || PS || PS || PS || PS ||
452                PS || PS || PS || PS || PS || PS)
453#else
454#  error "MBITS not yet implemented"
455#endif
456            {
457                lzo_uint m_len;
458
459            /* 2b) code a short match */
460                    assert(pd(ip,m_pos) == m_off);
461                --ip;   /* ran one too far, point back to non-match */
462                m_len = pd(ip, ii);
463                    assert(m_len >= MIN_MATCH_SHORT);
464                    assert(m_len <= MAX_MATCH_SHORT);
465                    assert(m_off > 0);
466                    assert(m_off <= MAX_OFFSET);
467                    assert(ii-m_off == m_pos_sav);
468                    assert(lzo_memcmp(m_pos_sav,ii,m_len) == 0);
469                --m_off;
470                /* code short match len + low offset bits */
471                *op++ = LZO_BYTE(((m_len - THRESHOLD) << OBITS) |
472                                 (m_off & OMASK));
473                /* code high offset bits */
474                *op++ = LZO_BYTE(m_off >> OBITS);
475
476
477            /* 2c) Insert phrases (beginning with ii+1) into the dictionary. */
478
479#define SI      /* nothing */
480#define DI      ++ii; DVAL_NEXT(dv,ii); UPDATE_D(dict,0,dv,ii,in);
481#define XI      assert(ii < ip); ii = ip; DVAL_FIRST(dv,(ip));
482
483#if (CLEVEL == 9) || (CLEVEL >= 7 && MBITS <= 4) || (CLEVEL >= 5 && MBITS <= 3)
484            /* Insert the whole match (ii+1)..(ip-1) into dictionary.  */
485                ++ii;
486                do {
487                    DVAL_NEXT(dv,ii);
488                    UPDATE_D(dict,0,dv,ii,in);
489                } while (++ii < ip);
490                DVAL_NEXT(dv,ii);
491                assert(ii == ip);
492                DVAL_ASSERT(dv,ip);
493#elif (CLEVEL >= 3)
494                SI   DI DI   XI
495#elif (CLEVEL >= 2)
496                SI   DI      XI
497#else
498                             XI
499#endif
500
501            }
502            else
503            {
504            /* we've found a long match - see how far we can still go */
505                const lzo_bytep end;
506                lzo_uint m_len;
507
508                assert(ip <= in_end);
509                assert(ii == ip - MIN_MATCH_LONG);
510
511                if (pd(in_end,ip) <= (MAX_MATCH_LONG - MIN_MATCH_LONG))
512                    end = in_end;
513                else
514                {
515                    end = ip + (MAX_MATCH_LONG - MIN_MATCH_LONG);
516                    assert(end < in_end);
517                }
518
519                while (ip < end  &&  *m_pos == *ip)
520                    m_pos++, ip++;
521                assert(ip <= in_end);
522
523            /* 2b) code the long match */
524                m_len = pd(ip, ii);
525                    assert(m_len >= MIN_MATCH_LONG);
526                    assert(m_len <= MAX_MATCH_LONG);
527                    assert(m_off > 0);
528                    assert(m_off <= MAX_OFFSET);
529                    assert(ii-m_off == m_pos_sav);
530                    assert(lzo_memcmp(m_pos_sav,ii,m_len) == 0);
531                    assert(pd(ip,m_pos) == m_off);
532                --m_off;
533                /* code long match flag + low offset bits */
534                *op++ = LZO_BYTE(((MSIZE - 1) << OBITS) | (m_off & OMASK));
535                /* code high offset bits */
536                *op++ = LZO_BYTE(m_off >> OBITS);
537                /* code match len */
538                *op++ = LZO_BYTE(m_len - MIN_MATCH_LONG);
539
540
541            /* 2c) Insert phrases (beginning with ii+1) into the dictionary. */
542#if (CLEVEL == 9)
543            /* Insert the whole match (ii+1)..(ip-1) into dictionary.  */
544            /* This is not recommended because it is slow. */
545                ++ii;
546                do {
547                    DVAL_NEXT(dv,ii);
548                    UPDATE_D(dict,0,dv,ii,in);
549                } while (++ii < ip);
550                DVAL_NEXT(dv,ii);
551                assert(ii == ip);
552                DVAL_ASSERT(dv,ip);
553#elif (CLEVEL >= 8)
554                SI   DI DI DI DI DI DI DI DI   XI
555#elif (CLEVEL >= 7)
556                SI   DI DI DI DI DI DI DI      XI
557#elif (CLEVEL >= 6)
558                SI   DI DI DI DI DI DI         XI
559#elif (CLEVEL >= 5)
560                SI   DI DI DI DI               XI
561#elif (CLEVEL >= 4)
562                SI   DI DI DI                  XI
563#elif (CLEVEL >= 3)
564                SI   DI DI                     XI
565#elif (CLEVEL >= 2)
566                SI   DI                        XI
567#else
568                                               XI
569#endif
570            }
571
572            /* ii now points to the start of next literal run */
573            assert(ii == ip);
574        }
575    } while (ip < ip_end);
576
577
578
579the_end:
580    assert(ip <= in_end);
581
582
583#if defined(LZO_RETURN_IF_NOT_COMPRESSIBLE)
584    /* return -1 if op == out to indicate that we
585     * couldn't compress and didn't copy anything.
586     */
587    if (op == out)
588    {
589        *out_len = 0;
590        return LZO_E_NOT_COMPRESSIBLE;
591    }
592#endif
593
594
595    /* store the final literal run */
596    if (pd(in_end+DVAL_LEN,ii) > 0)
597        op = store_run(op,ii,pd(in_end+DVAL_LEN,ii));
598
599    *out_len = pd(op, out);
600    return 0;               /* compression went ok */
601}
602
603
604/***********************************************************************
605// compress public entry point.
606************************************************************************/
607
608LZO_PUBLIC(int)
609lzo1_compress ( const lzo_bytep in , lzo_uint  in_len,
610                      lzo_bytep out, lzo_uintp out_len,
611                      lzo_voidp wrkmem )
612{
613    int r = LZO_E_OK;
614
615    /* don't try to compress a block that's too short */
616    if (in_len <= 0)
617        *out_len = 0;
618    else if (in_len <= MIN_MATCH_LONG + DVAL_LEN + 1)
619    {
620#if defined(LZO_RETURN_IF_NOT_COMPRESSIBLE)
621        r = LZO_E_NOT_COMPRESSIBLE;
622#else
623        *out_len = pd(store_run(out,in,in_len), out);
624#endif
625    }
626    else
627        r = do_compress(in,in_len,out,out_len,wrkmem);
628
629    return r;
630}
631
632
633/*
634vi:ts=4:et
635*/
636