1/* 2 * $Source: /home/user/PROJECT/WL-520gu-NewUI/src/linux/linux/drivers/char/ftape/compressor/lzrw3.c,v $ 3 * $Revision: 1.1.1.1 $ 4 * $Date: 2008/10/15 03:26:29 $ 5 * 6 * Implementation of Ross Williams lzrw3 algorithm. Adaption for zftape. 7 * 8 */ 9 10#include "../compressor/lzrw3.h" /* Defines single exported function "compress". */ 11 12/******************************************************************************/ 13/* */ 14/* LZRW3.C */ 15/* */ 16/******************************************************************************/ 17/* */ 18/* Author : Ross Williams. */ 19/* Date : 30-Jun-1991. */ 20/* Release : 1. */ 21/* */ 22/******************************************************************************/ 23/* */ 24/* This file contains an implementation of the LZRW3 data compression */ 25/* algorithm in C. */ 26/* */ 27/* The algorithm is a general purpose compression algorithm that runs fast */ 28/* and gives reasonable compression. The algorithm is a member of the Lempel */ 29/* Ziv family of algorithms and bases its compression on the presence in the */ 30/* data of repeated substrings. */ 31/* */ 32/* This algorithm is unpatented and the code is public domain. As the */ 33/* algorithm is based on the LZ77 class of algorithms, it is unlikely to be */ 34/* the subject of a patent challenge. */ 35/* */ 36/* Unlike the LZRW1 and LZRW1-A algorithms, the LZRW3 algorithm is */ 37/* deterministic and is guaranteed to yield the same compressed */ 38/* representation for a given file each time it is run. */ 39/* */ 40/* The LZRW3 algorithm was originally designed and implemented */ 41/* by Ross Williams on 31-Dec-1990. */ 42/* */ 43/* Here are the results of applying this code, compiled under THINK C 4.0 */ 44/* and running on a Mac-SE (8MHz 68000), to the standard calgary corpus. */ 45/* */ 46/* +----------------------------------------------------------------+ */ 47/* | DATA COMPRESSION TEST | */ 48/* | ===================== | */ 49/* | Time of run : Sun 30-Jun-1991 09:31PM | */ 50/* | Timing accuracy : One part in 100 | */ 51/* | Context length : 262144 bytes (= 256.0000K) | */ 52/* | Test suite : Calgary Corpus Suite | */ 53/* | Files in suite : 14 | */ 54/* | Algorithm : LZRW3 | */ 55/* | Note: All averages are calculated from the un-rounded values. | */ 56/* +----------------------------------------------------------------+ */ 57/* | File Name Length CxB ComLen %Remn Bits Com K/s Dec K/s | */ 58/* | ---------- ------ --- ------ ----- ---- ------- ------- | */ 59/* | rpus:Bib.D 111261 1 55033 49.5 3.96 19.46 32.27 | */ 60/* | us:Book1.D 768771 3 467962 60.9 4.87 17.03 31.07 | */ 61/* | us:Book2.D 610856 3 317102 51.9 4.15 19.39 34.15 | */ 62/* | rpus:Geo.D 102400 1 82424 80.5 6.44 11.65 18.18 | */ 63/* | pus:News.D 377109 2 205670 54.5 4.36 17.14 27.47 | */ 64/* | pus:Obj1.D 21504 1 13027 60.6 4.85 13.40 18.95 | */ 65/* | pus:Obj2.D 246814 1 116286 47.1 3.77 19.31 30.10 | */ 66/* | s:Paper1.D 53161 1 27522 51.8 4.14 18.60 31.15 | */ 67/* | s:Paper2.D 82199 1 45160 54.9 4.40 18.45 32.84 | */ 68/* | rpus:Pic.D 513216 2 122388 23.8 1.91 35.29 51.05 | */ 69/* | us:Progc.D 39611 1 19669 49.7 3.97 18.87 30.64 | */ 70/* | us:Progl.D 71646 1 28247 39.4 3.15 24.34 40.66 | */ 71/* | us:Progp.D 49379 1 19377 39.2 3.14 23.91 39.23 | */ 72/* | us:Trans.D 93695 1 33481 35.7 2.86 25.48 40.37 | */ 73/* +----------------------------------------------------------------+ */ 74/* | Average 224401 1 110953 50.0 4.00 20.17 32.72 | */ 75/* +----------------------------------------------------------------+ */ 76/* */ 77/******************************************************************************/ 78 79/******************************************************************************/ 80 81/* The following structure is returned by the "compress" function below when */ 82/* the user asks the function to return identifying information. */ 83/* The most important field in the record is the working memory field which */ 84/* tells the calling program how much working memory should be passed to */ 85/* "compress" when it is called to perform a compression or decompression. */ 86/* LZRW3 uses the same amount of memory during compression and decompression. */ 87/* For more information on this structure see "compress.h". */ 88 89#define U(X) ((ULONG) X) 90#define SIZE_P_BYTE (U(sizeof(UBYTE *))) 91#define SIZE_WORD (U(sizeof(UWORD ))) 92#define ALIGNMENT_FUDGE (U(16)) 93#define MEM_REQ ( U(4096)*(SIZE_P_BYTE) + ALIGNMENT_FUDGE ) 94 95static struct compress_identity identity = 96{ 97 U(0x032DDEA8), /* Algorithm identification number. */ 98 MEM_REQ, /* Working memory (bytes) required. */ 99 "LZRW3", /* Name of algorithm. */ 100 "1.0", /* Version number of algorithm. */ 101 "31-Dec-1990", /* Date of algorithm. */ 102 "Public Domain", /* Copyright notice. */ 103 "Ross N. Williams", /* Author of algorithm. */ 104 "Renaissance Software", /* Affiliation of author. */ 105 "Public Domain" /* Vendor of algorithm. */ 106}; 107 108LOCAL void compress_compress (UBYTE *,UBYTE *,ULONG,UBYTE *, LONG *); 109LOCAL void compress_decompress(UBYTE *,UBYTE *,LONG, UBYTE *, ULONG *); 110 111/******************************************************************************/ 112 113/* This function is the only function exported by this module. */ 114/* Depending on its first parameter, the function can be requested to */ 115/* compress a block of memory, decompress a block of memory, or to identify */ 116/* itself. For more information, see the specification file "compress.h". */ 117 118EXPORT void lzrw3_compress(action,wrk_mem,src_adr,src_len,dst_adr,p_dst_len) 119UWORD action; /* Action to be performed. */ 120UBYTE *wrk_mem; /* Address of working memory we can use. */ 121UBYTE *src_adr; /* Address of input data. */ 122LONG src_len; /* Length of input data. */ 123UBYTE *dst_adr; /* Address to put output data. */ 124void *p_dst_len; /* Address of longword for length of output data. */ 125{ 126 switch (action) 127 { 128 case COMPRESS_ACTION_IDENTITY: 129 *((struct compress_identity **)p_dst_len)= &identity; 130 break; 131 case COMPRESS_ACTION_COMPRESS: 132 compress_compress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len); 133 break; 134 case COMPRESS_ACTION_DECOMPRESS: 135 compress_decompress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len); 136 break; 137 } 138} 139 140/******************************************************************************/ 141/* */ 142/* BRIEF DESCRIPTION OF THE LZRW3 ALGORITHM */ 143/* ======================================== */ 144/* The LZRW3 algorithm is identical to the LZRW1-A algorithm except that */ 145/* instead of transmitting history offsets, it transmits hash table indexes. */ 146/* In order to decode the indexes, the decompressor must maintain an */ 147/* identical hash table. Copy items are straightforward:when the decompressor */ 148/* receives a copy item, it simply looks up the hash table to translate the */ 149/* index into a pointer into the data already decompressed. To update the */ 150/* hash table, it replaces the same table entry with a pointer to the start */ 151/* of the newly decoded phrase. The tricky part is with literal items, for at */ 152/* the time that the decompressor receives a literal item the decompressor */ 153/* does not have the three bytes in the Ziv (that the compressor has) to */ 154/* perform the three-byte hash. To solve this problem, in LZRW3, both the */ 155/* compressor and decompressor are wired up so that they "buffer" these */ 156/* literals and update their hash tables only when three bytes are available. */ 157/* This makes the maximum buffering 2 bytes. */ 158/* */ 159/* Replacement of offsets by hash table indexes yields a few percent extra */ 160/* compression at the cost of some speed. LZRW3 is slower than LZRW1, LZRW1-A */ 161/* and LZRW2, but yields better compression. */ 162/* */ 163/* Extra compression could be obtained by using a hash table of depth two. */ 164/* However, increasing the depth above one incurs a significant decrease in */ 165/* compression speed which was not considered worthwhile. Another reason for */ 166/* keeping the depth down to one was to allow easy comparison with the */ 167/* LZRW1-A and LZRW2 algorithms so as to demonstrate the exact effect of the */ 168/* use of direct hash indexes. */ 169/* */ 170/* +---+ */ 171/* |___|4095 */ 172/* |___| */ 173/* +---------------------*_|<---+ /----+---\ */ 174/* | |___| +---|Hash | */ 175/* | |___| |Function| */ 176/* | |___| \--------/ */ 177/* | |___|0 ^ */ 178/* | +---+ | */ 179/* | Hash +-----+ */ 180/* | Table | */ 181/* | --- */ 182/* v ^^^ */ 183/* +-------------------------------------|----------------+ */ 184/* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| */ 185/* +-------------------------------------|----------------+ */ 186/* | |1......18| | */ 187/* |<------- Lempel=History ------------>|<--Ziv-->| | */ 188/* | (=bytes already processed) |<-Still to go-->| */ 189/* |<-------------------- INPUT BLOCK ------------------->| */ 190/* */ 191/* The diagram above for LZRW3 looks almost identical to the diagram for */ 192/* LZRW1. The difference is that in LZRW3, the compressor transmits hash */ 193/* table indices instead of Lempel offsets. For this to work, the */ 194/* decompressor must maintain a hash table as well as the compressor and both */ 195/* compressor and decompressor must "buffer" literals, as the decompressor */ 196/* cannot hash phrases commencing with a literal until another two bytes have */ 197/* arrived. */ 198/* */ 199/* LZRW3 Algorithm Execution Summary */ 200/* --------------------------------- */ 201/* 1. Hash the first three bytes of the Ziv to yield a hash table index h. */ 202/* 2. Look up the hash table yielding history pointer p. */ 203/* 3. Match where p points with the Ziv. If there is a match of three or */ 204/* more bytes, code those bytes (in the Ziv) as a copy item, otherwise */ 205/* code the next byte in the Ziv as a literal item. */ 206/* 4. Update the hash table as possible subject to the constraint that only */ 207/* phrases commencing three bytes back from the Ziv can be hashed and */ 208/* entered into the hash table. (This enables the decompressor to keep */ 209/* pace). See the description and code for more details. */ 210/* */ 211/******************************************************************************/ 212/* */ 213/* DEFINITION OF COMPRESSED FILE FORMAT */ 214/* ==================================== */ 215/* * A compressed file consists of a COPY FLAG followed by a REMAINDER. */ 216/* * The copy flag CF uses up four bytes with the first byte being the */ 217/* least significant. */ 218/* * If CF=1, then the compressed file represents the remainder of the file */ 219/* exactly. Otherwise CF=0 and the remainder of the file consists of zero */ 220/* or more GROUPS, each of which represents one or more bytes. */ 221/* * Each group consists of two bytes of CONTROL information followed by */ 222/* sixteen ITEMs except for the last group which can contain from one */ 223/* to sixteen items. */ 224/* * An item can be either a LITERAL item or a COPY item. */ 225/* * Each item corresponds to a bit in the control bytes. */ 226/* * The first control byte corresponds to the first 8 items in the group */ 227/* with bit 0 corresponding to the first item in the group and bit 7 to */ 228/* the eighth item in the group. */ 229/* * The second control byte corresponds to the second 8 items in the group */ 230/* with bit 0 corresponding to the ninth item in the group and bit 7 to */ 231/* the sixteenth item in the group. */ 232/* * A zero bit in a control word means that the corresponding item is a */ 233/* literal item. A one bit corresponds to a copy item. */ 234/* * A literal item consists of a single byte which represents itself. */ 235/* * A copy item consists of two bytes that represent from 3 to 18 bytes. */ 236/* * The first byte in a copy item will be denoted C1. */ 237/* * The second byte in a copy item will be denoted C2. */ 238/* * Bits will be selected using square brackets. */ 239/* For example: C1[0..3] is the low nibble of the first control byte. */ 240/* of copy item C1. */ 241/* * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number */ 242/* in the range [3,18]. */ 243/* * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..8] which */ 244/* is a number in the range [0,4095]. */ 245/* * A copy item represents the sequence of bytes */ 246/* text[POS-OFFSET..POS-OFFSET+LENGTH-1] where */ 247/* text is the entire text of the uncompressed string. */ 248/* POS is the index in the text of the character following the */ 249/* string represented by all the items preceeding the item */ 250/* being defined. */ 251/* OFFSET is obtained from INDEX by looking up the hash table. */ 252/* */ 253/******************************************************************************/ 254 255/* The following #define defines the length of the copy flag that appears at */ 256/* the start of the compressed file. The value of four bytes was chosen */ 257/* because the fast_copy routine on my Macintosh runs faster if the source */ 258/* and destination blocks are relatively longword aligned. */ 259/* The actual flag data appears in the first byte. The rest are zeroed so as */ 260/* to normalize the compressed representation (i.e. not non-deterministic). */ 261#define FLAG_BYTES 4 262 263/* The following #defines define the meaning of the values of the copy */ 264/* flag at the start of the compressed file. */ 265#define FLAG_COMPRESS 0 /* Signals that output was result of compression. */ 266#define FLAG_COPY 1 /* Signals that output was simply copied over. */ 267 268/* The 68000 microprocessor (on which this algorithm was originally developed */ 269/* is fussy about non-aligned arrays of words. To avoid these problems the */ 270/* following macro can be used to "waste" from 0 to 3 bytes so as to align */ 271/* the argument pointer. */ 272#define ULONG_ALIGN_UP(X) ((((ULONG)X)+sizeof(ULONG)-1)&~(sizeof(ULONG)-1)) 273 274 275/* The following constant defines the maximum length of an uncompressed item. */ 276/* This definition must not be changed; its value is hardwired into the code. */ 277/* The longest number of bytes that can be spanned by a single item is 18 */ 278/* for the longest copy item. */ 279#define MAX_RAW_ITEM (18) 280 281/* The following constant defines the maximum length of an uncompressed group.*/ 282/* This definition must not be changed; its value is hardwired into the code. */ 283/* A group contains at most 16 items which explains this definition. */ 284#define MAX_RAW_GROUP (16*MAX_RAW_ITEM) 285 286/* The following constant defines the maximum length of a compressed group. */ 287/* This definition must not be changed; its value is hardwired into the code. */ 288/* A compressed group consists of two control bytes followed by up to 16 */ 289/* compressed items each of which can have a maximum length of two bytes. */ 290#define MAX_CMP_GROUP (2+16*2) 291 292/* The following constant defines the number of entries in the hash table. */ 293/* This definition must not be changed; its value is hardwired into the code. */ 294#define HASH_TABLE_LENGTH (4096) 295 296/* LZRW3, unlike LZRW1(-A), must initialize its hash table so as to enable */ 297/* the compressor and decompressor to stay in step maintaining identical hash */ 298/* tables. In an early version of the algorithm, the tables were simply */ 299/* initialized to zero and a check for zero was included just before the */ 300/* matching code. However, this test costs time. A better solution is to */ 301/* initialize all the entries in the hash table to point to a constant */ 302/* string. The decompressor does the same. This solution requires no extra */ 303/* test. The contents of the string do not matter so long as the string is */ 304/* the same for the compressor and decompressor and contains at least */ 305/* MAX_RAW_ITEM bytes. I chose consecutive decimal digits because they do not */ 306/* have white space problems (e.g. there is no chance that the compiler will */ 307/* replace more than one space by a TAB) and because they make the length of */ 308/* the string obvious by inspection. */ 309#define START_STRING_18 ((UBYTE *) "123456789012345678") 310 311/* In this algorithm, hash values have to be calculated at more than one */ 312/* point. The following macro neatens the code up for this. */ 313#define HASH(PTR) \ 314 (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & 0xFFF) 315 316/******************************************************************************/ 317 318LOCAL void compress_compress 319 (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len) 320/* Input : Hand over the required amount of working memory in p_wrk_mem. */ 321/* Input : Specify input block using p_src_first and src_len. */ 322/* Input : Point p_dst_first to the start of the output zone (OZ). */ 323/* Input : Point p_dst_len to a ULONG to receive the output length. */ 324/* Input : Input block and output zone must not overlap. */ 325/* Output : Length of output block written to *p_dst_len. */ 326/* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May */ 327/* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*/ 328/* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */ 329UBYTE *p_wrk_mem; 330UBYTE *p_src_first; 331ULONG src_len; 332UBYTE *p_dst_first; 333LONG *p_dst_len; 334{ 335 /* p_src and p_dst step through the source and destination blocks. */ 336 register UBYTE *p_src = p_src_first; 337 register UBYTE *p_dst = p_dst_first; 338 339 /* The following variables are never modified and are used in the */ 340 /* calculations that determine when the main loop terminates. */ 341 UBYTE *p_src_post = p_src_first+src_len; 342 UBYTE *p_dst_post = p_dst_first+src_len; 343 UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM; 344 UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16; 345 346 /* The variables 'p_control' and 'control' are used to buffer control bits. */ 347 /* Before each group is processed, the next two bytes of the output block */ 348 /* are set aside for the control word for the group about to be processed. */ 349 /* 'p_control' is set to point to the first byte of that word. Meanwhile, */ 350 /* 'control' buffers the control bits being generated during the processing */ 351 /* of the group. Instead of having a counter to keep track of how many items */ 352 /* have been processed (=the number of bits in the control word), at the */ 353 /* start of each group, the top word of 'control' is filled with 1 bits. */ 354 /* As 'control' is shifted for each item, the 1 bits in the top word are */ 355 /* absorbed or destroyed. When they all run out (i.e. when the top word is */ 356 /* all zero bits, we know that we are at the end of a group. */ 357# define TOPWORD 0xFFFF0000 358 UBYTE *p_control; 359 register ULONG control=TOPWORD; 360 361 /* THe variable 'hash' always points to the first element of the hash table. */ 362 UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); 363 364 /* The following two variables represent the literal buffer. p_h1 points to */ 365 /* the hash table entry corresponding to the youngest literal. p_h2 points */ 366 /* to the hash table entry corresponding to the second youngest literal. */ 367 /* Note: p_h1=0=>p_h2=0 because zero values denote absence of a pending */ 368 /* literal. The variables are initialized to zero meaning an empty "buffer". */ 369 UBYTE **p_h1=0; 370 UBYTE **p_h2=0; 371 372 /* To start, we write the flag bytes. Being optimistic, we set the flag to */ 373 /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */ 374 /* algorithm deterministic. */ 375 *p_dst++=FLAG_COMPRESS; 376 {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} 377 378 /* Reserve the first word of output as the control word for the first group. */ 379 /* Note: This is undone at the end if the input block is empty. */ 380 p_control=p_dst; p_dst+=2; 381 382 /* Initialize all elements of the hash table to point to a constant string. */ 383 /* Use of an unrolled loop speeds this up considerably. */ 384 {UWORD i; UBYTE **p_h=hash; 385# define ZH *p_h++=START_STRING_18 386 for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ 387 {ZH;ZH;ZH;ZH; 388 ZH;ZH;ZH;ZH; 389 ZH;ZH;ZH;ZH; 390 ZH;ZH;ZH;ZH;} 391 } 392 393 /* The main loop processes either 1 or 16 items per iteration. As its */ 394 /* termination logic is complicated, I have opted for an infinite loop */ 395 /* structure containing 'break' and 'goto' statements. */ 396 while (TRUE) 397 {/* Begin main processing loop. */ 398 399 /* Note: All the variables here except unroll should be defined within */ 400 /* the inner loop. Unfortunately the loop hasn't got a block. */ 401 register UBYTE *p; /* Scans through targ phrase during matching. */ 402 register UBYTE *p_ziv= NULL ; /* Points to first byte of current Ziv. */ 403 register UWORD unroll; /* Loop counter for unrolled inner loop. */ 404 register UWORD index; /* Index of current hash table entry. */ 405 register UBYTE **p_h0 = NULL ; /* Pointer to current hash table entry. */ 406 407 /* Test for overrun and jump to overrun code if necessary. */ 408 if (p_dst>p_dst_post) 409 goto overrun; 410 411 /* The following cascade of if statements efficiently catches and deals */ 412 /* with varying degrees of closeness to the end of the input block. */ 413 /* When we get very close to the end, we stop updating the table and */ 414 /* code the remaining bytes as literals. This makes the code simpler. */ 415 unroll=16; 416 if (p_src>p_src_max16) 417 { 418 unroll=1; 419 if (p_src>p_src_max1) 420 { 421 if (p_src==p_src_post) 422 break; 423 else 424 goto literal; 425 } 426 } 427 428 /* This inner unrolled loop processes 'unroll' (whose value is either 1 */ 429 /* or 16) items. I have chosen to implement this loop with labels and */ 430 /* gotos to heighten the ease with which the loop may be implemented with */ 431 /* a single decrement and branch instruction in assembly language and */ 432 /* also because the labels act as highly readable place markers. */ 433 /* (Also because we jump into the loop for endgame literals (see above)). */ 434 435 begin_unrolled_loop: 436 437 /* To process the next phrase, we hash the next three bytes and use */ 438 /* the resultant hash table index to look up the hash table. A pointer */ 439 /* to the entry is stored in p_h0 so as to avoid an array lookup. The */ 440 /* hash table entry *p_h0 is looked up yielding a pointer p to a */ 441 /* potential match of the Ziv in the history. */ 442 index=HASH(p_src); 443 p_h0=&hash[index]; 444 p=*p_h0; 445 446 /* Having looked up the candidate position, we are in a position to */ 447 /* attempt a match. The match loop has been unrolled using the PS */ 448 /* macro so that failure within the first three bytes automatically */ 449 /* results in the literal branch being taken. The coding is simple. */ 450 /* p_ziv saves p_src so we can let p_src wander. */ 451# define PS *p++!=*p_src++ 452 p_ziv=p_src; 453 if (PS || PS || PS) 454 { 455 /* Literal. */ 456 457 /* Code the literal byte as itself and a zero control bit. */ 458 p_src=p_ziv; literal: *p_dst++=*p_src++; control&=0xFFFEFFFF; 459 460 /* We have just coded a literal. If we had two pending ones, that */ 461 /* makes three and we can update the hash table. */ 462 if (p_h2!=0) 463 {*p_h2=p_ziv-2;} 464 465 /* In any case, rotate the hash table pointers for next time. */ 466 p_h2=p_h1; p_h1=p_h0; 467 468 } 469 else 470 { 471 /* Copy */ 472 473 /* Match up to 15 remaining bytes using an unrolled loop and code. */ 474 if ( 475 !( PS || PS || PS || PS || PS || PS || PS || PS || 476 PS || PS || PS || PS || PS || PS || PS ) 477 ) p_src++; 478 *p_dst++=((index&0xF00)>>4)|(--p_src-p_ziv-3); 479 *p_dst++=index&0xFF; 480 481 /* As we have just coded three bytes, we are now in a position to */ 482 /* update the hash table with the literal bytes that were pending */ 483 /* upon the arrival of extra context bytes. */ 484 if (p_h1!=0) 485 { 486 if (p_h2!=0) 487 {*p_h2=p_ziv-2; p_h2=0;} 488 *p_h1=p_ziv-1; p_h1=0; 489 } 490 491 /* In any case, we can update the hash table based on the current */ 492 /* position as we just coded at least three bytes in a copy items. */ 493 *p_h0=p_ziv; 494 495 } 496 control>>=1; 497 498 /* This loop is all set up for a decrement and jump instruction! */ 499#ifndef linux 500` end_unrolled_loop: if (--unroll) goto begin_unrolled_loop; 501#else 502 /* end_unrolled_loop: */ if (--unroll) goto begin_unrolled_loop; 503#endif 504 505 /* At this point it will nearly always be the end of a group in which */ 506 /* case, we have to do some control-word processing. However, near the */ 507 /* end of the input block, the inner unrolled loop is only executed once. */ 508 /* This necessitates the 'if' test. */ 509 if ((control&TOPWORD)==0) 510 { 511 /* Write the control word to the place we saved for it in the output. */ 512 *p_control++= control &0xFF; 513 *p_control = (control>>8) &0xFF; 514 515 /* Reserve the next word in the output block for the control word */ 516 /* for the group about to be processed. */ 517 p_control=p_dst; p_dst+=2; 518 519 /* Reset the control bits buffer. */ 520 control=TOPWORD; 521 } 522 523 } /* End main processing loop. */ 524 525 /* After the main processing loop has executed, all the input bytes have */ 526 /* been processed. However, the control word has still to be written to the */ 527 /* word reserved for it in the output at the start of the most recent group. */ 528 /* Before writing, the control word has to be shifted so that all the bits */ 529 /* are in the right place. The "empty" bit positions are filled with 1s */ 530 /* which partially fill the top word. */ 531 while(control&TOPWORD) control>>=1; 532 *p_control++= control &0xFF; 533 *p_control++=(control>>8) &0xFF; 534 535 /* If the last group contained no items, delete the control word too. */ 536 if (p_control==p_dst) p_dst-=2; 537 538 /* Write the length of the output block to the dst_len parameter and return. */ 539 *p_dst_len=p_dst-p_dst_first; 540 return; 541 542 /* Jump here as soon as an overrun is detected. An overrun is defined to */ 543 /* have occurred if p_dst>p_dst_first+src_len. That is, the moment the */ 544 /* length of the output written so far exceeds the length of the input block.*/ 545 /* The algorithm checks for overruns at least at the end of each group */ 546 /* which means that the maximum overrun is MAX_CMP_GROUP bytes. */ 547 /* Once an overrun occurs, the only thing to do is to set the copy flag and */ 548 /* copy the input over. */ 549 overrun: 550 fast_copy(p_src_first,p_dst_first,src_len); 551 *p_dst_len= -src_len; /* return a negative number to indicate uncompressed data */ 552} 553 554/******************************************************************************/ 555 556LOCAL void compress_decompress 557 (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len) 558/* Input : Hand over the required amount of working memory in p_wrk_mem. */ 559/* Input : Specify input block using p_src_first and src_len. */ 560/* Input : Point p_dst_first to the start of the output zone. */ 561/* Input : Point p_dst_len to a ULONG to receive the output length. */ 562/* Input : Input block and output zone must not overlap. User knows */ 563/* Input : upperbound on output block length from earlier compression. */ 564/* Input : In any case, maximum expansion possible is nine times. */ 565/* Output : Length of output block written to *p_dst_len. */ 566/* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */ 567/* Output : Writes only in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */ 568UBYTE *p_wrk_mem; 569UBYTE *p_src_first; 570LONG src_len; 571UBYTE *p_dst_first; 572ULONG *p_dst_len; 573{ 574 /* Byte pointers p_src and p_dst scan through the input and output blocks. */ 575 register UBYTE *p_src = p_src_first+FLAG_BYTES; 576 register UBYTE *p_dst = p_dst_first; 577 /* we need to avoid a SEGV when trying to uncompress corrupt data */ 578 register UBYTE *p_dst_post = p_dst_first + *p_dst_len; 579 580 /* The following two variables are never modified and are used to control */ 581 /* the main loop. */ 582 UBYTE *p_src_post = p_src_first+src_len; 583 UBYTE *p_src_max16 = p_src_first+src_len-(MAX_CMP_GROUP-2); 584 585 /* The hash table is the only resident of the working memory. The hash table */ 586 /* contains HASH_TABLE_LENGTH=4096 pointers to positions in the history. To */ 587 /* keep Macintoshes happy, it is longword aligned. */ 588 UBYTE **hash = (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); 589 590 /* The variable 'control' is used to buffer the control bits which appear in */ 591 /* groups of 16 bits (control words) at the start of each compressed group. */ 592 /* When each group is read, bit 16 of the register is set to one. Whenever */ 593 /* a new bit is needed, the register is shifted right. When the value of the */ 594 /* register becomes 1, we know that we have reached the end of a group. */ 595 /* Initializing the register to 1 thus instructs the code to follow that it */ 596 /* should read a new control word immediately. */ 597 register ULONG control=1; 598 599 /* The value of 'literals' is always in the range 0..3. It is the number of */ 600 /* consecutive literal items just seen. We have to record this number so as */ 601 /* to know when to update the hash table. When literals gets to 3, there */ 602 /* have been three consecutive literals and we can update at the position of */ 603 /* the oldest of the three. */ 604 register UWORD literals=0; 605 606 /* Check the leading copy flag to see if the compressor chose to use a copy */ 607 /* operation instead of a compression operation. If a copy operation was */ 608 /* used, then all we need to do is copy the data over, set the output length */ 609 /* and return. */ 610 if ( src_len < 0 ) 611 { 612 fast_copy(p_src_first,p_dst_first,-src_len ); 613 *p_dst_len = (ULONG)-src_len; 614 return; 615 } 616 617 /* Initialize all elements of the hash table to point to a constant string. */ 618 /* Use of an unrolled loop speeds this up considerably. */ 619 {UWORD i; UBYTE **p_h=hash; 620# define ZJ *p_h++=START_STRING_18 621 for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ 622 {ZJ;ZJ;ZJ;ZJ; 623 ZJ;ZJ;ZJ;ZJ; 624 ZJ;ZJ;ZJ;ZJ; 625 ZJ;ZJ;ZJ;ZJ;} 626 } 627 628 /* The outer loop processes either 1 or 16 items per iteration depending on */ 629 /* how close p_src is to the end of the input block. */ 630 while (p_src!=p_src_post) 631 {/* Start of outer loop */ 632 633 register UWORD unroll; /* Counts unrolled loop executions. */ 634 635 /* When 'control' has the value 1, it means that the 16 buffered control */ 636 /* bits that were read in at the start of the current group have all been */ 637 /* shifted out and that all that is left is the 1 bit that was injected */ 638 /* into bit 16 at the start of the current group. When we reach the end */ 639 /* of a group, we have to load a new control word and inject a new 1 bit. */ 640 if (control==1) 641 { 642 control=0x10000|*p_src++; 643 control|=(*p_src++)<<8; 644 } 645 646 /* If it is possible that we are within 16 groups from the end of the */ 647 /* input, execute the unrolled loop only once, else process a whole group */ 648 /* of 16 items by looping 16 times. */ 649 unroll= p_src<=p_src_max16 ? 16 : 1; 650 651 /* This inner loop processes one phrase (item) per iteration. */ 652 while (unroll--) 653 { /* Begin unrolled inner loop. */ 654 655 /* Process a literal or copy item depending on the next control bit. */ 656 if (control&1) 657 { 658 /* Copy item. */ 659 660 register UBYTE *p; /* Points to place from which to copy. */ 661 register UWORD lenmt; /* Length of copy item minus three. */ 662 register UBYTE **p_hte; /* Pointer to current hash table entry.*/ 663 register UBYTE *p_ziv=p_dst; /* Pointer to start of current Ziv. */ 664 665 /* Read and dismantle the copy word. Work out from where to copy. */ 666 lenmt=*p_src++; 667 p_hte=&hash[((lenmt&0xF0)<<4)|*p_src++]; 668 p=*p_hte; 669 lenmt&=0xF; 670 671 /* Now perform the copy using a half unrolled loop. */ 672 *p_dst++=*p++; 673 *p_dst++=*p++; 674 *p_dst++=*p++; 675 while (lenmt--) 676 *p_dst++=*p++; 677 678 /* Because we have just received 3 or more bytes in a copy item */ 679 /* (whose bytes we have just installed in the output), we are now */ 680 /* in a position to flush all the pending literal hashings that had */ 681 /* been postponed for lack of bytes. */ 682 if (literals>0) 683 { 684 register UBYTE *r=p_ziv-literals;; 685 hash[HASH(r)]=r; 686 if (literals==2) 687 {r++; hash[HASH(r)]=r;} 688 literals=0; 689 } 690 691 /* In any case, we can immediately update the hash table with the */ 692 /* current position. We don't need to do a HASH(...) to work out */ 693 /* where to put the pointer, as the compressor just told us!!! */ 694 *p_hte=p_ziv; 695 696 } 697 else 698 { 699 /* Literal item. */ 700 701 /* Copy over the literal byte. */ 702 *p_dst++=*p_src++; 703 704 /* If we now have three literals waiting to be hashed into the hash */ 705 /* table, we can do one of them now (because there are three). */ 706 if (++literals == 3) 707 {register UBYTE *p=p_dst-3; hash[HASH(p)]=p; literals=2;} 708 } 709 710 /* Shift the control buffer so the next control bit is in bit 0. */ 711 control>>=1; 712 if (p_dst > p_dst_post) 713 { 714 /* Shit: we tried to decompress corrupt data */ 715 *p_dst_len = 0; 716 return; 717 } 718 } /* End unrolled inner loop. */ 719 720 } /* End of outer loop */ 721 722 /* Write the length of the decompressed data before returning. */ 723 *p_dst_len=p_dst-p_dst_first; 724} 725 726/******************************************************************************/ 727/* End of LZRW3.C */ 728/******************************************************************************/ 729