bzip2recover.c revision 1.1.1.2
1/*	$NetBSD: bzip2recover.c,v 1.1.1.2 2012/05/07 00:41:46 wiz Exp $	*/
2
3/*-----------------------------------------------------------*/
4/*--- Block recoverer program for bzip2                   ---*/
5/*---                                      bzip2recover.c ---*/
6/*-----------------------------------------------------------*/
7
8/* ------------------------------------------------------------------
9   This file is part of bzip2/libbzip2, a program and library for
10   lossless, block-sorting data compression.
11
12   bzip2/libbzip2 version 1.0.6 of 6 September 2010
13   Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14
15   Please read the WARNING, DISCLAIMER and PATENTS sections in the
16   README file.
17
18   This program is released under the terms of the license contained
19   in the file LICENSE.
20   ------------------------------------------------------------------ */
21
22/* This program is a complete hack and should be rewritten properly.
23	 It isn't very complicated. */
24
25#include <stdio.h>
26#include <errno.h>
27#include <stdlib.h>
28#include <string.h>
29
30
31/* This program records bit locations in the file to be recovered.
32   That means that if 64-bit ints are not supported, we will not
33   be able to recover .bz2 files over 512MB (2^32 bits) long.
34   On GNU supported platforms, we take advantage of the 64-bit
35   int support to circumvent this problem.  Ditto MSVC.
36
37   This change occurred in version 1.0.2; all prior versions have
38   the 512MB limitation.
39*/
/* MaybeUInt64 is the widest unsigned integer type the compiler is known
   to provide; it holds bit offsets into the input file.
   MaybeUInt64_FMT is the printf conversion that matches the typedef
   chosen in the same branch. */
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
   /* "ll" is the standard length modifier for (unsigned) long long.
      "L" is only defined for floating-point conversions, so "%Lu"
      relies on a library extension. */
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   /* No known 64-bit type: fall back to unsigned int. */
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif
52
/* Short aliases for the basic types used throughout this file.
   NOTE(review): UInt32/Int32 are plain (unsigned) int; the code treats
   them as 32 bits wide -- confirm for the target platform. */
typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
/* Boolean stored in one unsigned char; use True/False below. */
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)
60
61
/* Capacity, in bytes, of each of the file-name buffers below. */
#define BZ_MAX_FILENAME 2000

/* Name of the damaged input file (copied from argv[1] in main). */
Char inFileName[BZ_MAX_FILENAME];
/* Name of the recovered block file currently being written. */
Char outFileName[BZ_MAX_FILENAME];
/* Program name (copied from argv[0] in main); prefixes every message. */
Char progName[BZ_MAX_FILENAME];

/* Count of bytes emitted through the bit-stream writer. */
MaybeUInt64 bytesOut = 0;
/* Not updated anywhere in the code visible in this file. */
MaybeUInt64 bytesIn  = 0;
70
71
72/*---------------------------------------------------*/
73/*--- Header bytes                                ---*/
74/*---------------------------------------------------*/
75
/* Bytes of the stream header that main writes at the start of every
   recovered file: 'B', 'Z', 'h', then BZ_HDR_0 plus a digit
   (main always writes BZ_HDR_0 + 9). */
#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */
80
81
82/*---------------------------------------------------*/
83/*--- I/O errors                                  ---*/
84/*---------------------------------------------------*/
85
86/*---------------------------------------------*/
87static void readError ( void )
88{
89   fprintf ( stderr,
90             "%s: I/O error reading `%s', possible reason follows.\n",
91            progName, inFileName );
92   perror ( progName );
93   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
94             progName );
95   exit ( 1 );
96}
97
98
99/*---------------------------------------------*/
100static void writeError ( void )
101{
102   fprintf ( stderr,
103             "%s: I/O error reading `%s', possible reason follows.\n",
104            progName, inFileName );
105   perror ( progName );
106   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
107             progName );
108   exit ( 1 );
109}
110
111
112/*---------------------------------------------*/
113static void mallocFail ( Int32 n )
114{
115   fprintf ( stderr,
116             "%s: malloc failed on request for %d bytes.\n",
117            progName, n );
118   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
119             progName );
120   exit ( 1 );
121}
122
123
124/*---------------------------------------------*/
125static void tooManyBlocks ( Int32 max_handled_blocks )
126{
127   fprintf ( stderr,
128             "%s: `%s' appears to contain more than %d blocks\n",
129            progName, inFileName, max_handled_blocks );
130   fprintf ( stderr,
131             "%s: and cannot be handled.  To fix, increase\n",
132             progName );
133   fprintf ( stderr,
134             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
135             progName );
136   exit ( 1 );
137}
138
139
140
141/*---------------------------------------------------*/
142/*--- Bit stream I/O                              ---*/
143/*---------------------------------------------------*/
144
/* State for reading or writing a stdio stream one bit at a time.
   Created by bsOpenReadStream/bsOpenWriteStream, released by bsClose. */
typedef
   struct {
      FILE*  handle;    /* underlying stdio stream */
      Int32  buffer;    /* bits read but not yet returned, or bits
                           accepted but not yet written out */
      Int32  buffLive;  /* number of valid bits currently in buffer */
      Char   mode;      /* 'r' for a read stream, 'w' for a write stream */
   }
   BitStream;
153
154
155/*---------------------------------------------*/
156static BitStream* bsOpenReadStream ( FILE* stream )
157{
158   BitStream *bs = malloc ( sizeof(BitStream) );
159   if (bs == NULL) mallocFail ( sizeof(BitStream) );
160   bs->handle = stream;
161   bs->buffer = 0;
162   bs->buffLive = 0;
163   bs->mode = 'r';
164   return bs;
165}
166
167
168/*---------------------------------------------*/
169static BitStream* bsOpenWriteStream ( FILE* stream )
170{
171   BitStream *bs = malloc ( sizeof(BitStream) );
172   if (bs == NULL) mallocFail ( sizeof(BitStream) );
173   bs->handle = stream;
174   bs->buffer = 0;
175   bs->buffLive = 0;
176   bs->mode = 'w';
177   return bs;
178}
179
180
181/*---------------------------------------------*/
182static void bsPutBit ( BitStream* bs, Int32 bit )
183{
184   if (bs->buffLive == 8) {
185      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
186      if (retVal == EOF) writeError();
187      bytesOut++;
188      bs->buffLive = 1;
189      bs->buffer = bit & 0x1;
190   } else {
191      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
192      bs->buffLive++;
193   };
194}
195
196
197/*---------------------------------------------*/
198/*--
199   Returns 0 or 1, or 2 to indicate EOF.
200--*/
201static Int32 bsGetBit ( BitStream* bs )
202{
203   if (bs->buffLive > 0) {
204      bs->buffLive --;
205      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
206   } else {
207      Int32 retVal = getc ( bs->handle );
208      if ( retVal == EOF ) {
209         if (errno != 0) readError();
210         return 2;
211      }
212      bs->buffLive = 7;
213      bs->buffer = retVal;
214      return ( ((bs->buffer) >> 7) & 0x1 );
215   }
216}
217
218
219/*---------------------------------------------*/
220static void bsClose ( BitStream* bs )
221{
222   Int32 retVal;
223
224   if ( bs->mode == 'w' ) {
225      while ( bs->buffLive < 8 ) {
226         bs->buffLive++;
227         bs->buffer <<= 1;
228      };
229      retVal = putc ( (UChar) (bs->buffer), bs->handle );
230      if (retVal == EOF) writeError();
231      bytesOut++;
232      retVal = fflush ( bs->handle );
233      if (retVal == EOF) writeError();
234   }
235   retVal = fclose ( bs->handle );
236   if (retVal == EOF) {
237      if (bs->mode == 'w') writeError(); else readError();
238   }
239   free ( bs );
240}
241
242
243/*---------------------------------------------*/
244static void bsPutUChar ( BitStream* bs, UChar c )
245{
246   Int32 i;
247   for (i = 7; i >= 0; i--)
248      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
249}
250
251
252/*---------------------------------------------*/
253static void bsPutUInt32 ( BitStream* bs, UInt32 c )
254{
255   Int32 i;
256
257   for (i = 31; i >= 0; i--)
258      bsPutBit ( bs, (c >> i) & 0x1 );
259}
260
261
262/*---------------------------------------------*/
263static Bool endsInBz2 ( Char* name )
264{
265   Int32 n = strlen ( name );
266   if (n <= 4) return False;
267   return
268      (name[n-4] == '.' &&
269       name[n-3] == 'b' &&
270       name[n-2] == 'z' &&
271       name[n-1] == '2');
272}
273
274
275/*---------------------------------------------------*/
276/*---                                             ---*/
277/*---------------------------------------------------*/
278
279/* This logic isn't really right when it comes to Cygwin. */
/* Character that separates directory components in a path; main uses
   it to find where the base name of the input file begins. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit marker that main searches for as the start of a block, split
   into its upper 16 bits (HI) and lower 32 bits (LO).  main writes the
   same six bytes (31 41 59 26 53 59) at the start of each recovered
   block. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

/* 48-bit marker that main treats as the end of the block sequence,
   split the same way.  main writes the same six bytes
   (17 72 45 38 50 90) after each recovered block. */
#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
   would have an uncompressed size of at least 40GB, so the chances
   are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000

/* Bit offsets, within the input file, of the first and last bit of
   every candidate block found by the scanning pass in main. */
MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
/* The same offsets, but only for the candidates main accepted for
   recovery; these drive the extraction pass. */
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
302
303Int32 main ( Int32 argc, Char** argv )
304{
305   FILE*       inFile;
306   FILE*       outFile;
307   BitStream*  bsIn, *bsWr;
308   Int32       b, wrBlock, currBlock, rbCtr;
309   MaybeUInt64 bitsRead;
310
311   UInt32      buffHi, buffLo, blockCRC;
312   Char*       p;
313
314   strcpy ( progName, argv[0] );
315   inFileName[0] = outFileName[0] = 0;
316
317   fprintf ( stderr,
318             "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" );
319
320   if (argc != 2) {
321      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
322                        progName, progName );
323      switch (sizeof(MaybeUInt64)) {
324         case 8:
325            fprintf(stderr,
326                    "\trestrictions on size of recovered file: None\n");
327            break;
328         case 4:
329            fprintf(stderr,
330                    "\trestrictions on size of recovered file: 512 MB\n");
331            fprintf(stderr,
332                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
333                    "\tunsigned 64-bit int.\n");
334            break;
335         default:
336            fprintf(stderr,
337                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
338                    "configuration error.\n");
339            break;
340      }
341      exit(1);
342   }
343
344   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
345      fprintf ( stderr,
346                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
347                progName, (int)strlen(argv[1]) );
348      exit(1);
349   }
350
351   strcpy ( inFileName, argv[1] );
352
353   inFile = fopen ( inFileName, "rb" );
354   if (inFile == NULL) {
355      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
356      exit(1);
357   }
358
359   bsIn = bsOpenReadStream ( inFile );
360   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
361
362   bitsRead = 0;
363   buffHi = buffLo = 0;
364   currBlock = 0;
365   bStart[currBlock] = 0;
366
367   rbCtr = 0;
368
369   while (True) {
370      b = bsGetBit ( bsIn );
371      bitsRead++;
372      if (b == 2) {
373         if (bitsRead >= bStart[currBlock] &&
374            (bitsRead - bStart[currBlock]) >= 40) {
375            bEnd[currBlock] = bitsRead-1;
376            if (currBlock > 0)
377               fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
378                                 " to " MaybeUInt64_FMT " (incomplete)\n",
379                         currBlock,  bStart[currBlock], bEnd[currBlock] );
380         } else
381            currBlock--;
382         break;
383      }
384      buffHi = (buffHi << 1) | (buffLo >> 31);
385      buffLo = (buffLo << 1) | (b & 1);
386      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
387             && buffLo == BLOCK_HEADER_LO)
388           ||
389           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
390             && buffLo == BLOCK_ENDMARK_LO)
391         ) {
392         if (bitsRead > 49) {
393            bEnd[currBlock] = bitsRead-49;
394         } else {
395            bEnd[currBlock] = 0;
396         }
397         if (currBlock > 0 &&
398	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
399            fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
400                              " to " MaybeUInt64_FMT "\n",
401                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
402            rbStart[rbCtr] = bStart[currBlock];
403            rbEnd[rbCtr] = bEnd[currBlock];
404            rbCtr++;
405         }
406         if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
407            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
408         currBlock++;
409
410         bStart[currBlock] = bitsRead;
411      }
412   }
413
414   bsClose ( bsIn );
415
416   /*-- identified blocks run from 1 to rbCtr inclusive. --*/
417
418   if (rbCtr < 1) {
419      fprintf ( stderr,
420                "%s: sorry, I couldn't find any block boundaries.\n",
421                progName );
422      exit(1);
423   };
424
425   fprintf ( stderr, "%s: splitting into blocks\n", progName );
426
427   inFile = fopen ( inFileName, "rb" );
428   if (inFile == NULL) {
429      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
430      exit(1);
431   }
432   bsIn = bsOpenReadStream ( inFile );
433
434   /*-- placate gcc's dataflow analyser --*/
435   blockCRC = 0; bsWr = 0;
436
437   bitsRead = 0;
438   outFile = NULL;
439   wrBlock = 0;
440   while (True) {
441      b = bsGetBit(bsIn);
442      if (b == 2) break;
443      buffHi = (buffHi << 1) | (buffLo >> 31);
444      buffLo = (buffLo << 1) | (b & 1);
445      if (bitsRead == 47+rbStart[wrBlock])
446         blockCRC = (buffHi << 16) | (buffLo >> 16);
447
448      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
449                          && bitsRead <= rbEnd[wrBlock]) {
450         bsPutBit ( bsWr, b );
451      }
452
453      bitsRead++;
454
455      if (bitsRead == rbEnd[wrBlock]+1) {
456         if (outFile != NULL) {
457            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
458            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
459            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
460            bsPutUInt32 ( bsWr, blockCRC );
461            bsClose ( bsWr );
462         }
463         if (wrBlock >= rbCtr) break;
464         wrBlock++;
465      } else
466      if (bitsRead == rbStart[wrBlock]) {
467         /* Create the output file name, correctly handling leading paths.
468            (31.10.2001 by Sergey E. Kusikov) */
469         Char* split;
470         Int32 ofs, k;
471         for (k = 0; k < BZ_MAX_FILENAME; k++)
472            outFileName[k] = 0;
473         strcpy (outFileName, inFileName);
474         split = strrchr (outFileName, BZ_SPLIT_SYM);
475         if (split == NULL) {
476            split = outFileName;
477         } else {
478            ++split;
479	 }
480	 /* Now split points to the start of the basename. */
481         ofs  = split - outFileName;
482         sprintf (split, "rec%5d", wrBlock+1);
483         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
484         strcat (outFileName, inFileName + ofs);
485
486         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
487
488         fprintf ( stderr, "   writing block %d to `%s' ...\n",
489                           wrBlock+1, outFileName );
490
491         outFile = fopen ( outFileName, "wb" );
492         if (outFile == NULL) {
493            fprintf ( stderr, "%s: can't write `%s'\n",
494                      progName, outFileName );
495            exit(1);
496         }
497         bsWr = bsOpenWriteStream ( outFile );
498         bsPutUChar ( bsWr, BZ_HDR_B );
499         bsPutUChar ( bsWr, BZ_HDR_Z );
500         bsPutUChar ( bsWr, BZ_HDR_h );
501         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
502         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
503         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
504         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
505      }
506   }
507
508   fprintf ( stderr, "%s: finished\n", progName );
509   return 0;
510}
511
512
513
514/*-----------------------------------------------------------*/
515/*--- end                                  bzip2recover.c ---*/
516/*-----------------------------------------------------------*/
517