1/*	$NetBSD: bzip2recover.c,v 1.5 2019/07/21 11:52:14 maya Exp $	*/
2
3/*-----------------------------------------------------------*/
4/*--- Block recoverer program for bzip2                   ---*/
5/*---                                      bzip2recover.c ---*/
6/*-----------------------------------------------------------*/
7
8/* ------------------------------------------------------------------
9   This file is part of bzip2/libbzip2, a program and library for
10   lossless, block-sorting data compression.
11
12   bzip2/libbzip2 version 1.0.8 of 13 July 2019
13   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
14
15   Please read the WARNING, DISCLAIMER and PATENTS sections in the
16   README file.
17
18   This program is released under the terms of the license contained
19   in the file LICENSE.
20   ------------------------------------------------------------------ */
21
22/* This program is a complete hack and should be rewritten properly.
23	 It isn't very complicated. */
24
25#include <inttypes.h>
26#include <stdio.h>
27#include <errno.h>
28#include <stdlib.h>
29#include <string.h>
30
31
32/* This program records bit locations in the file to be recovered.
33   That means that if 64-bit ints are not supported, we will not
34   be able to recover .bz2 files over 512MB (2^32 bits) long.
35   On GNU supported platforms, we take advantage of the 64-bit
36   int support to circumvent this problem.  Ditto MSVC.
37
38   This change occurred in version 1.0.2; all prior versions have
39   the 512MB limitation.
40*/
/* Type used for bit offsets into the input file, plus the printf
   conversion that matches it.  The first branch is unconditionally
   selected; the remaining branches are retained but never compiled. */
#if 1
typedef uint64_t MaybeUInt64;
#define MaybeUInt64_FMT "%" PRIu64
#else
#ifdef _MSC_VER
typedef unsigned __int64 MaybeUInt64;
#define MaybeUInt64_FMT "%I64u"
#else
typedef unsigned int MaybeUInt64;
#define MaybeUInt64_FMT "%u"
#endif
#endif

/* Short aliases for the basic types used throughout this file. */
typedef unsigned int UInt32;
typedef int Int32;
typedef unsigned char UChar;
typedef char Char;

/* Boolean type and its two values. */
typedef unsigned char Bool;
#define True ((Bool)1)
#define False ((Bool)0)
61
62
63#define BZ_MAX_FILENAME 2000
64
65Char inFileName[BZ_MAX_FILENAME];
66Char outFileName[BZ_MAX_FILENAME];
67Char progName[BZ_MAX_FILENAME];
68
69MaybeUInt64 bytesOut = 0;
70MaybeUInt64 bytesIn  = 0;
71
72/*---------------------------------------------------*/
73/*--- Bit stream I/O                              ---*/
74/*---------------------------------------------------*/
75
76typedef
77   struct {
78      FILE*  handle;
79      Int32  buffer;
80      Int32  buffLive;
81      Char   mode;
82   }
83   BitStream;
84
85static void readError ( void );
86static void writeError ( void );
87static void mallocFail ( Int32 n );
88static BitStream* bsOpenReadStream ( FILE* stream );
89static BitStream* bsOpenWriteStream ( FILE* stream );
90static void bsPutBit ( BitStream* bs, Int32 bit );
91static Int32 bsGetBit ( BitStream* bs );
92static void bsClose ( BitStream* bs );
93static void bsPutUChar ( BitStream* bs, UChar c );
94static void bsPutUInt32 ( BitStream* bs, UInt32 c );
95static Bool endsInBz2 ( Char* name );
96static void tooManyBlocks ( Int32 max_handled_blocks );
97
98
99/*---------------------------------------------------*/
100/*--- Header bytes                                ---*/
101/*---------------------------------------------------*/
102
/* Stream header bytes, given as numeric codes rather than character
   literals. */
#define BZ_HDR_B 0x42   /* 'B' */
#define BZ_HDR_Z 0x5a   /* 'Z' */
#define BZ_HDR_h 0x68   /* 'h' */
#define BZ_HDR_0 0x30   /* '0' */
107
108
109/*---------------------------------------------------*/
110/*--- I/O errors                                  ---*/
111/*---------------------------------------------------*/
112
113/*---------------------------------------------*/
114__dead static void readError ( void )
115{
116   fprintf ( stderr,
117             "%s: I/O error reading `%s', possible reason follows.\n",
118            progName, inFileName );
119   perror ( progName );
120   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
121             progName );
122   exit ( 1 );
123}
124
125
126/*---------------------------------------------*/
127__dead static void writeError ( void )
128{
129   fprintf ( stderr,
130             "%s: I/O error reading `%s', possible reason follows.\n",
131            progName, inFileName );
132   perror ( progName );
133   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
134             progName );
135   exit ( 1 );
136}
137
138
139/*---------------------------------------------*/
140__dead static void mallocFail ( Int32 n )
141{
142   fprintf ( stderr,
143             "%s: malloc failed on request for %d bytes.\n",
144            progName, n );
145   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
146             progName );
147   exit ( 1 );
148}
149
150
151/*---------------------------------------------*/
152__dead static void tooManyBlocks ( Int32 max_handled_blocks )
153{
154   fprintf ( stderr,
155             "%s: `%s' appears to contain more than %d blocks\n",
156            progName, inFileName, max_handled_blocks );
157   fprintf ( stderr,
158             "%s: and cannot be handled.  To fix, increase\n",
159             progName );
160   fprintf ( stderr,
161             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
162             progName );
163   exit ( 1 );
164}
165
166
167
168/*---------------------------------------------*/
169static BitStream* bsOpenReadStream ( FILE* stream )
170{
171   BitStream *bs = malloc ( sizeof(BitStream) );
172   if (bs == NULL) mallocFail ( sizeof(BitStream) );
173   bs->handle = stream;
174   bs->buffer = 0;
175   bs->buffLive = 0;
176   bs->mode = 'r';
177   return bs;
178}
179
180
181/*---------------------------------------------*/
182static BitStream* bsOpenWriteStream ( FILE* stream )
183{
184   BitStream *bs = malloc ( sizeof(BitStream) );
185   if (bs == NULL) mallocFail ( sizeof(BitStream) );
186   bs->handle = stream;
187   bs->buffer = 0;
188   bs->buffLive = 0;
189   bs->mode = 'w';
190   return bs;
191}
192
193
194/*---------------------------------------------*/
195static void bsPutBit ( BitStream* bs, Int32 bit )
196{
197   if (bs->buffLive == 8) {
198      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
199      if (retVal == EOF) writeError();
200      bytesOut++;
201      bs->buffLive = 1;
202      bs->buffer = bit & 0x1;
203   } else {
204      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
205      bs->buffLive++;
206   };
207}
208
209
210/*---------------------------------------------*/
211/*--
212   Returns 0 or 1, or 2 to indicate EOF.
213--*/
214static Int32 bsGetBit ( BitStream* bs )
215{
216   if (bs->buffLive > 0) {
217      bs->buffLive --;
218      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
219   } else {
220      Int32 retVal = getc ( bs->handle );
221      if ( retVal == EOF ) {
222         if (errno != 0) readError();
223         return 2;
224      }
225      bs->buffLive = 7;
226      bs->buffer = retVal;
227      return ( ((bs->buffer) >> 7) & 0x1 );
228   }
229}
230
231
232/*---------------------------------------------*/
233static void bsClose ( BitStream* bs )
234{
235   Int32 retVal;
236
237   if ( bs->mode == 'w' ) {
238      while ( bs->buffLive < 8 ) {
239         bs->buffLive++;
240         bs->buffer <<= 1;
241      };
242      retVal = putc ( (UChar) (bs->buffer), bs->handle );
243      if (retVal == EOF) writeError();
244      bytesOut++;
245      retVal = fflush ( bs->handle );
246      if (retVal == EOF) writeError();
247   }
248   retVal = fclose ( bs->handle );
249   if (retVal == EOF) {
250      if (bs->mode == 'w') writeError(); else readError();
251   }
252   free ( bs );
253}
254
255
256/*---------------------------------------------*/
257static void bsPutUChar ( BitStream* bs, UChar c )
258{
259   Int32 i;
260   for (i = 7; i >= 0; i--)
261      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
262}
263
264
265/*---------------------------------------------*/
266static void bsPutUInt32 ( BitStream* bs, UInt32 c )
267{
268   Int32 i;
269
270   for (i = 31; i >= 0; i--)
271      bsPutBit ( bs, (c >> i) & 0x1 );
272}
273
274
275/*---------------------------------------------*/
276static Bool endsInBz2 ( Char* name )
277{
278   Int32 n = strlen ( name );
279   if (n <= 4) return False;
280   return
281      (name[n-4] == '.' &&
282       name[n-3] == 'b' &&
283       name[n-2] == 'z' &&
284       name[n-1] == '2');
285}
286
287
288/*---------------------------------------------------*/
289/*---                                             ---*/
290/*---------------------------------------------------*/
291
292/* This logic isn't really right when it comes to Cygwin. */
293#ifdef _WIN32
294#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
295#else
296#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
297#endif
298
299#define BLOCK_HEADER_HI  0x00003141UL
300#define BLOCK_HEADER_LO  0x59265359UL
301
302#define BLOCK_ENDMARK_HI 0x00001772UL
303#define BLOCK_ENDMARK_LO 0x45385090UL
304
305/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
306   would have an uncompressed size of at least 40GB, so the chances
307   are low you'll need to up this.
308*/
309#define BZ_MAX_HANDLED_BLOCKS 50000
310
311MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
312MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
313MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
314MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
315
316Int32 main ( Int32 argc, Char** argv )
317{
318   FILE*       inFile;
319   FILE*       outFile;
320   BitStream*  bsIn, *bsWr;
321   Int32       b, wrBlock, currBlock, rbCtr;
322   MaybeUInt64 bitsRead;
323
324   UInt32      buffHi, buffLo, blockCRC;
325   Char*       p;
326
327   strncpy ( progName, argv[0], BZ_MAX_FILENAME-1);
328   progName[BZ_MAX_FILENAME-1]='\0';
329   inFileName[0] = outFileName[0] = 0;
330
331   fprintf ( stderr,
332             "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" );
333
334   if (argc != 2) {
335      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
336                        progName, progName );
337      switch (sizeof(MaybeUInt64)) {
338         case 8:
339            fprintf(stderr,
340                    "\trestrictions on size of recovered file: None\n");
341            break;
342         case 4:
343            fprintf(stderr,
344                    "\trestrictions on size of recovered file: 512 MB\n");
345            fprintf(stderr,
346                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
347                    "\tunsigned 64-bit int.\n");
348            break;
349         default:
350            fprintf(stderr,
351                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
352                    "configuration error.\n");
353            break;
354      }
355      exit(1);
356   }
357
358   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
359      fprintf ( stderr,
360                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
361                progName, (int)strlen(argv[1]) );
362      exit(1);
363   }
364
365   strcpy ( inFileName, argv[1] );
366
367   inFile = fopen ( inFileName, "rb" );
368   if (inFile == NULL) {
369      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
370      exit(1);
371   }
372
373   bsIn = bsOpenReadStream ( inFile );
374   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
375
376   bitsRead = 0;
377   buffHi = buffLo = 0;
378   currBlock = 0;
379   bStart[currBlock] = 0;
380
381   rbCtr = 0;
382
383   while (True) {
384      b = bsGetBit ( bsIn );
385      bitsRead++;
386      if (b == 2) {
387         if (bitsRead >= bStart[currBlock] &&
388            (bitsRead - bStart[currBlock]) >= 40) {
389            bEnd[currBlock] = bitsRead-1;
390            if (currBlock > 0)
391               fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
392                                 " to " MaybeUInt64_FMT " (incomplete)\n",
393                         currBlock,  bStart[currBlock], bEnd[currBlock] );
394         } else
395            currBlock--;
396         break;
397      }
398      buffHi = (buffHi << 1) | (buffLo >> 31);
399      buffLo = (buffLo << 1) | (b & 1);
400      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
401             && buffLo == BLOCK_HEADER_LO)
402           ||
403           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
404             && buffLo == BLOCK_ENDMARK_LO)
405         ) {
406         if (bitsRead > 49) {
407            bEnd[currBlock] = bitsRead-49;
408         } else {
409            bEnd[currBlock] = 0;
410         }
411         if (currBlock > 0 &&
412	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
413            fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
414                              " to " MaybeUInt64_FMT "\n",
415                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
416            rbStart[rbCtr] = bStart[currBlock];
417            rbEnd[rbCtr] = bEnd[currBlock];
418            rbCtr++;
419         }
420         if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
421            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
422         currBlock++;
423
424         bStart[currBlock] = bitsRead;
425      }
426   }
427
428   bsClose ( bsIn );
429
430   /*-- identified blocks run from 1 to rbCtr inclusive. --*/
431
432   if (rbCtr < 1) {
433      fprintf ( stderr,
434                "%s: sorry, I couldn't find any block boundaries.\n",
435                progName );
436      exit(1);
437   };
438
439   fprintf ( stderr, "%s: splitting into blocks\n", progName );
440
441   inFile = fopen ( inFileName, "rb" );
442   if (inFile == NULL) {
443      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
444      exit(1);
445   }
446   bsIn = bsOpenReadStream ( inFile );
447
448   /*-- placate gcc's dataflow analyser --*/
449   blockCRC = 0; bsWr = 0;
450
451   bitsRead = 0;
452   outFile = NULL;
453   wrBlock = 0;
454   while (True) {
455      b = bsGetBit(bsIn);
456      if (b == 2) break;
457      buffHi = (buffHi << 1) | (buffLo >> 31);
458      buffLo = (buffLo << 1) | (b & 1);
459      if (bitsRead == 47+rbStart[wrBlock])
460         blockCRC = (buffHi << 16) | (buffLo >> 16);
461
462      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
463                          && bitsRead <= rbEnd[wrBlock]) {
464         bsPutBit ( bsWr, b );
465      }
466
467      bitsRead++;
468
469      if (bitsRead == rbEnd[wrBlock]+1) {
470         if (outFile != NULL) {
471            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
472            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
473            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
474            bsPutUInt32 ( bsWr, blockCRC );
475            bsClose ( bsWr );
476            outFile = NULL;
477         }
478         if (wrBlock >= rbCtr) break;
479         wrBlock++;
480      } else
481      if (bitsRead == rbStart[wrBlock]) {
482         /* Create the output file name, correctly handling leading paths.
483            (31.10.2001 by Sergey E. Kusikov) */
484         Char* split;
485         Int32 ofs, k;
486         for (k = 0; k < BZ_MAX_FILENAME; k++)
487            outFileName[k] = 0;
488         strcpy (outFileName, inFileName);
489         split = strrchr (outFileName, BZ_SPLIT_SYM);
490         if (split == NULL) {
491            split = outFileName;
492         } else {
493            ++split;
494	 }
495	 /* Now split points to the start of the basename. */
496         ofs  = split - outFileName;
497         sprintf (split, "rec%5d", wrBlock+1);
498         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
499         strcat (outFileName, inFileName + ofs);
500
501         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
502
503         fprintf ( stderr, "   writing block %d to `%s' ...\n",
504                           wrBlock+1, outFileName );
505
506         outFile = fopen ( outFileName, "wb" );
507         if (outFile == NULL) {
508            fprintf ( stderr, "%s: can't write `%s'\n",
509                      progName, outFileName );
510            exit(1);
511         }
512         bsWr = bsOpenWriteStream ( outFile );
513         bsPutUChar ( bsWr, BZ_HDR_B );
514         bsPutUChar ( bsWr, BZ_HDR_Z );
515         bsPutUChar ( bsWr, BZ_HDR_h );
516         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
517         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
518         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
519         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
520      }
521   }
522
523   fprintf ( stderr, "%s: finished\n", progName );
524   return 0;
525}
526
527
528
529/*-----------------------------------------------------------*/
530/*--- end                                  bzip2recover.c ---*/
531/*-----------------------------------------------------------*/
532