bzip2recover.c revision 1.1.1.2
/* $NetBSD: bzip2recover.c,v 1.1.1.2 2012/05/07 00:41:46 wiz Exp $ */

/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* ------------------------------------------------------------------
   This file is part of bzip2/libbzip2, a program and library for
   lossless, block-sorting data compression.

   bzip2/libbzip2 version 1.0.6 of 6 September 2010
   Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>

   Please read the WARNING, DISCLAIMER and PATENTS sections in the
   README file.

   This program is released under the terms of the license contained
   in the file LICENSE.
   ------------------------------------------------------------------ */

/* This program is a complete hack and should be rewritten properly.
   It isn't very complicated. */

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
   /* "%llu" is the standard conversion for unsigned long long;
      the 'L' length modifier is only defined for long double. */
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)


/* Size of the fixed buffers that hold file names and the program name. */
#define BZ_MAX_FILENAME 2000

Char inFileName[BZ_MAX_FILENAME];
Char outFileName[BZ_MAX_FILENAME];
Char progName[BZ_MAX_FILENAME];

/* bytesOut counts the bytes emitted through bsPutBit/bsClose. */
MaybeUInt64 bytesOut = 0;
MaybeUInt64 bytesIn  = 0;


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/*--
   Reports a failure while reading inFileName (including the
   perror() text for the current errno) and exits with status 1.
--*/
static void readError ( void )
{
   fprintf ( stderr,
             "%s: I/O error reading `%s', possible reason follows.\n",
            progName, inFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Reports a failure while writing outFileName (including the
   perror() text for the current errno) and exits with status 1.
--*/
static void writeError ( void )
{
   /* This is the write-side handler, so name the output file and
      say "writing" rather than repeating the read-side message. */
   fprintf ( stderr,
             "%s: I/O error writing `%s', possible reason follows.\n",
            progName, outFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Reports that an allocation of n bytes failed and exits with
   status 1.
--*/
static void mallocFail ( Int32 n )
{
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n",
            progName, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Reports that the input holds more blocks than the fixed-size
   tables can record (max_handled_blocks) and exits with status 1.
--*/
static void tooManyBlocks ( Int32 max_handled_blocks )
{
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n",
            progName, inFileName, max_handled_blocks );
   fprintf ( stderr,
             "%s: and cannot be handled.  To fix, increase\n",
             progName );
   fprintf ( stderr,
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName );
   exit ( 1 );
}



/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/*--
   A FILE wrapped for bit-at-a-time access.
      handle    underlying stdio stream
      buffer    bits of the byte currently being assembled/consumed
      buffLive  number of bits in buffer that are still pending
      mode      'r' for a read stream, 'w' for a write stream
--*/
typedef
   struct {
      FILE*  handle;
      Int32  buffer;
      Int32  buffLive;
      Char   mode;
   }
   BitStream;


/*---------------------------------------------*/
/*--
   Allocates a BitStream for reading bits from stream.
   Exits via mallocFail() if the allocation fails.
--*/
static BitStream* bsOpenReadStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof *bs );
   if (bs == NULL) mallocFail ( (Int32) sizeof *bs );
   bs->handle = stream;
   bs->buffer = 0;
   bs->buffLive = 0;
   bs->mode = 'r';
   return bs;
}


/*---------------------------------------------*/
/*--
   Allocates a BitStream for writing bits to stream.
   Exits via mallocFail() if the allocation fails.
--*/
static BitStream* bsOpenWriteStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof *bs );
   if (bs == NULL) mallocFail ( (Int32) sizeof *bs );
   bs->handle = stream;
   bs->buffer = 0;
   bs->buffLive = 0;
   bs->mode = 'w';
   return bs;
}


/*---------------------------------------------*/
/*--
   Appends the low bit of `bit' to the write stream.  Bits are
   shifted in from the right, so the first bit written ends up as
   the most significant bit of the output byte.  A full byte is
   only written out by the call that supplies the ninth bit; that
   call also starts the next byte.  Exits via writeError() if the
   byte cannot be written.
--*/
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   if (bs->buffLive == 8) {
      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      bs->buffLive = 1;
      bs->buffer = bit & 0x1;
   } else {
      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
      bs->buffLive++;
   }
}


/*---------------------------------------------*/
198/*-- 199 Returns 0 or 1, or 2 to indicate EOF. 200--*/ 201static Int32 bsGetBit ( BitStream* bs ) 202{ 203 if (bs->buffLive > 0) { 204 bs->buffLive --; 205 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 ); 206 } else { 207 Int32 retVal = getc ( bs->handle ); 208 if ( retVal == EOF ) { 209 if (errno != 0) readError(); 210 return 2; 211 } 212 bs->buffLive = 7; 213 bs->buffer = retVal; 214 return ( ((bs->buffer) >> 7) & 0x1 ); 215 } 216} 217 218 219/*---------------------------------------------*/ 220static void bsClose ( BitStream* bs ) 221{ 222 Int32 retVal; 223 224 if ( bs->mode == 'w' ) { 225 while ( bs->buffLive < 8 ) { 226 bs->buffLive++; 227 bs->buffer <<= 1; 228 }; 229 retVal = putc ( (UChar) (bs->buffer), bs->handle ); 230 if (retVal == EOF) writeError(); 231 bytesOut++; 232 retVal = fflush ( bs->handle ); 233 if (retVal == EOF) writeError(); 234 } 235 retVal = fclose ( bs->handle ); 236 if (retVal == EOF) { 237 if (bs->mode == 'w') writeError(); else readError(); 238 } 239 free ( bs ); 240} 241 242 243/*---------------------------------------------*/ 244static void bsPutUChar ( BitStream* bs, UChar c ) 245{ 246 Int32 i; 247 for (i = 7; i >= 0; i--) 248 bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 ); 249} 250 251 252/*---------------------------------------------*/ 253static void bsPutUInt32 ( BitStream* bs, UInt32 c ) 254{ 255 Int32 i; 256 257 for (i = 31; i >= 0; i--) 258 bsPutBit ( bs, (c >> i) & 0x1 ); 259} 260 261 262/*---------------------------------------------*/ 263static Bool endsInBz2 ( Char* name ) 264{ 265 Int32 n = strlen ( name ); 266 if (n <= 4) return False; 267 return 268 (name[n-4] == '.' && 269 name[n-3] == 'b' && 270 name[n-2] == 'z' && 271 name[n-1] == '2'); 272} 273 274 275/*---------------------------------------------------*/ 276/*--- ---*/ 277/*---------------------------------------------------*/ 278 279/* This logic isn't really right when it comes to Cygwin. 
*/ 280#ifdef _WIN32 281# define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */ 282#else 283# define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ 284#endif 285 286#define BLOCK_HEADER_HI 0x00003141UL 287#define BLOCK_HEADER_LO 0x59265359UL 288 289#define BLOCK_ENDMARK_HI 0x00001772UL 290#define BLOCK_ENDMARK_LO 0x45385090UL 291 292/* Increase if necessary. However, a .bz2 file with > 50000 blocks 293 would have an uncompressed size of at least 40GB, so the chances 294 are low you'll need to up this. 295*/ 296#define BZ_MAX_HANDLED_BLOCKS 50000 297 298MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; 299MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; 300MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; 301MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; 302 303Int32 main ( Int32 argc, Char** argv ) 304{ 305 FILE* inFile; 306 FILE* outFile; 307 BitStream* bsIn, *bsWr; 308 Int32 b, wrBlock, currBlock, rbCtr; 309 MaybeUInt64 bitsRead; 310 311 UInt32 buffHi, buffLo, blockCRC; 312 Char* p; 313 314 strcpy ( progName, argv[0] ); 315 inFileName[0] = outFileName[0] = 0; 316 317 fprintf ( stderr, 318 "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" ); 319 320 if (argc != 2) { 321 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", 322 progName, progName ); 323 switch (sizeof(MaybeUInt64)) { 324 case 8: 325 fprintf(stderr, 326 "\trestrictions on size of recovered file: None\n"); 327 break; 328 case 4: 329 fprintf(stderr, 330 "\trestrictions on size of recovered file: 512 MB\n"); 331 fprintf(stderr, 332 "\tto circumvent, recompile with MaybeUInt64 as an\n" 333 "\tunsigned 64-bit int.\n"); 334 break; 335 default: 336 fprintf(stderr, 337 "\tsizeof(MaybeUInt64) is not 4 or 8 -- " 338 "configuration error.\n"); 339 break; 340 } 341 exit(1); 342 } 343 344 if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { 345 fprintf ( stderr, 346 "%s: supplied filename is suspiciously (>= %d chars) long. 
Bye!\n", 347 progName, (int)strlen(argv[1]) ); 348 exit(1); 349 } 350 351 strcpy ( inFileName, argv[1] ); 352 353 inFile = fopen ( inFileName, "rb" ); 354 if (inFile == NULL) { 355 fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName ); 356 exit(1); 357 } 358 359 bsIn = bsOpenReadStream ( inFile ); 360 fprintf ( stderr, "%s: searching for block boundaries ...\n", progName ); 361 362 bitsRead = 0; 363 buffHi = buffLo = 0; 364 currBlock = 0; 365 bStart[currBlock] = 0; 366 367 rbCtr = 0; 368 369 while (True) { 370 b = bsGetBit ( bsIn ); 371 bitsRead++; 372 if (b == 2) { 373 if (bitsRead >= bStart[currBlock] && 374 (bitsRead - bStart[currBlock]) >= 40) { 375 bEnd[currBlock] = bitsRead-1; 376 if (currBlock > 0) 377 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT 378 " to " MaybeUInt64_FMT " (incomplete)\n", 379 currBlock, bStart[currBlock], bEnd[currBlock] ); 380 } else 381 currBlock--; 382 break; 383 } 384 buffHi = (buffHi << 1) | (buffLo >> 31); 385 buffLo = (buffLo << 1) | (b & 1); 386 if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI 387 && buffLo == BLOCK_HEADER_LO) 388 || 389 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI 390 && buffLo == BLOCK_ENDMARK_LO) 391 ) { 392 if (bitsRead > 49) { 393 bEnd[currBlock] = bitsRead-49; 394 } else { 395 bEnd[currBlock] = 0; 396 } 397 if (currBlock > 0 && 398 (bEnd[currBlock] - bStart[currBlock]) >= 130) { 399 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT 400 " to " MaybeUInt64_FMT "\n", 401 rbCtr+1, bStart[currBlock], bEnd[currBlock] ); 402 rbStart[rbCtr] = bStart[currBlock]; 403 rbEnd[rbCtr] = bEnd[currBlock]; 404 rbCtr++; 405 } 406 if (currBlock >= BZ_MAX_HANDLED_BLOCKS) 407 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); 408 currBlock++; 409 410 bStart[currBlock] = bitsRead; 411 } 412 } 413 414 bsClose ( bsIn ); 415 416 /*-- identified blocks run from 1 to rbCtr inclusive. 
--*/ 417 418 if (rbCtr < 1) { 419 fprintf ( stderr, 420 "%s: sorry, I couldn't find any block boundaries.\n", 421 progName ); 422 exit(1); 423 }; 424 425 fprintf ( stderr, "%s: splitting into blocks\n", progName ); 426 427 inFile = fopen ( inFileName, "rb" ); 428 if (inFile == NULL) { 429 fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName ); 430 exit(1); 431 } 432 bsIn = bsOpenReadStream ( inFile ); 433 434 /*-- placate gcc's dataflow analyser --*/ 435 blockCRC = 0; bsWr = 0; 436 437 bitsRead = 0; 438 outFile = NULL; 439 wrBlock = 0; 440 while (True) { 441 b = bsGetBit(bsIn); 442 if (b == 2) break; 443 buffHi = (buffHi << 1) | (buffLo >> 31); 444 buffLo = (buffLo << 1) | (b & 1); 445 if (bitsRead == 47+rbStart[wrBlock]) 446 blockCRC = (buffHi << 16) | (buffLo >> 16); 447 448 if (outFile != NULL && bitsRead >= rbStart[wrBlock] 449 && bitsRead <= rbEnd[wrBlock]) { 450 bsPutBit ( bsWr, b ); 451 } 452 453 bitsRead++; 454 455 if (bitsRead == rbEnd[wrBlock]+1) { 456 if (outFile != NULL) { 457 bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 ); 458 bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 ); 459 bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 ); 460 bsPutUInt32 ( bsWr, blockCRC ); 461 bsClose ( bsWr ); 462 } 463 if (wrBlock >= rbCtr) break; 464 wrBlock++; 465 } else 466 if (bitsRead == rbStart[wrBlock]) { 467 /* Create the output file name, correctly handling leading paths. 468 (31.10.2001 by Sergey E. Kusikov) */ 469 Char* split; 470 Int32 ofs, k; 471 for (k = 0; k < BZ_MAX_FILENAME; k++) 472 outFileName[k] = 0; 473 strcpy (outFileName, inFileName); 474 split = strrchr (outFileName, BZ_SPLIT_SYM); 475 if (split == NULL) { 476 split = outFileName; 477 } else { 478 ++split; 479 } 480 /* Now split points to the start of the basename. 
*/ 481 ofs = split - outFileName; 482 sprintf (split, "rec%5d", wrBlock+1); 483 for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; 484 strcat (outFileName, inFileName + ofs); 485 486 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); 487 488 fprintf ( stderr, " writing block %d to `%s' ...\n", 489 wrBlock+1, outFileName ); 490 491 outFile = fopen ( outFileName, "wb" ); 492 if (outFile == NULL) { 493 fprintf ( stderr, "%s: can't write `%s'\n", 494 progName, outFileName ); 495 exit(1); 496 } 497 bsWr = bsOpenWriteStream ( outFile ); 498 bsPutUChar ( bsWr, BZ_HDR_B ); 499 bsPutUChar ( bsWr, BZ_HDR_Z ); 500 bsPutUChar ( bsWr, BZ_HDR_h ); 501 bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); 502 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); 503 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); 504 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); 505 } 506 } 507 508 fprintf ( stderr, "%s: finished\n", progName ); 509 return 0; 510} 511 512 513 514/*-----------------------------------------------------------*/ 515/*--- end bzip2recover.c ---*/ 516/*-----------------------------------------------------------*/ 517