/* $NetBSD: bzip2recover.c,v 1.5 2019/07/21 11:52:14 maya Exp $ */

/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* ------------------------------------------------------------------
   This file is part of bzip2/libbzip2, a program and library for
   lossless, block-sorting data compression.

   bzip2/libbzip2 version 1.0.8 of 13 July 2019
   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>

   Please read the WARNING, DISCLAIMER and PATENTS sections in the
   README file.

   This program is released under the terms of the license contained
   in the file LICENSE.
   ------------------------------------------------------------------ */

/* This program is a complete hack and should be rewritten properly.
   It isn't very complicated. */

#include <inttypes.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.

   The first branch below is unconditionally selected (#if 1), so
   MaybeUInt64 is always uint64_t here; the other branches are kept
   as the historical fallbacks.
*/
#if 1
   typedef uint64_t MaybeUInt64;
#  define MaybeUInt64_FMT "%" PRIu64
#else
#ifdef _MSC_VER
   typedef unsigned __int64 MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef unsigned int MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

/* Basic scalar types used throughout the program. */
typedef int            Int32;
typedef unsigned int   UInt32;
typedef char           Char;
typedef unsigned char  UChar;

/* Boolean type and its two values. */
typedef unsigned char  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)


/* Capacity (including the terminating NUL) of the name buffers below. */
#define BZ_MAX_FILENAME 2000

Char progName   [BZ_MAX_FILENAME];   /* name this program was invoked as */
Char inFileName [BZ_MAX_FILENAME];   /* damaged file being read          */
Char outFileName[BZ_MAX_FILENAME];   /* block file currently written     */

/* Byte counters; bytesOut is bumped by the bit-stream writer. */
MaybeUInt64 bytesIn  = 0;
MaybeUInt64 bytesOut = 0;

/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/* A stdio stream accessed one bit at a time, most significant bit
   of each byte first. */
typedef struct {
   FILE*  handle;     /* underlying stdio stream                      */
   Int32  buffer;     /* bits of the byte currently being (dis)assembled */
   Int32  buffLive;   /* number of bits in buffer still pending       */
   Char   mode;       /* 'r' for a read stream, 'w' for a write stream */
} BitStream;

/* Error reporters. */
static void       readError         ( void );
static void       writeError        ( void );
static void       mallocFail        ( Int32 n );
static void       tooManyBlocks     ( Int32 max_handled_blocks );

/* Bit-stream primitives. */
static BitStream* bsOpenReadStream  ( FILE* stream );
static BitStream* bsOpenWriteStream ( FILE* stream );
static Int32      bsGetBit          ( BitStream* bs );
static void       bsPutBit          ( BitStream* bs, Int32 bit );
static void       bsPutUChar        ( BitStream* bs, UChar c );
static void       bsPutUInt32       ( BitStream* bs, UInt32 c );
static void       bsClose           ( BitStream* bs );

/* File-name helper. */
static Bool       endsInBz2         ( Char* name );


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

#define BZ_HDR_B 0x42   /* 'B' */
#define BZ_HDR_Z 0x5a   /* 'Z' */
#define BZ_HDR_h 0x68   /* 'h' */
#define BZ_HDR_0 0x30   /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

113/*---------------------------------------------*/ 114__dead static void readError ( void ) 115{ 116 fprintf ( stderr, 117 "%s: I/O error reading `%s', possible reason follows.\n", 118 progName, inFileName ); 119 perror ( progName ); 120 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", 121 progName ); 122 exit ( 1 ); 123} 124 125 126/*---------------------------------------------*/ 127__dead static void writeError ( void ) 128{ 129 fprintf ( stderr, 130 "%s: I/O error reading `%s', possible reason follows.\n", 131 progName, inFileName ); 132 perror ( progName ); 133 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", 134 progName ); 135 exit ( 1 ); 136} 137 138 139/*---------------------------------------------*/ 140__dead static void mallocFail ( Int32 n ) 141{ 142 fprintf ( stderr, 143 "%s: malloc failed on request for %d bytes.\n", 144 progName, n ); 145 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", 146 progName ); 147 exit ( 1 ); 148} 149 150 151/*---------------------------------------------*/ 152__dead static void tooManyBlocks ( Int32 max_handled_blocks ) 153{ 154 fprintf ( stderr, 155 "%s: `%s' appears to contain more than %d blocks\n", 156 progName, inFileName, max_handled_blocks ); 157 fprintf ( stderr, 158 "%s: and cannot be handled. 
To fix, increase\n", 159 progName ); 160 fprintf ( stderr, 161 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n", 162 progName ); 163 exit ( 1 ); 164} 165 166 167 168/*---------------------------------------------*/ 169static BitStream* bsOpenReadStream ( FILE* stream ) 170{ 171 BitStream *bs = malloc ( sizeof(BitStream) ); 172 if (bs == NULL) mallocFail ( sizeof(BitStream) ); 173 bs->handle = stream; 174 bs->buffer = 0; 175 bs->buffLive = 0; 176 bs->mode = 'r'; 177 return bs; 178} 179 180 181/*---------------------------------------------*/ 182static BitStream* bsOpenWriteStream ( FILE* stream ) 183{ 184 BitStream *bs = malloc ( sizeof(BitStream) ); 185 if (bs == NULL) mallocFail ( sizeof(BitStream) ); 186 bs->handle = stream; 187 bs->buffer = 0; 188 bs->buffLive = 0; 189 bs->mode = 'w'; 190 return bs; 191} 192 193 194/*---------------------------------------------*/ 195static void bsPutBit ( BitStream* bs, Int32 bit ) 196{ 197 if (bs->buffLive == 8) { 198 Int32 retVal = putc ( (UChar) bs->buffer, bs->handle ); 199 if (retVal == EOF) writeError(); 200 bytesOut++; 201 bs->buffLive = 1; 202 bs->buffer = bit & 0x1; 203 } else { 204 bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) ); 205 bs->buffLive++; 206 }; 207} 208 209 210/*---------------------------------------------*/ 211/*-- 212 Returns 0 or 1, or 2 to indicate EOF. 
213--*/ 214static Int32 bsGetBit ( BitStream* bs ) 215{ 216 if (bs->buffLive > 0) { 217 bs->buffLive --; 218 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 ); 219 } else { 220 Int32 retVal = getc ( bs->handle ); 221 if ( retVal == EOF ) { 222 if (errno != 0) readError(); 223 return 2; 224 } 225 bs->buffLive = 7; 226 bs->buffer = retVal; 227 return ( ((bs->buffer) >> 7) & 0x1 ); 228 } 229} 230 231 232/*---------------------------------------------*/ 233static void bsClose ( BitStream* bs ) 234{ 235 Int32 retVal; 236 237 if ( bs->mode == 'w' ) { 238 while ( bs->buffLive < 8 ) { 239 bs->buffLive++; 240 bs->buffer <<= 1; 241 }; 242 retVal = putc ( (UChar) (bs->buffer), bs->handle ); 243 if (retVal == EOF) writeError(); 244 bytesOut++; 245 retVal = fflush ( bs->handle ); 246 if (retVal == EOF) writeError(); 247 } 248 retVal = fclose ( bs->handle ); 249 if (retVal == EOF) { 250 if (bs->mode == 'w') writeError(); else readError(); 251 } 252 free ( bs ); 253} 254 255 256/*---------------------------------------------*/ 257static void bsPutUChar ( BitStream* bs, UChar c ) 258{ 259 Int32 i; 260 for (i = 7; i >= 0; i--) 261 bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 ); 262} 263 264 265/*---------------------------------------------*/ 266static void bsPutUInt32 ( BitStream* bs, UInt32 c ) 267{ 268 Int32 i; 269 270 for (i = 31; i >= 0; i--) 271 bsPutBit ( bs, (c >> i) & 0x1 ); 272} 273 274 275/*---------------------------------------------*/ 276static Bool endsInBz2 ( Char* name ) 277{ 278 Int32 n = strlen ( name ); 279 if (n <= 4) return False; 280 return 281 (name[n-4] == '.' && 282 name[n-3] == 'b' && 283 name[n-2] == 'z' && 284 name[n-1] == '2'); 285} 286 287 288/*---------------------------------------------------*/ 289/*--- ---*/ 290/*---------------------------------------------------*/ 291 292/* This logic isn't really right when it comes to Cygwin. 
*/ 293#ifdef _WIN32 294# define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */ 295#else 296# define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ 297#endif 298 299#define BLOCK_HEADER_HI 0x00003141UL 300#define BLOCK_HEADER_LO 0x59265359UL 301 302#define BLOCK_ENDMARK_HI 0x00001772UL 303#define BLOCK_ENDMARK_LO 0x45385090UL 304 305/* Increase if necessary. However, a .bz2 file with > 50000 blocks 306 would have an uncompressed size of at least 40GB, so the chances 307 are low you'll need to up this. 308*/ 309#define BZ_MAX_HANDLED_BLOCKS 50000 310 311MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; 312MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; 313MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; 314MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; 315 316Int32 main ( Int32 argc, Char** argv ) 317{ 318 FILE* inFile; 319 FILE* outFile; 320 BitStream* bsIn, *bsWr; 321 Int32 b, wrBlock, currBlock, rbCtr; 322 MaybeUInt64 bitsRead; 323 324 UInt32 buffHi, buffLo, blockCRC; 325 Char* p; 326 327 strncpy ( progName, argv[0], BZ_MAX_FILENAME-1); 328 progName[BZ_MAX_FILENAME-1]='\0'; 329 inFileName[0] = outFileName[0] = 0; 330 331 fprintf ( stderr, 332 "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" ); 333 334 if (argc != 2) { 335 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", 336 progName, progName ); 337 switch (sizeof(MaybeUInt64)) { 338 case 8: 339 fprintf(stderr, 340 "\trestrictions on size of recovered file: None\n"); 341 break; 342 case 4: 343 fprintf(stderr, 344 "\trestrictions on size of recovered file: 512 MB\n"); 345 fprintf(stderr, 346 "\tto circumvent, recompile with MaybeUInt64 as an\n" 347 "\tunsigned 64-bit int.\n"); 348 break; 349 default: 350 fprintf(stderr, 351 "\tsizeof(MaybeUInt64) is not 4 or 8 -- " 352 "configuration error.\n"); 353 break; 354 } 355 exit(1); 356 } 357 358 if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { 359 fprintf ( stderr, 360 "%s: supplied filename is suspiciously (>= %d chars) long. 
Bye!\n", 361 progName, (int)strlen(argv[1]) ); 362 exit(1); 363 } 364 365 strcpy ( inFileName, argv[1] ); 366 367 inFile = fopen ( inFileName, "rb" ); 368 if (inFile == NULL) { 369 fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName ); 370 exit(1); 371 } 372 373 bsIn = bsOpenReadStream ( inFile ); 374 fprintf ( stderr, "%s: searching for block boundaries ...\n", progName ); 375 376 bitsRead = 0; 377 buffHi = buffLo = 0; 378 currBlock = 0; 379 bStart[currBlock] = 0; 380 381 rbCtr = 0; 382 383 while (True) { 384 b = bsGetBit ( bsIn ); 385 bitsRead++; 386 if (b == 2) { 387 if (bitsRead >= bStart[currBlock] && 388 (bitsRead - bStart[currBlock]) >= 40) { 389 bEnd[currBlock] = bitsRead-1; 390 if (currBlock > 0) 391 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT 392 " to " MaybeUInt64_FMT " (incomplete)\n", 393 currBlock, bStart[currBlock], bEnd[currBlock] ); 394 } else 395 currBlock--; 396 break; 397 } 398 buffHi = (buffHi << 1) | (buffLo >> 31); 399 buffLo = (buffLo << 1) | (b & 1); 400 if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI 401 && buffLo == BLOCK_HEADER_LO) 402 || 403 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI 404 && buffLo == BLOCK_ENDMARK_LO) 405 ) { 406 if (bitsRead > 49) { 407 bEnd[currBlock] = bitsRead-49; 408 } else { 409 bEnd[currBlock] = 0; 410 } 411 if (currBlock > 0 && 412 (bEnd[currBlock] - bStart[currBlock]) >= 130) { 413 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT 414 " to " MaybeUInt64_FMT "\n", 415 rbCtr+1, bStart[currBlock], bEnd[currBlock] ); 416 rbStart[rbCtr] = bStart[currBlock]; 417 rbEnd[rbCtr] = bEnd[currBlock]; 418 rbCtr++; 419 } 420 if (currBlock >= BZ_MAX_HANDLED_BLOCKS) 421 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); 422 currBlock++; 423 424 bStart[currBlock] = bitsRead; 425 } 426 } 427 428 bsClose ( bsIn ); 429 430 /*-- identified blocks run from 1 to rbCtr inclusive. 
--*/ 431 432 if (rbCtr < 1) { 433 fprintf ( stderr, 434 "%s: sorry, I couldn't find any block boundaries.\n", 435 progName ); 436 exit(1); 437 }; 438 439 fprintf ( stderr, "%s: splitting into blocks\n", progName ); 440 441 inFile = fopen ( inFileName, "rb" ); 442 if (inFile == NULL) { 443 fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName ); 444 exit(1); 445 } 446 bsIn = bsOpenReadStream ( inFile ); 447 448 /*-- placate gcc's dataflow analyser --*/ 449 blockCRC = 0; bsWr = 0; 450 451 bitsRead = 0; 452 outFile = NULL; 453 wrBlock = 0; 454 while (True) { 455 b = bsGetBit(bsIn); 456 if (b == 2) break; 457 buffHi = (buffHi << 1) | (buffLo >> 31); 458 buffLo = (buffLo << 1) | (b & 1); 459 if (bitsRead == 47+rbStart[wrBlock]) 460 blockCRC = (buffHi << 16) | (buffLo >> 16); 461 462 if (outFile != NULL && bitsRead >= rbStart[wrBlock] 463 && bitsRead <= rbEnd[wrBlock]) { 464 bsPutBit ( bsWr, b ); 465 } 466 467 bitsRead++; 468 469 if (bitsRead == rbEnd[wrBlock]+1) { 470 if (outFile != NULL) { 471 bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 ); 472 bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 ); 473 bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 ); 474 bsPutUInt32 ( bsWr, blockCRC ); 475 bsClose ( bsWr ); 476 outFile = NULL; 477 } 478 if (wrBlock >= rbCtr) break; 479 wrBlock++; 480 } else 481 if (bitsRead == rbStart[wrBlock]) { 482 /* Create the output file name, correctly handling leading paths. 483 (31.10.2001 by Sergey E. Kusikov) */ 484 Char* split; 485 Int32 ofs, k; 486 for (k = 0; k < BZ_MAX_FILENAME; k++) 487 outFileName[k] = 0; 488 strcpy (outFileName, inFileName); 489 split = strrchr (outFileName, BZ_SPLIT_SYM); 490 if (split == NULL) { 491 split = outFileName; 492 } else { 493 ++split; 494 } 495 /* Now split points to the start of the basename. 
*/ 496 ofs = split - outFileName; 497 sprintf (split, "rec%5d", wrBlock+1); 498 for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; 499 strcat (outFileName, inFileName + ofs); 500 501 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); 502 503 fprintf ( stderr, " writing block %d to `%s' ...\n", 504 wrBlock+1, outFileName ); 505 506 outFile = fopen ( outFileName, "wb" ); 507 if (outFile == NULL) { 508 fprintf ( stderr, "%s: can't write `%s'\n", 509 progName, outFileName ); 510 exit(1); 511 } 512 bsWr = bsOpenWriteStream ( outFile ); 513 bsPutUChar ( bsWr, BZ_HDR_B ); 514 bsPutUChar ( bsWr, BZ_HDR_Z ); 515 bsPutUChar ( bsWr, BZ_HDR_h ); 516 bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); 517 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); 518 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); 519 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); 520 } 521 } 522 523 fprintf ( stderr, "%s: finished\n", progName ); 524 return 0; 525} 526 527 528 529/*-----------------------------------------------------------*/ 530/*--- end bzip2recover.c ---*/ 531/*-----------------------------------------------------------*/ 532