1// LzmaBench.cpp 2 3#include "StdAfx.h" 4 5#include "LzmaBench.h" 6 7#ifndef _WIN32 8#define USE_POSIX_TIME 9#define USE_POSIX_TIME2 10#endif 11 12#ifdef USE_POSIX_TIME 13#include <time.h> 14#ifdef USE_POSIX_TIME2 15#include <sys/time.h> 16#endif 17#endif 18 19#ifdef _WIN32 20#define USE_ALLOCA 21#endif 22 23#ifdef USE_ALLOCA 24#ifdef _WIN32 25#include <malloc.h> 26#else 27#include <stdlib.h> 28#endif 29#endif 30 31extern "C" 32{ 33#include "../../../../C/Alloc.h" 34#include "../../../../C/7zCrc.h" 35} 36#include "../../../Common/MyCom.h" 37#include "../../ICoder.h" 38 39#ifdef BENCH_MT 40#include "../../../Windows/Thread.h" 41#include "../../../Windows/Synchronization.h" 42#endif 43 44#ifdef EXTERNAL_LZMA 45#include "../../../Windows/PropVariant.h" 46#else 47#include "../LZMA/LZMADecoder.h" 48#include "../LZMA/LZMAEncoder.h" 49#endif 50 51static const UInt32 kUncompressMinBlockSize = 1 << 26; 52static const UInt32 kAdditionalSize = (1 << 16); 53static const UInt32 kCompressedAdditionalSize = (1 << 10); 54static const UInt32 kMaxLzmaPropSize = 5; 55 56class CBaseRandomGenerator 57{ 58 UInt32 A1; 59 UInt32 A2; 60public: 61 CBaseRandomGenerator() { Init(); } 62 void Init() { A1 = 362436069; A2 = 521288629;} 63 UInt32 GetRnd() 64 { 65 return 66 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) + 67 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) ); 68 } 69}; 70 71class CBenchBuffer 72{ 73public: 74 size_t BufferSize; 75 Byte *Buffer; 76 CBenchBuffer(): Buffer(0) {} 77 virtual ~CBenchBuffer() { Free(); } 78 void Free() 79 { 80 ::MidFree(Buffer); 81 Buffer = 0; 82 } 83 bool Alloc(size_t bufferSize) 84 { 85 if (Buffer != 0 && BufferSize == bufferSize) 86 return true; 87 Free(); 88 Buffer = (Byte *)::MidAlloc(bufferSize); 89 BufferSize = bufferSize; 90 return (Buffer != 0); 91 } 92}; 93 94class CBenchRandomGenerator: public CBenchBuffer 95{ 96 CBaseRandomGenerator *RG; 97public: 98 void Set(CBaseRandomGenerator *rg) { RG = rg; } 99 UInt32 GetVal(UInt32 &res, int numBits) 100 { 101 UInt32 val = res & (((UInt32)1 << numBits) - 1); 102 res >>= numBits; 103 return val; 104 } 105 UInt32 GetLen(UInt32 &res) 106 { 107 UInt32 len = GetVal(res, 2); 108 return GetVal(res, 1 + len); 109 } 110 void Generate() 111 { 112 UInt32 pos = 0; 113 UInt32 rep0 = 1; 114 while (pos < BufferSize) 115 { 116 UInt32 res = RG->GetRnd(); 117 res >>= 1; 118 if (GetVal(res, 1) == 0 || pos < 1024) 119 Buffer[pos++] = (Byte)(res & 0xFF); 120 else 121 { 122 UInt32 len; 123 len = 1 + GetLen(res); 124 if (GetVal(res, 3) != 0) 125 { 126 len += GetLen(res); 127 do 128 { 129 UInt32 ppp = GetVal(res, 5) + 6; 130 res = RG->GetRnd(); 131 if (ppp > 30) 132 continue; 133 rep0 = /* (1 << ppp) +*/ GetVal(res, ppp); 134 res = RG->GetRnd(); 135 } 136 while (rep0 >= pos); 137 rep0++; 138 } 139 140 for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++) 141 Buffer[pos] = Buffer[pos - rep0]; 142 } 143 } 144 } 145}; 146 147 148class CBenchmarkInStream: 149 public ISequentialInStream, 150 public CMyUnknownImp 151{ 152 const Byte *Data; 153 size_t Pos; 154 size_t Size; 155public: 156 MY_UNKNOWN_IMP 157 void Init(const Byte *data, size_t size) 158 { 159 Data = data; 160 Size = size; 161 Pos = 0; 162 } 163 STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize); 164}; 165 166STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize) 167{ 168 size_t remain = Size - Pos; 169 UInt32 kMaxBlockSize = (1 << 20); 170 if (size > kMaxBlockSize) 171 size = kMaxBlockSize; 172 if (size > remain) 173 size = (UInt32)remain; 174 for (UInt32 i = 0; i < size; i++) 175 ((Byte *)data)[i] = Data[Pos + i]; 176 Pos += size; 177 if(processedSize != NULL) 178 *processedSize = size; 179 return S_OK; 180} 181 182class CBenchmarkOutStream: 183 public ISequentialOutStream, 184 public CBenchBuffer, 185 public CMyUnknownImp 186{ 187 // bool _overflow; 188public: 189 UInt32 Pos; 190 // CBenchmarkOutStream(): _overflow(false) {} 191 void Init() 192 { 193 // _overflow = false; 194 Pos = 0; 195 } 196 MY_UNKNOWN_IMP 197 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize); 198}; 199 200STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize) 201{ 202 size_t curSize = BufferSize - Pos; 203 if (curSize > size) 204 curSize = size; 205 memcpy(Buffer + Pos, data, curSize); 206 Pos += (UInt32)curSize; 207 if(processedSize != NULL) 208 *processedSize = (UInt32)curSize; 209 if (curSize != size) 210 { 211 // _overflow = true; 212 return E_FAIL; 213 } 214 return S_OK; 215} 216 217class CCrcOutStream: 218 public ISequentialOutStream, 219 public CMyUnknownImp 220{ 221public: 222 UInt32 Crc; 223 MY_UNKNOWN_IMP 224 void Init() { Crc = CRC_INIT_VAL; } 225 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize); 226}; 227 228STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize) 229{ 230 Crc = CrcUpdate(Crc, data, size); 231 if (processedSize != NULL) 232 *processedSize = size; 233 return S_OK; 234} 235 236static UInt64 GetTimeCount() 237{ 238 #ifdef USE_POSIX_TIME 239 #ifdef USE_POSIX_TIME2 240 timeval v; 241 if (gettimeofday(&v, 0) == 0) 242 return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec; 243 return (UInt64)time(NULL) * 1000000; 244 #else 245 return time(NULL); 246 #endif 247 #else 248 /* 249 LARGE_INTEGER value; 250 if (::QueryPerformanceCounter(&value)) 251 return value.QuadPart; 252 */ 253 return GetTickCount(); 254 #endif 255} 256 257static UInt64 GetFreq() 258{ 259 #ifdef USE_POSIX_TIME 260 #ifdef USE_POSIX_TIME2 261 return 1000000; 262 #else 263 return 1; 264 #endif 265 #else 266 /* 267 LARGE_INTEGER value; 268 if (::QueryPerformanceFrequency(&value)) 269 return value.QuadPart; 270 */ 271 return 1000; 272 #endif 273} 274 275#ifndef USE_POSIX_TIME 276static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; } 277#endif 278static UInt64 GetUserTime() 279{ 280 #ifdef USE_POSIX_TIME 281 return clock(); 282 #else 283 FILETIME creationTime, exitTime, kernelTime, userTime; 284 if (::GetProcessTimes(::GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime) != 0) 285 return GetTime64(userTime) + GetTime64(kernelTime); 286 return (UInt64)GetTickCount() * 10000; 287 #endif 288} 289 290static UInt64 GetUserFreq() 291{ 292 #ifdef USE_POSIX_TIME 293 return CLOCKS_PER_SEC; 294 #else 295 return 10000000; 296 #endif 297} 298 299class CBenchProgressStatus 300{ 301 #ifdef BENCH_MT 302 NWindows::NSynchronization::CCriticalSection CS; 303 #endif 304public: 305 HRESULT Res; 306 bool EncodeMode; 307 void SetResult(HRESULT res) 308 { 309 #ifdef BENCH_MT 310 NWindows::NSynchronization::CCriticalSectionLock lock(CS); 311 #endif 312 Res = res; 313 } 314 HRESULT GetResult() 315 { 316 #ifdef BENCH_MT 317 NWindows::NSynchronization::CCriticalSectionLock lock(CS); 318 #endif 319 return Res; 320 } 321}; 322 323class CBenchProgressInfo: 324 public ICompressProgressInfo, 325 public CMyUnknownImp 326{ 327public: 328 CBenchProgressStatus *Status; 329 CBenchInfo BenchInfo; 330 HRESULT Res; 331 IBenchCallback *callback; 332 CBenchProgressInfo(): callback(0) {} 333 MY_UNKNOWN_IMP 334 STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize); 335}; 336 337void SetStartTime(CBenchInfo &bi) 338{ 339 bi.GlobalFreq = GetFreq(); 340 bi.UserFreq = GetUserFreq(); 341 bi.GlobalTime = ::GetTimeCount(); 342 bi.UserTime = ::GetUserTime(); 343} 344 345void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest) 346{ 347 dest.GlobalFreq = GetFreq(); 348 dest.UserFreq = GetUserFreq(); 349 dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime; 350 dest.UserTime = ::GetUserTime() - biStart.UserTime; 351} 352 353STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize) 354{ 355 HRESULT res = Status->GetResult(); 356 if (res != S_OK) 357 return res; 358 if (!callback) 359 return res; 360 CBenchInfo info = BenchInfo; 361 SetFinishTime(BenchInfo, info); 362 if (Status->EncodeMode) 363 { 364 info.UnpackSize = *inSize; 365 info.PackSize = *outSize; 366 res = callback->SetEncodeResult(info, false); 367 } 368 else 369 { 370 info.PackSize = BenchInfo.PackSize + *inSize; 371 info.UnpackSize = BenchInfo.UnpackSize + *outSize; 372 res = callback->SetDecodeResult(info, false); 373 } 374 if (res != S_OK) 375 Status->SetResult(res); 376 return res; 377} 378 379static const int kSubBits = 8; 380 381static UInt32 GetLogSize(UInt32 size) 382{ 383 for (int i = kSubBits; i < 32; i++) 384 for (UInt32 j = 0; j < (1 << kSubBits); j++) 385 if (size <= (((UInt32)1) << i) + (j << (i - kSubBits))) 386 return (i << kSubBits) + j; 387 return (32 << kSubBits); 388} 389 390static void NormalizeVals(UInt64 &v1, UInt64 &v2) 391{ 392 while (v1 > 1000000) 393 { 394 v1 >>= 1; 395 v2 >>= 1; 396 } 397} 398 399UInt64 GetUsage(const CBenchInfo &info) 400{ 401 UInt64 userTime = info.UserTime; 402 UInt64 userFreq = info.UserFreq; 403 UInt64 globalTime = info.GlobalTime; 404 UInt64 globalFreq = info.GlobalFreq; 405 NormalizeVals(userTime, userFreq); 406 NormalizeVals(globalFreq, globalTime); 407 if (userFreq == 0) 408 userFreq = 1; 409 if (globalTime == 0) 410 globalTime = 1; 411 return userTime * globalFreq * 1000000 / userFreq / globalTime; 412} 413 414UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating) 415{ 416 UInt64 userTime = info.UserTime; 417 UInt64 userFreq = info.UserFreq; 418 UInt64 globalTime = info.GlobalTime; 419 UInt64 globalFreq = info.GlobalFreq; 420 NormalizeVals(userFreq, userTime); 421 NormalizeVals(globalTime, globalFreq); 422 if (globalFreq == 0) 423 globalFreq = 1; 424 if (userTime == 0) 425 userTime = 1; 426 return userFreq * globalTime / globalFreq * rating / userTime; 427} 428 429static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq) 430{ 431 UInt64 elTime = elapsedTime; 432 NormalizeVals(freq, elTime); 433 if (elTime == 0) 434 elTime = 1; 435 return value * freq / elTime; 436} 437 438UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size) 439{ 440 UInt64 t = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits); 441 // UInt64 numCommandsForOne = 1000 + ((t * t * 7) >> (2 * kSubBits)); // AMD K8 442 UInt64 numCommandsForOne = 870 + ((t * t * 5) >> (2 * kSubBits)); // Intel Core2 443 444 UInt64 numCommands = (UInt64)(size) * numCommandsForOne; 445 return MyMultDiv64(numCommands, elapsedTime, freq); 446} 447 448UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations) 449{ 450 // UInt64 numCommands = (inSize * 216 + outSize * 14) * numIterations; // AMD K8 451 UInt64 numCommands = (inSize * 220 + outSize * 8) * numIterations; // Intel Core2 452 return MyMultDiv64(numCommands, elapsedTime, freq); 453} 454 455#ifdef EXTERNAL_LZMA 456typedef UInt32 (WINAPI * CreateObjectPointer)(const GUID *clsID, 457 const GUID *interfaceID, void **outObject); 458#endif 459 460struct CEncoderInfo; 461 462struct CEncoderInfo 463{ 464 #ifdef BENCH_MT 465 NWindows::CThread thread[2]; 466 #endif 467 CMyComPtr<ICompressCoder> encoder; 468 CBenchProgressInfo *progressInfoSpec[2]; 469 CMyComPtr<ICompressProgressInfo> progressInfo[2]; 470 UInt32 NumIterations; 471 #ifdef USE_ALLOCA 472 size_t AllocaSize; 473 #endif 474 475 struct CDecoderInfo 476 { 477 CEncoderInfo *Encoder; 478 UInt32 DecoderIndex; 479 #ifdef USE_ALLOCA 480 size_t AllocaSize; 481 #endif 482 bool CallbackMode; 483 }; 484 CDecoderInfo decodersInfo[2]; 485 486 CMyComPtr<ICompressCoder> decoders[2]; 487 HRESULT Results[2]; 488 CBenchmarkOutStream *outStreamSpec; 489 CMyComPtr<ISequentialOutStream> outStream; 490 IBenchCallback *callback; 491 UInt32 crc; 492 UInt32 kBufferSize; 493 UInt32 compressedSize; 494 CBenchRandomGenerator rg; 495 CBenchmarkOutStream *propStreamSpec; 496 CMyComPtr<ISequentialOutStream> propStream; 497 HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg); 498 HRESULT Encode(); 499 HRESULT Decode(UInt32 decoderIndex); 500 501 CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {} 502 503 #ifdef BENCH_MT 504 static THREAD_FUNC_DECL EncodeThreadFunction(void *param) 505 { 506 CEncoderInfo *encoder = (CEncoderInfo *)param; 507 #ifdef USE_ALLOCA 508 alloca(encoder->AllocaSize); 509 #endif 510 HRESULT res = encoder->Encode(); 511 encoder->Results[0] = res; 512 if (res != S_OK) 513 encoder->progressInfoSpec[0]->Status->SetResult(res); 514 515 return 0; 516 } 517 static THREAD_FUNC_DECL DecodeThreadFunction(void *param) 518 { 519 CDecoderInfo *decoder = (CDecoderInfo *)param; 520 #ifdef USE_ALLOCA 521 alloca(decoder->AllocaSize); 522 #endif 523 CEncoderInfo *encoder = decoder->Encoder; 524 encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex); 525 return 0; 526 } 527 528 HRESULT CreateEncoderThread() 529 { 530 return thread[0].Create(EncodeThreadFunction, this); 531 } 532 533 HRESULT CreateDecoderThread(int index, bool callbackMode 534 #ifdef USE_ALLOCA 535 , size_t allocaSize 536 #endif 537 ) 538 { 539 CDecoderInfo &decoder = decodersInfo[index]; 540 decoder.DecoderIndex = index; 541 decoder.Encoder = this; 542 #ifdef USE_ALLOCA 543 decoder.AllocaSize = allocaSize; 544 #endif 545 decoder.CallbackMode = callbackMode; 546 return thread[index].Create(DecodeThreadFunction, &decoder); 547 } 548 #endif 549}; 550 551HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc) 552{ 553 rg.Set(rgLoc); 554 kBufferSize = dictionarySize + kAdditionalSize; 555 UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize; 556 if (!rg.Alloc(kBufferSize)) 557 return E_OUTOFMEMORY; 558 rg.Generate(); 559 crc = CrcCalc(rg.Buffer, rg.BufferSize); 560 561 outStreamSpec = new CBenchmarkOutStream; 562 if (!outStreamSpec->Alloc(kCompressedBufferSize)) 563 return E_OUTOFMEMORY; 564 565 outStream = outStreamSpec; 566 567 propStreamSpec = 0; 568 if (!propStream) 569 { 570 propStreamSpec = new CBenchmarkOutStream; 571 propStream = propStreamSpec; 572 } 573 if (!propStreamSpec->Alloc(kMaxLzmaPropSize)) 574 return E_OUTOFMEMORY; 575 propStreamSpec->Init(); 576 577 PROPID propIDs[] = 578 { 579 NCoderPropID::kDictionarySize, 580 NCoderPropID::kMultiThread 581 }; 582 const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]); 583 PROPVARIANT properties[kNumProps]; 584 properties[0].vt = VT_UI4; 585 properties[0].ulVal = (UInt32)dictionarySize; 586 587 properties[1].vt = VT_BOOL; 588 properties[1].boolVal = (numThreads > 1) ? VARIANT_TRUE : VARIANT_FALSE; 589 590 { 591 CMyComPtr<ICompressSetCoderProperties> setCoderProperties; 592 RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties)); 593 if (!setCoderProperties) 594 return E_FAIL; 595 RINOK(setCoderProperties->SetCoderProperties(propIDs, properties, kNumProps)); 596 597 CMyComPtr<ICompressWriteCoderProperties> writeCoderProperties; 598 encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties); 599 if (writeCoderProperties) 600 { 601 RINOK(writeCoderProperties->WriteCoderProperties(propStream)); 602 } 603 } 604 return S_OK; 605} 606 607HRESULT CEncoderInfo::Encode() 608{ 609 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; 610 CMyComPtr<ISequentialInStream> inStream = inStreamSpec; 611 inStreamSpec->Init(rg.Buffer, rg.BufferSize); 612 outStreamSpec->Init(); 613 614 RINOK(encoder->Code(inStream, outStream, 0, 0, progressInfo[0])); 615 compressedSize = outStreamSpec->Pos; 616 encoder.Release(); 617 return S_OK; 618} 619 620HRESULT CEncoderInfo::Decode(UInt32 decoderIndex) 621{ 622 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; 623 CMyComPtr<ISequentialInStream> inStream = inStreamSpec; 624 CMyComPtr<ICompressCoder> &decoder = decoders[decoderIndex]; 625 626 CMyComPtr<ICompressSetDecoderProperties2> compressSetDecoderProperties; 627 decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &compressSetDecoderProperties); 628 if (!compressSetDecoderProperties) 629 return E_FAIL; 630 631 CCrcOutStream *crcOutStreamSpec = new CCrcOutStream; 632 CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec; 633 634 CBenchProgressInfo *pi = progressInfoSpec[decoderIndex]; 635 pi->BenchInfo.UnpackSize = 0; 636 pi->BenchInfo.PackSize = 0; 637 638 for (UInt32 j = 0; j < NumIterations; j++) 639 { 640 inStreamSpec->Init(outStreamSpec->Buffer, compressedSize); 641 crcOutStreamSpec->Init(); 642 643 RINOK(compressSetDecoderProperties->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos)); 644 UInt64 outSize = kBufferSize; 645 RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex])); 646 if (CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc) 647 return S_FALSE; 648 pi->BenchInfo.UnpackSize += kBufferSize; 649 pi->BenchInfo.PackSize += compressedSize; 650 } 651 decoder.Release(); 652 return S_OK; 653} 654 655static const UInt32 kNumThreadsMax = (1 << 16); 656 657struct CBenchEncoders 658{ 659 CEncoderInfo *encoders; 660 CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; } 661 ~CBenchEncoders() { delete []encoders; } 662}; 663 664HRESULT LzmaBench( 665 #ifdef EXTERNAL_LZMA 666 CCodecs *codecs, 667 #endif 668 UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback) 669{ 670 UInt32 numEncoderThreads = 671 #ifdef BENCH_MT 672 (numThreads > 1 ? numThreads / 2 : 1); 673 #else 674 1; 675 #endif 676 UInt32 numSubDecoderThreads = 677 #ifdef BENCH_MT 678 (numThreads > 1 ? 2 : 1); 679 #else 680 1; 681 #endif 682 if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax) 683 { 684 return E_INVALIDARG; 685 } 686 687 CBenchEncoders encodersSpec(numEncoderThreads); 688 CEncoderInfo *encoders = encodersSpec.encoders; 689 690 #ifdef EXTERNAL_LZMA 691 UString name = L"LZMA"; 692 #endif 693 694 UInt32 i; 695 for (i = 0; i < numEncoderThreads; i++) 696 { 697 CEncoderInfo &encoder = encoders[i]; 698 encoder.callback = (i == 0) ? callback : 0; 699 700 #ifdef EXTERNAL_LZMA 701 RINOK(codecs->CreateCoder(name, true, encoder.encoder)); 702 #else 703 encoder.encoder = new NCompress::NLZMA::CEncoder; 704 #endif 705 for (UInt32 j = 0; j < numSubDecoderThreads; j++) 706 { 707 #ifdef EXTERNAL_LZMA 708 RINOK(codecs->CreateCoder(name, false, encoder.decoders[j])); 709 #else 710 encoder.decoders[j] = new NCompress::NLZMA::CDecoder; 711 #endif 712 } 713 } 714 715 CBaseRandomGenerator rg; 716 rg.Init(); 717 for (i = 0; i < numEncoderThreads; i++) 718 { 719 RINOK(encoders[i].Init(dictionarySize, numThreads, &rg)); 720 } 721 722 CBenchProgressStatus status; 723 status.Res = S_OK; 724 status.EncodeMode = true; 725 726 for (i = 0; i < numEncoderThreads; i++) 727 { 728 CEncoderInfo &encoder = encoders[i]; 729 for (int j = 0; j < 2; j++) 730 { 731 encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo; 732 encoder.progressInfoSpec[j]->Status = &status; 733 } 734 if (i == 0) 735 { 736 encoder.progressInfoSpec[0]->callback = callback; 737 encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads; 738 SetStartTime(encoder.progressInfoSpec[0]->BenchInfo); 739 } 740 741 #ifdef BENCH_MT 742 if (numEncoderThreads > 1) 743 { 744 #ifdef USE_ALLOCA 745 encoder.AllocaSize = (i * 16 * 21) & 0x7FF; 746 #endif 747 RINOK(encoder.CreateEncoderThread()) 748 } 749 else 750 #endif 751 { 752 RINOK(encoder.Encode()); 753 } 754 } 755 #ifdef BENCH_MT 756 if (numEncoderThreads > 1) 757 for (i = 0; i < numEncoderThreads; i++) 758 encoders[i].thread[0].Wait(); 759 #endif 760 761 RINOK(status.Res); 762 763 CBenchInfo info; 764 765 SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info); 766 info.UnpackSize = 0; 767 info.PackSize = 0; 768 info.NumIterations = 1; // progressInfoSpec->NumIterations; 769 for (i = 0; i < numEncoderThreads; i++) 770 { 771 CEncoderInfo &encoder = encoders[i]; 772 info.UnpackSize += encoder.kBufferSize; 773 info.PackSize += encoder.compressedSize; 774 } 775 RINOK(callback->SetEncodeResult(info, true)); 776 777 778 status.Res = S_OK; 779 status.EncodeMode = false; 780 781 UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads; 782 for (i = 0; i < numEncoderThreads; i++) 783 { 784 CEncoderInfo &encoder = encoders[i]; 785 encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize; 786 787 if (i == 0) 788 { 789 encoder.progressInfoSpec[0]->callback = callback; 790 encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads; 791 SetStartTime(encoder.progressInfoSpec[0]->BenchInfo); 792 } 793 794 #ifdef BENCH_MT 795 if (numDecoderThreads > 1) 796 { 797 for (UInt32 j = 0; j < numSubDecoderThreads; j++) 798 { 799 size_t allocaSize = ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF; 800 HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0) 801 #ifdef USE_ALLOCA 802 , allocaSize 803 #endif 804 ); 805 RINOK(res); 806 } 807 } 808 else 809 #endif 810 { 811 RINOK(encoder.Decode(0)); 812 } 813 } 814 #ifdef BENCH_MT 815 HRESULT res = S_OK; 816 if (numDecoderThreads > 1) 817 for (i = 0; i < numEncoderThreads; i++) 818 for (UInt32 j = 0; j < numSubDecoderThreads; j++) 819 { 820 CEncoderInfo &encoder = encoders[i]; 821 encoder.thread[j].Wait(); 822 if (encoder.Results[j] != S_OK) 823 res = encoder.Results[j]; 824 } 825 RINOK(res); 826 #endif 827 RINOK(status.Res); 828 SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info); 829 info.UnpackSize = 0; 830 info.PackSize = 0; 831 info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations; 832 for (i = 0; i < numEncoderThreads; i++) 833 { 834 CEncoderInfo &encoder = encoders[i]; 835 info.UnpackSize += encoder.kBufferSize; 836 info.PackSize += encoder.compressedSize; 837 } 838 RINOK(callback->SetDecodeResult(info, false)); 839 RINOK(callback->SetDecodeResult(info, true)); 840 return S_OK; 841} 842 843 844inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary) 845{ 846 UInt32 hs = dictionary - 1; 847 hs |= (hs >> 1); 848 hs |= (hs >> 2); 849 hs |= (hs >> 4); 850 hs |= (hs >> 8); 851 hs >>= 1; 852 hs |= 0xFFFF; 853 if (hs > (1 << 24)) 854 hs >>= 1; 855 hs++; 856 return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 + 857 (1 << 20) + (multiThread ? (6 << 20) : 0); 858} 859 860UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary) 861{ 862 const UInt32 kBufferSize = dictionary; 863 const UInt32 kCompressedBufferSize = (kBufferSize / 2); 864 UInt32 numSubThreads = (numThreads > 1) ? 2 : 1; 865 UInt32 numBigThreads = numThreads / numSubThreads; 866 return (kBufferSize + kCompressedBufferSize + 867 GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads; 868} 869 870static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase) 871{ 872 for (UInt32 i = 0; i < numCycles; i++) 873 if (CrcCalc(data, size) != crcBase) 874 return false; 875 return true; 876} 877 878#ifdef BENCH_MT 879struct CCrcInfo 880{ 881 NWindows::CThread Thread; 882 const Byte *Data; 883 UInt32 Size; 884 UInt32 NumCycles; 885 UInt32 Crc; 886 bool Res; 887 void Wait() 888 { 889 Thread.Wait(); 890 Thread.Close(); 891 } 892}; 893 894static THREAD_FUNC_DECL CrcThreadFunction(void *param) 895{ 896 CCrcInfo *p = (CCrcInfo *)param; 897 p->Res = CrcBig(p->Data, p->Size, p->NumCycles, p->Crc); 898 return 0; 899} 900 901struct CCrcThreads 902{ 903 UInt32 NumThreads; 904 CCrcInfo *Items; 905 CCrcThreads(): Items(0), NumThreads(0) {} 906 void WaitAll() 907 { 908 for (UInt32 i = 0; i < NumThreads; i++) 909 Items[i].Wait(); 910 NumThreads = 0; 911 } 912 ~CCrcThreads() 913 { 914 WaitAll(); 915 delete []Items; 916 } 917}; 918#endif 919 920static UInt32 CrcCalc1(const Byte *buf, UInt32 size) 921{ 922 UInt32 crc = CRC_INIT_VAL;; 923 for (UInt32 i = 0; i < size; i++) 924 crc = CRC_UPDATE_BYTE(crc, buf[i]); 925 return CRC_GET_DIGEST(crc); 926} 927 928static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG) 929{ 930 for (UInt32 i = 0; i < size; i++) 931 buf[i] = (Byte)RG.GetRnd(); 932} 933 934static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG) 935{ 936 RandGen(buf, size, RG); 937 return CrcCalc1(buf, size); 938} 939 940bool CrcInternalTest() 941{ 942 CBenchBuffer buffer; 943 const UInt32 kBufferSize0 = (1 << 8); 944 const UInt32 kBufferSize1 = (1 << 10); 945 const UInt32 kCheckSize = (1 << 5); 946 if (!buffer.Alloc(kBufferSize0 + kBufferSize1)) 947 return false; 948 Byte *buf = buffer.Buffer; 949 UInt32 i; 950 for (i = 0; i < kBufferSize0; i++) 951 buf[i] = (Byte)i; 952 UInt32 crc1 = CrcCalc1(buf, kBufferSize0); 953 if (crc1 != 0x29058C73) 954 return false; 955 CBaseRandomGenerator RG; 956 RandGen(buf + kBufferSize0, kBufferSize1, RG); 957 for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++) 958 for (UInt32 j = 0; j < kCheckSize; j++) 959 if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j)) 960 return false; 961 return true; 962} 963 964HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed) 965{ 966 if (numThreads == 0) 967 numThreads = 1; 968 969 CBenchBuffer buffer; 970 size_t totalSize = (size_t)bufferSize * numThreads; 971 if (totalSize / numThreads != bufferSize) 972 return E_OUTOFMEMORY; 973 if (!buffer.Alloc(totalSize)) 974 return E_OUTOFMEMORY; 975 976 Byte *buf = buffer.Buffer; 977 CBaseRandomGenerator RG; 978 UInt32 numCycles = ((UInt32)1 << 30) / ((bufferSize >> 2) + 1) + 1; 979 980 UInt64 timeVal; 981 #ifdef BENCH_MT 982 CCrcThreads threads; 983 if (numThreads > 1) 984 { 985 threads.Items = new CCrcInfo[numThreads]; 986 UInt32 i; 987 for (i = 0; i < numThreads; i++) 988 { 989 CCrcInfo &info = threads.Items[i]; 990 Byte *data = buf + (size_t)bufferSize * i; 991 info.Data = data; 992 info.NumCycles = numCycles; 993 info.Size = bufferSize; 994 info.Crc = RandGenCrc(data, bufferSize, RG); 995 } 996 timeVal = GetTimeCount(); 997 for (i = 0; i < numThreads; i++) 998 { 999 CCrcInfo &info = threads.Items[i]; 1000 RINOK(info.Thread.Create(CrcThreadFunction, &info)); 1001 threads.NumThreads++; 1002 } 1003 threads.WaitAll(); 1004 for (i = 0; i < numThreads; i++) 1005 if (!threads.Items[i].Res) 1006 return S_FALSE; 1007 } 1008 else 1009 #endif 1010 { 1011 UInt32 crc = RandGenCrc(buf, bufferSize, RG); 1012 timeVal = GetTimeCount(); 1013 if (!CrcBig(buf, bufferSize, numCycles, crc)) 1014 return S_FALSE; 1015 } 1016 timeVal = GetTimeCount() - timeVal; 1017 if (timeVal == 0) 1018 timeVal = 1; 1019 1020 UInt64 size = (UInt64)numCycles * totalSize; 1021 speed = MyMultDiv64(size, timeVal, GetFreq()); 1022 return S_OK; 1023} 1024 1025