1// LzmaBench.cpp
2
3#include "StdAfx.h"
4
5#include "LzmaBench.h"
6
7#ifndef _WIN32
8#define USE_POSIX_TIME
9#define USE_POSIX_TIME2
10#endif
11
12#ifdef USE_POSIX_TIME
13#include <time.h>
14#ifdef USE_POSIX_TIME2
15#include <sys/time.h>
16#endif
17#endif
18
19#ifdef _WIN32
20#define USE_ALLOCA
21#endif
22
23#ifdef USE_ALLOCA
24#ifdef _WIN32
25#include <malloc.h>
26#else
27#include <stdlib.h>
28#endif
29#endif
30
31extern "C"
32{
33#include "../../../../C/Alloc.h"
34#include "../../../../C/7zCrc.h"
35}
36#include "../../../Common/MyCom.h"
37#include "../../ICoder.h"
38
39#ifdef BENCH_MT
40#include "../../../Windows/Thread.h"
41#include "../../../Windows/Synchronization.h"
42#endif
43
44#ifdef EXTERNAL_LZMA
45#include "../../../Windows/PropVariant.h"
46#else
47#include "../LZMA/LZMADecoder.h"
48#include "../LZMA/LZMAEncoder.h"
49#endif
50
51static const UInt32 kUncompressMinBlockSize = 1 << 26;
52static const UInt32 kAdditionalSize = (1 << 16);
53static const UInt32 kCompressedAdditionalSize = (1 << 10);
54static const UInt32 kMaxLzmaPropSize = 5;
55
56class CBaseRandomGenerator
57{
58  UInt32 A1;
59  UInt32 A2;
60public:
61  CBaseRandomGenerator() { Init(); }
62  void Init() { A1 = 362436069; A2 = 521288629;}
63  UInt32 GetRnd()
64  {
65    return
66      ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
67      ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
68  }
69};
70
71class CBenchBuffer
72{
73public:
74  size_t BufferSize;
75  Byte *Buffer;
76  CBenchBuffer(): Buffer(0) {}
77  virtual ~CBenchBuffer() { Free(); }
78  void Free()
79  {
80    ::MidFree(Buffer);
81    Buffer = 0;
82  }
83  bool Alloc(size_t bufferSize)
84  {
85    if (Buffer != 0 && BufferSize == bufferSize)
86      return true;
87    Free();
88    Buffer = (Byte *)::MidAlloc(bufferSize);
89    BufferSize = bufferSize;
90    return (Buffer != 0);
91  }
92};
93
94class CBenchRandomGenerator: public CBenchBuffer
95{
96  CBaseRandomGenerator *RG;
97public:
98  void Set(CBaseRandomGenerator *rg) { RG = rg; }
99  UInt32 GetVal(UInt32 &res, int numBits)
100  {
101    UInt32 val = res & (((UInt32)1 << numBits) - 1);
102    res >>= numBits;
103    return val;
104  }
105  UInt32 GetLen(UInt32 &res)
106  {
107    UInt32 len = GetVal(res, 2);
108    return GetVal(res, 1 + len);
109  }
110  void Generate()
111  {
112    UInt32 pos = 0;
113    UInt32 rep0 = 1;
114    while (pos < BufferSize)
115    {
116      UInt32 res = RG->GetRnd();
117      res >>= 1;
118      if (GetVal(res, 1) == 0 || pos < 1024)
119        Buffer[pos++] = (Byte)(res & 0xFF);
120      else
121      {
122        UInt32 len;
123        len = 1 + GetLen(res);
124        if (GetVal(res, 3) != 0)
125        {
126          len += GetLen(res);
127          do
128          {
129            UInt32 ppp = GetVal(res, 5) + 6;
130            res = RG->GetRnd();
131            if (ppp > 30)
132              continue;
133            rep0 = /* (1 << ppp) +*/  GetVal(res, ppp);
134            res = RG->GetRnd();
135          }
136          while (rep0 >= pos);
137          rep0++;
138        }
139
140        for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++)
141          Buffer[pos] = Buffer[pos - rep0];
142      }
143    }
144  }
145};
146
147
148class CBenchmarkInStream:
149  public ISequentialInStream,
150  public CMyUnknownImp
151{
152  const Byte *Data;
153  size_t Pos;
154  size_t Size;
155public:
156  MY_UNKNOWN_IMP
157  void Init(const Byte *data, size_t size)
158  {
159    Data = data;
160    Size = size;
161    Pos = 0;
162  }
163  STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
164};
165
166STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
167{
168  size_t remain = Size - Pos;
169  UInt32 kMaxBlockSize = (1 << 20);
170  if (size > kMaxBlockSize)
171    size = kMaxBlockSize;
172  if (size > remain)
173    size = (UInt32)remain;
174  for (UInt32 i = 0; i < size; i++)
175    ((Byte *)data)[i] = Data[Pos + i];
176  Pos += size;
177  if(processedSize != NULL)
178    *processedSize = size;
179  return S_OK;
180}
181
182class CBenchmarkOutStream:
183  public ISequentialOutStream,
184  public CBenchBuffer,
185  public CMyUnknownImp
186{
187  // bool _overflow;
188public:
189  UInt32 Pos;
190  // CBenchmarkOutStream(): _overflow(false) {}
191  void Init()
192  {
193    // _overflow = false;
194    Pos = 0;
195  }
196  MY_UNKNOWN_IMP
197  STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
198};
199
200STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
201{
202  size_t curSize = BufferSize - Pos;
203  if (curSize > size)
204    curSize = size;
205  memcpy(Buffer + Pos, data, curSize);
206  Pos += (UInt32)curSize;
207  if(processedSize != NULL)
208    *processedSize = (UInt32)curSize;
209  if (curSize != size)
210  {
211    // _overflow = true;
212    return E_FAIL;
213  }
214  return S_OK;
215}
216
217class CCrcOutStream:
218  public ISequentialOutStream,
219  public CMyUnknownImp
220{
221public:
222  UInt32 Crc;
223  MY_UNKNOWN_IMP
224  void Init() { Crc = CRC_INIT_VAL; }
225  STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
226};
227
228STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
229{
230  Crc = CrcUpdate(Crc, data, size);
231  if (processedSize != NULL)
232    *processedSize = size;
233  return S_OK;
234}
235
236static UInt64 GetTimeCount()
237{
238  #ifdef USE_POSIX_TIME
239  #ifdef USE_POSIX_TIME2
240  timeval v;
241  if (gettimeofday(&v, 0) == 0)
242    return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
243  return (UInt64)time(NULL) * 1000000;
244  #else
245  return time(NULL);
246  #endif
247  #else
248  /*
249  LARGE_INTEGER value;
250  if (::QueryPerformanceCounter(&value))
251    return value.QuadPart;
252  */
253  return GetTickCount();
254  #endif
255}
256
257static UInt64 GetFreq()
258{
259  #ifdef USE_POSIX_TIME
260  #ifdef USE_POSIX_TIME2
261  return 1000000;
262  #else
263  return 1;
264  #endif
265  #else
266  /*
267  LARGE_INTEGER value;
268  if (::QueryPerformanceFrequency(&value))
269    return value.QuadPart;
270  */
271  return 1000;
272  #endif
273}
274
275#ifndef USE_POSIX_TIME
276static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
277#endif
278static UInt64 GetUserTime()
279{
280  #ifdef USE_POSIX_TIME
281  return clock();
282  #else
283  FILETIME creationTime, exitTime, kernelTime, userTime;
284  if (::GetProcessTimes(::GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime) != 0)
285    return GetTime64(userTime) + GetTime64(kernelTime);
286  return (UInt64)GetTickCount() * 10000;
287  #endif
288}
289
290static UInt64 GetUserFreq()
291{
292  #ifdef USE_POSIX_TIME
293  return CLOCKS_PER_SEC;
294  #else
295  return 10000000;
296  #endif
297}
298
299class CBenchProgressStatus
300{
301  #ifdef BENCH_MT
302  NWindows::NSynchronization::CCriticalSection CS;
303  #endif
304public:
305  HRESULT Res;
306  bool EncodeMode;
307  void SetResult(HRESULT res)
308  {
309    #ifdef BENCH_MT
310    NWindows::NSynchronization::CCriticalSectionLock lock(CS);
311    #endif
312    Res = res;
313  }
314  HRESULT GetResult()
315  {
316    #ifdef BENCH_MT
317    NWindows::NSynchronization::CCriticalSectionLock lock(CS);
318    #endif
319    return Res;
320  }
321};
322
323class CBenchProgressInfo:
324  public ICompressProgressInfo,
325  public CMyUnknownImp
326{
327public:
328  CBenchProgressStatus *Status;
329  CBenchInfo BenchInfo;
330  HRESULT Res;
331  IBenchCallback *callback;
332  CBenchProgressInfo(): callback(0) {}
333  MY_UNKNOWN_IMP
334  STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
335};
336
337void SetStartTime(CBenchInfo &bi)
338{
339  bi.GlobalFreq = GetFreq();
340  bi.UserFreq = GetUserFreq();
341  bi.GlobalTime = ::GetTimeCount();
342  bi.UserTime = ::GetUserTime();
343}
344
345void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest)
346{
347  dest.GlobalFreq = GetFreq();
348  dest.UserFreq = GetUserFreq();
349  dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime;
350  dest.UserTime = ::GetUserTime() - biStart.UserTime;
351}
352
353STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
354{
355  HRESULT res = Status->GetResult();
356  if (res != S_OK)
357    return res;
358  if (!callback)
359    return res;
360  CBenchInfo info = BenchInfo;
361  SetFinishTime(BenchInfo, info);
362  if (Status->EncodeMode)
363  {
364    info.UnpackSize = *inSize;
365    info.PackSize = *outSize;
366    res = callback->SetEncodeResult(info, false);
367  }
368  else
369  {
370    info.PackSize = BenchInfo.PackSize + *inSize;
371    info.UnpackSize = BenchInfo.UnpackSize + *outSize;
372    res = callback->SetDecodeResult(info, false);
373  }
374  if (res != S_OK)
375    Status->SetResult(res);
376  return res;
377}
378
379static const int kSubBits = 8;
380
381static UInt32 GetLogSize(UInt32 size)
382{
383  for (int i = kSubBits; i < 32; i++)
384    for (UInt32 j = 0; j < (1 << kSubBits); j++)
385      if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
386        return (i << kSubBits) + j;
387  return (32 << kSubBits);
388}
389
390static void NormalizeVals(UInt64 &v1, UInt64 &v2)
391{
392  while (v1 > 1000000)
393  {
394    v1 >>= 1;
395    v2 >>= 1;
396  }
397}
398
399UInt64 GetUsage(const CBenchInfo &info)
400{
401  UInt64 userTime = info.UserTime;
402  UInt64 userFreq = info.UserFreq;
403  UInt64 globalTime = info.GlobalTime;
404  UInt64 globalFreq = info.GlobalFreq;
405  NormalizeVals(userTime, userFreq);
406  NormalizeVals(globalFreq, globalTime);
407  if (userFreq == 0)
408    userFreq = 1;
409  if (globalTime == 0)
410    globalTime = 1;
411  return userTime * globalFreq * 1000000 / userFreq / globalTime;
412}
413
414UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating)
415{
416  UInt64 userTime = info.UserTime;
417  UInt64 userFreq = info.UserFreq;
418  UInt64 globalTime = info.GlobalTime;
419  UInt64 globalFreq = info.GlobalFreq;
420  NormalizeVals(userFreq, userTime);
421  NormalizeVals(globalTime, globalFreq);
422  if (globalFreq == 0)
423    globalFreq = 1;
424  if (userTime == 0)
425    userTime = 1;
426  return userFreq * globalTime / globalFreq *  rating / userTime;
427}
428
429static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
430{
431  UInt64 elTime = elapsedTime;
432  NormalizeVals(freq, elTime);
433  if (elTime == 0)
434    elTime = 1;
435  return value * freq / elTime;
436}
437
438UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
439{
440  UInt64 t = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits);
441  // UInt64 numCommandsForOne = 1000 + ((t * t * 7) >> (2 * kSubBits)); // AMD K8
442  UInt64 numCommandsForOne = 870 + ((t * t * 5) >> (2 * kSubBits)); // Intel Core2
443
444  UInt64 numCommands = (UInt64)(size) * numCommandsForOne;
445  return MyMultDiv64(numCommands, elapsedTime, freq);
446}
447
448UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations)
449{
450  // UInt64 numCommands = (inSize * 216 + outSize * 14) * numIterations; // AMD K8
451  UInt64 numCommands = (inSize * 220 + outSize * 8) * numIterations; // Intel Core2
452  return MyMultDiv64(numCommands, elapsedTime, freq);
453}
454
#ifdef EXTERNAL_LZMA
// Signature of an object-creation entry point taking a class ID and an
// interface ID and returning the created object through outObject.
typedef UInt32 (WINAPI * CreateObjectPointer)(
    const GUID *clsID,
    const GUID *interfaceID,
    void **outObject);
#endif
459
460struct CEncoderInfo;
461
462struct CEncoderInfo
463{
464  #ifdef BENCH_MT
465  NWindows::CThread thread[2];
466  #endif
467  CMyComPtr<ICompressCoder> encoder;
468  CBenchProgressInfo *progressInfoSpec[2];
469  CMyComPtr<ICompressProgressInfo> progressInfo[2];
470  UInt32 NumIterations;
471  #ifdef USE_ALLOCA
472  size_t AllocaSize;
473  #endif
474
475  struct CDecoderInfo
476  {
477    CEncoderInfo *Encoder;
478    UInt32 DecoderIndex;
479    #ifdef USE_ALLOCA
480    size_t AllocaSize;
481    #endif
482    bool CallbackMode;
483  };
484  CDecoderInfo decodersInfo[2];
485
486  CMyComPtr<ICompressCoder> decoders[2];
487  HRESULT Results[2];
488  CBenchmarkOutStream *outStreamSpec;
489  CMyComPtr<ISequentialOutStream> outStream;
490  IBenchCallback *callback;
491  UInt32 crc;
492  UInt32 kBufferSize;
493  UInt32 compressedSize;
494  CBenchRandomGenerator rg;
495  CBenchmarkOutStream *propStreamSpec;
496  CMyComPtr<ISequentialOutStream> propStream;
497  HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg);
498  HRESULT Encode();
499  HRESULT Decode(UInt32 decoderIndex);
500
501  CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {}
502
503  #ifdef BENCH_MT
504  static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
505  {
506    CEncoderInfo *encoder = (CEncoderInfo *)param;
507    #ifdef USE_ALLOCA
508    alloca(encoder->AllocaSize);
509    #endif
510    HRESULT res = encoder->Encode();
511    encoder->Results[0] = res;
512    if (res != S_OK)
513      encoder->progressInfoSpec[0]->Status->SetResult(res);
514
515    return 0;
516  }
517  static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
518  {
519    CDecoderInfo *decoder = (CDecoderInfo *)param;
520    #ifdef USE_ALLOCA
521    alloca(decoder->AllocaSize);
522    #endif
523    CEncoderInfo *encoder = decoder->Encoder;
524    encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
525    return 0;
526  }
527
528  HRESULT CreateEncoderThread()
529  {
530    return thread[0].Create(EncodeThreadFunction, this);
531  }
532
533  HRESULT CreateDecoderThread(int index, bool callbackMode
534      #ifdef USE_ALLOCA
535      , size_t allocaSize
536      #endif
537      )
538  {
539    CDecoderInfo &decoder = decodersInfo[index];
540    decoder.DecoderIndex = index;
541    decoder.Encoder = this;
542    #ifdef USE_ALLOCA
543    decoder.AllocaSize = allocaSize;
544    #endif
545    decoder.CallbackMode = callbackMode;
546    return thread[index].Create(DecodeThreadFunction, &decoder);
547  }
548  #endif
549};
550
551HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc)
552{
553  rg.Set(rgLoc);
554  kBufferSize = dictionarySize + kAdditionalSize;
555  UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize;
556  if (!rg.Alloc(kBufferSize))
557    return E_OUTOFMEMORY;
558  rg.Generate();
559  crc = CrcCalc(rg.Buffer, rg.BufferSize);
560
561  outStreamSpec = new CBenchmarkOutStream;
562  if (!outStreamSpec->Alloc(kCompressedBufferSize))
563    return E_OUTOFMEMORY;
564
565  outStream = outStreamSpec;
566
567  propStreamSpec = 0;
568  if (!propStream)
569  {
570    propStreamSpec = new CBenchmarkOutStream;
571    propStream = propStreamSpec;
572  }
573  if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
574    return E_OUTOFMEMORY;
575  propStreamSpec->Init();
576
577  PROPID propIDs[] =
578  {
579    NCoderPropID::kDictionarySize,
580    NCoderPropID::kMultiThread
581  };
582  const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]);
583  PROPVARIANT properties[kNumProps];
584  properties[0].vt = VT_UI4;
585  properties[0].ulVal = (UInt32)dictionarySize;
586
587  properties[1].vt = VT_BOOL;
588  properties[1].boolVal = (numThreads > 1) ? VARIANT_TRUE : VARIANT_FALSE;
589
590  {
591    CMyComPtr<ICompressSetCoderProperties> setCoderProperties;
592    RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties));
593    if (!setCoderProperties)
594      return E_FAIL;
595    RINOK(setCoderProperties->SetCoderProperties(propIDs, properties, kNumProps));
596
597    CMyComPtr<ICompressWriteCoderProperties> writeCoderProperties;
598    encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties);
599    if (writeCoderProperties)
600    {
601      RINOK(writeCoderProperties->WriteCoderProperties(propStream));
602    }
603  }
604  return S_OK;
605}
606
607HRESULT CEncoderInfo::Encode()
608{
609  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
610  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
611  inStreamSpec->Init(rg.Buffer, rg.BufferSize);
612  outStreamSpec->Init();
613
614  RINOK(encoder->Code(inStream, outStream, 0, 0, progressInfo[0]));
615  compressedSize = outStreamSpec->Pos;
616  encoder.Release();
617  return S_OK;
618}
619
620HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
621{
622  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
623  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
624  CMyComPtr<ICompressCoder> &decoder = decoders[decoderIndex];
625
626  CMyComPtr<ICompressSetDecoderProperties2> compressSetDecoderProperties;
627  decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &compressSetDecoderProperties);
628  if (!compressSetDecoderProperties)
629    return E_FAIL;
630
631  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
632  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
633
634  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
635  pi->BenchInfo.UnpackSize = 0;
636  pi->BenchInfo.PackSize = 0;
637
638  for (UInt32 j = 0; j < NumIterations; j++)
639  {
640    inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
641    crcOutStreamSpec->Init();
642
643    RINOK(compressSetDecoderProperties->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos));
644    UInt64 outSize = kBufferSize;
645    RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
646    if (CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
647      return S_FALSE;
648    pi->BenchInfo.UnpackSize += kBufferSize;
649    pi->BenchInfo.PackSize += compressedSize;
650  }
651  decoder.Release();
652  return S_OK;
653}
654
655static const UInt32 kNumThreadsMax = (1 << 16);
656
657struct CBenchEncoders
658{
659  CEncoderInfo *encoders;
660  CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
661  ~CBenchEncoders() { delete []encoders; }
662};
663
664HRESULT LzmaBench(
665  #ifdef EXTERNAL_LZMA
666  CCodecs *codecs,
667  #endif
668  UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback)
669{
670  UInt32 numEncoderThreads =
671    #ifdef BENCH_MT
672    (numThreads > 1 ? numThreads / 2 : 1);
673    #else
674    1;
675    #endif
676  UInt32 numSubDecoderThreads =
677    #ifdef BENCH_MT
678    (numThreads > 1 ? 2 : 1);
679    #else
680    1;
681    #endif
682  if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax)
683  {
684    return E_INVALIDARG;
685  }
686
687  CBenchEncoders encodersSpec(numEncoderThreads);
688  CEncoderInfo *encoders = encodersSpec.encoders;
689
690  #ifdef EXTERNAL_LZMA
691  UString name = L"LZMA";
692  #endif
693
694  UInt32 i;
695  for (i = 0; i < numEncoderThreads; i++)
696  {
697    CEncoderInfo &encoder = encoders[i];
698    encoder.callback = (i == 0) ? callback : 0;
699
700    #ifdef EXTERNAL_LZMA
701    RINOK(codecs->CreateCoder(name, true, encoder.encoder));
702    #else
703    encoder.encoder = new NCompress::NLZMA::CEncoder;
704    #endif
705    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
706    {
707      #ifdef EXTERNAL_LZMA
708      RINOK(codecs->CreateCoder(name, false, encoder.decoders[j]));
709      #else
710      encoder.decoders[j] = new NCompress::NLZMA::CDecoder;
711      #endif
712    }
713  }
714
715  CBaseRandomGenerator rg;
716  rg.Init();
717  for (i = 0; i < numEncoderThreads; i++)
718  {
719    RINOK(encoders[i].Init(dictionarySize, numThreads, &rg));
720  }
721
722  CBenchProgressStatus status;
723  status.Res = S_OK;
724  status.EncodeMode = true;
725
726  for (i = 0; i < numEncoderThreads; i++)
727  {
728    CEncoderInfo &encoder = encoders[i];
729    for (int j = 0; j < 2; j++)
730    {
731      encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo;
732      encoder.progressInfoSpec[j]->Status = &status;
733    }
734    if (i == 0)
735    {
736      encoder.progressInfoSpec[0]->callback = callback;
737      encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads;
738      SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
739    }
740
741    #ifdef BENCH_MT
742    if (numEncoderThreads > 1)
743    {
744      #ifdef USE_ALLOCA
745      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
746      #endif
747      RINOK(encoder.CreateEncoderThread())
748    }
749    else
750    #endif
751    {
752      RINOK(encoder.Encode());
753    }
754  }
755  #ifdef BENCH_MT
756  if (numEncoderThreads > 1)
757    for (i = 0; i < numEncoderThreads; i++)
758      encoders[i].thread[0].Wait();
759  #endif
760
761  RINOK(status.Res);
762
763  CBenchInfo info;
764
765  SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
766  info.UnpackSize = 0;
767  info.PackSize = 0;
768  info.NumIterations = 1; // progressInfoSpec->NumIterations;
769  for (i = 0; i < numEncoderThreads; i++)
770  {
771    CEncoderInfo &encoder = encoders[i];
772    info.UnpackSize += encoder.kBufferSize;
773    info.PackSize += encoder.compressedSize;
774  }
775  RINOK(callback->SetEncodeResult(info, true));
776
777
778  status.Res = S_OK;
779  status.EncodeMode = false;
780
781  UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
782  for (i = 0; i < numEncoderThreads; i++)
783  {
784    CEncoderInfo &encoder = encoders[i];
785    encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize;
786
787    if (i == 0)
788    {
789      encoder.progressInfoSpec[0]->callback = callback;
790      encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads;
791      SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
792    }
793
794    #ifdef BENCH_MT
795    if (numDecoderThreads > 1)
796    {
797      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
798      {
799        size_t allocaSize = ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF;
800        HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
801            #ifdef USE_ALLOCA
802            , allocaSize
803            #endif
804            );
805        RINOK(res);
806      }
807    }
808    else
809    #endif
810    {
811      RINOK(encoder.Decode(0));
812    }
813  }
814  #ifdef BENCH_MT
815  HRESULT res = S_OK;
816  if (numDecoderThreads > 1)
817    for (i = 0; i < numEncoderThreads; i++)
818      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
819      {
820        CEncoderInfo &encoder = encoders[i];
821        encoder.thread[j].Wait();
822        if (encoder.Results[j] != S_OK)
823          res = encoder.Results[j];
824      }
825  RINOK(res);
826  #endif
827  RINOK(status.Res);
828  SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
829  info.UnpackSize = 0;
830  info.PackSize = 0;
831  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
832  for (i = 0; i < numEncoderThreads; i++)
833  {
834    CEncoderInfo &encoder = encoders[i];
835    info.UnpackSize += encoder.kBufferSize;
836    info.PackSize += encoder.compressedSize;
837  }
838  RINOK(callback->SetDecodeResult(info, false));
839  RINOK(callback->SetDecodeResult(info, true));
840  return S_OK;
841}
842
843
844inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
845{
846  UInt32 hs = dictionary - 1;
847  hs |= (hs >> 1);
848  hs |= (hs >> 2);
849  hs |= (hs >> 4);
850  hs |= (hs >> 8);
851  hs >>= 1;
852  hs |= 0xFFFF;
853  if (hs > (1 << 24))
854    hs >>= 1;
855  hs++;
856  return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 +
857      (1 << 20) + (multiThread ? (6 << 20) : 0);
858}
859
860UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary)
861{
862  const UInt32 kBufferSize = dictionary;
863  const UInt32 kCompressedBufferSize = (kBufferSize / 2);
864  UInt32 numSubThreads = (numThreads > 1) ? 2 : 1;
865  UInt32 numBigThreads = numThreads / numSubThreads;
866  return (kBufferSize + kCompressedBufferSize +
867    GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads;
868}
869
870static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase)
871{
872  for (UInt32 i = 0; i < numCycles; i++)
873    if (CrcCalc(data, size) != crcBase)
874      return false;
875  return true;
876}
877
878#ifdef BENCH_MT
879struct CCrcInfo
880{
881  NWindows::CThread Thread;
882  const Byte *Data;
883  UInt32 Size;
884  UInt32 NumCycles;
885  UInt32 Crc;
886  bool Res;
887  void Wait()
888  {
889    Thread.Wait();
890    Thread.Close();
891  }
892};
893
894static THREAD_FUNC_DECL CrcThreadFunction(void *param)
895{
896  CCrcInfo *p = (CCrcInfo *)param;
897  p->Res = CrcBig(p->Data, p->Size, p->NumCycles, p->Crc);
898  return 0;
899}
900
901struct CCrcThreads
902{
903  UInt32 NumThreads;
904  CCrcInfo *Items;
905  CCrcThreads(): Items(0), NumThreads(0) {}
906  void WaitAll()
907  {
908    for (UInt32 i = 0; i < NumThreads; i++)
909      Items[i].Wait();
910    NumThreads = 0;
911  }
912  ~CCrcThreads()
913  {
914    WaitAll();
915    delete []Items;
916  }
917};
918#endif
919
920static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
921{
922  UInt32 crc = CRC_INIT_VAL;;
923  for (UInt32 i = 0; i < size; i++)
924    crc = CRC_UPDATE_BYTE(crc, buf[i]);
925  return CRC_GET_DIGEST(crc);
926}
927
928static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
929{
930  for (UInt32 i = 0; i < size; i++)
931    buf[i] = (Byte)RG.GetRnd();
932}
933
934static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
935{
936  RandGen(buf, size, RG);
937  return CrcCalc1(buf, size);
938}
939
940bool CrcInternalTest()
941{
942  CBenchBuffer buffer;
943  const UInt32 kBufferSize0 = (1 << 8);
944  const UInt32 kBufferSize1 = (1 << 10);
945  const UInt32 kCheckSize = (1 << 5);
946  if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
947    return false;
948  Byte *buf = buffer.Buffer;
949  UInt32 i;
950  for (i = 0; i < kBufferSize0; i++)
951    buf[i] = (Byte)i;
952  UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
953  if (crc1 != 0x29058C73)
954    return false;
955  CBaseRandomGenerator RG;
956  RandGen(buf + kBufferSize0, kBufferSize1, RG);
957  for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
958    for (UInt32 j = 0; j < kCheckSize; j++)
959      if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
960        return false;
961  return true;
962}
963
964HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed)
965{
966  if (numThreads == 0)
967    numThreads = 1;
968
969  CBenchBuffer buffer;
970  size_t totalSize = (size_t)bufferSize * numThreads;
971  if (totalSize / numThreads != bufferSize)
972    return E_OUTOFMEMORY;
973  if (!buffer.Alloc(totalSize))
974    return E_OUTOFMEMORY;
975
976  Byte *buf = buffer.Buffer;
977  CBaseRandomGenerator RG;
978  UInt32 numCycles = ((UInt32)1 << 30) / ((bufferSize >> 2) + 1) + 1;
979
980  UInt64 timeVal;
981  #ifdef BENCH_MT
982  CCrcThreads threads;
983  if (numThreads > 1)
984  {
985    threads.Items = new CCrcInfo[numThreads];
986    UInt32 i;
987    for (i = 0; i < numThreads; i++)
988    {
989      CCrcInfo &info = threads.Items[i];
990      Byte *data = buf + (size_t)bufferSize * i;
991      info.Data = data;
992      info.NumCycles = numCycles;
993      info.Size = bufferSize;
994      info.Crc = RandGenCrc(data, bufferSize, RG);
995    }
996    timeVal = GetTimeCount();
997    for (i = 0; i < numThreads; i++)
998    {
999      CCrcInfo &info = threads.Items[i];
1000      RINOK(info.Thread.Create(CrcThreadFunction, &info));
1001      threads.NumThreads++;
1002    }
1003    threads.WaitAll();
1004    for (i = 0; i < numThreads; i++)
1005      if (!threads.Items[i].Res)
1006        return S_FALSE;
1007  }
1008  else
1009  #endif
1010  {
1011    UInt32 crc = RandGenCrc(buf, bufferSize, RG);
1012    timeVal = GetTimeCount();
1013    if (!CrcBig(buf, bufferSize, numCycles, crc))
1014      return S_FALSE;
1015  }
1016  timeVal = GetTimeCount() - timeVal;
1017  if (timeVal == 0)
1018    timeVal = 1;
1019
1020  UInt64 size = (UInt64)numCycles * totalSize;
1021  speed = MyMultDiv64(size, timeVal, GetFreq());
1022  return S_OK;
1023}
1024
1025