1/************************************************************************************
2Includes
3************************************************************************************/
4#include "All.h"
5#include "BitArray.h"
6
7/************************************************************************************
8Declares
9************************************************************************************/
10#define BIT_ARRAY_ELEMENTS            (4096)                        // the number of elements in the bit array (4 MB)
11#define BIT_ARRAY_BYTES                (BIT_ARRAY_ELEMENTS * 4)    // the number of bytes in the bit array
12#define BIT_ARRAY_BITS                (BIT_ARRAY_BYTES    * 8)    // the number of bits in the bit array
13
14#define MAX_ELEMENT_BITS            128
15#define REFILL_BIT_THRESHOLD        (BIT_ARRAY_BITS - MAX_ELEMENT_BITS)
16
17#define CODE_BITS 32
18#define TOP_VALUE ((unsigned int) 1 << (CODE_BITS - 1))
19#define SHIFT_BITS (CODE_BITS - 9)
20#define EXTRA_BITS ((CODE_BITS - 2) % 8 + 1)
21#define BOTTOM_VALUE (TOP_VALUE >> 8)
22
23/************************************************************************************
24Lookup tables
25************************************************************************************/
26const uint32 K_SUM_MIN_BOUNDARY[32] = {0,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,16777216,33554432,67108864,134217728,268435456,536870912,1073741824,2147483648UL,0,0,0,0};
27
28#define MODEL_ELEMENTS                    64
29#define RANGE_OVERFLOW_TOTAL_WIDTH        65536
30#define RANGE_OVERFLOW_SHIFT            16
31
32const uint32 RANGE_TOTAL[64] = {0,19578,36160,48417,56323,60899,63265,64435,64971,65232,65351,65416,65447,65466,65476,65482,65485,65488,65490,65491,65492,65493,65494,65495,65496,65497,65498,65499,65500,65501,65502,65503,65504,65505,65506,65507,65508,65509,65510,65511,65512,65513,65514,65515,65516,65517,65518,65519,65520,65521,65522,65523,65524,65525,65526,65527,65528,65529,65530,65531,65532,65533,65534,65535,};
33const uint32 RANGE_WIDTH[64] = {19578,16582,12257,7906,4576,2366,1170,536,261,119,65,31,19,10,6,3,3,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,};
34
35#ifdef BUILD_RANGE_TABLE
36    int g_aryOverflows[256] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
37    int g_nTotalOverflow = 0;
38#endif
39
40/************************************************************************************
41Constructor
42************************************************************************************/
43CBitArray::CBitArray(CIO *pIO)
44{
45    // allocate memory for the bit array
46    m_pBitArray = new uint32 [BIT_ARRAY_ELEMENTS];
47    memset(m_pBitArray, 0, BIT_ARRAY_BYTES);
48
49    // initialize other variables
50    m_nCurrentBitIndex = 0;
51    m_pIO = pIO;
52}
53
54/************************************************************************************
55Destructor
56************************************************************************************/
57CBitArray::~CBitArray()
58{
59    // free the bit array
60    SAFE_ARRAY_DELETE(m_pBitArray)
61#ifdef BUILD_RANGE_TABLE
62    OutputRangeTable();
63#endif
64}
65
66/************************************************************************************
67Output the bit array via the CIO (typically saves to disk)
68************************************************************************************/
69int CBitArray::OutputBitArray(BOOL bFinalize)
70{
71    // write the entire file to disk
72    unsigned int nBytesWritten = 0;
73    unsigned int nBytesToWrite = 0;
74//    unsigned int nRetVal = 0;
75
76    if (bFinalize)
77    {
78        nBytesToWrite = ((m_nCurrentBitIndex >> 5) * 4) + 4;
79
80        m_MD5.AddData(m_pBitArray, nBytesToWrite);
81
82        RETURN_ON_ERROR(m_pIO->Write(m_pBitArray, nBytesToWrite, &nBytesWritten))
83
84        // reset the bit pointer
85        m_nCurrentBitIndex = 0;
86    }
87    else
88    {
89        nBytesToWrite = (m_nCurrentBitIndex >> 5) * 4;
90
91        m_MD5.AddData(m_pBitArray, nBytesToWrite);
92
93        RETURN_ON_ERROR(m_pIO->Write(m_pBitArray, nBytesToWrite, &nBytesWritten))
94
95        // move the last value to the front of the bit array
96        m_pBitArray[0] = m_pBitArray[m_nCurrentBitIndex >> 5];
97        m_nCurrentBitIndex = (m_nCurrentBitIndex & 31);
98
99        // zero the rest of the memory (may not need the +1 because of frame byte alignment)
100        memset(&m_pBitArray[1], 0, min((int)nBytesToWrite + 1, BIT_ARRAY_BYTES - 1));
101    }
102
103    // return a success
104    return ERROR_SUCCESS;
105}
106
107/************************************************************************************
108Range coding macros -- ugly, but outperform inline's (every cycle counts here)
109************************************************************************************/
110#define PUTC(VALUE) m_pBitArray[m_nCurrentBitIndex >> 5] |= ((VALUE) & 0xFF) << (24 - (m_nCurrentBitIndex & 31)); m_nCurrentBitIndex += 8;
111#define PUTC_NOCAP(VALUE) m_pBitArray[m_nCurrentBitIndex >> 5] |= (VALUE) << (24 - (m_nCurrentBitIndex & 31)); m_nCurrentBitIndex += 8;
112
113#define NORMALIZE_RANGE_CODER                                                                    \
114    while (m_RangeCoderInfo.range <= BOTTOM_VALUE)                                                \
115    {                                                                                            \
116        if (m_RangeCoderInfo.low < (0xFF << SHIFT_BITS))                                        \
117        {                                                                                        \
118            PUTC(m_RangeCoderInfo.buffer);                                                        \
119            for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--) { PUTC_NOCAP(0xFF); }        \
120            m_RangeCoderInfo.buffer = (m_RangeCoderInfo.low >> SHIFT_BITS);                        \
121        }                                                                                        \
122        else if (m_RangeCoderInfo.low & TOP_VALUE)                                                \
123        {                                                                                        \
124            PUTC(m_RangeCoderInfo.buffer + 1);                                                    \
125            m_nCurrentBitIndex += (m_RangeCoderInfo.help * 8);                                    \
126            m_RangeCoderInfo.help = 0;                                                            \
127            m_RangeCoderInfo.buffer = (m_RangeCoderInfo.low >> SHIFT_BITS);                        \
128        }                                                                                        \
129        else                                                                                    \
130        {                                                                                        \
131            m_RangeCoderInfo.help++;                                                            \
132        }                                                                                        \
133                                                                                                \
134        m_RangeCoderInfo.low = (m_RangeCoderInfo.low << 8) & (TOP_VALUE - 1);                    \
135        m_RangeCoderInfo.range <<= 8;                                                            \
136    }
137
138#define ENCODE_FAST(RANGE_WIDTH, RANGE_TOTAL, SHIFT)                                            \
139    NORMALIZE_RANGE_CODER                                                                        \
140    const int nTemp = m_RangeCoderInfo.range >> (SHIFT);                                        \
141    m_RangeCoderInfo.range = nTemp * (RANGE_WIDTH);                                                \
142    m_RangeCoderInfo.low += nTemp * (RANGE_TOTAL);
143
144#define ENCODE_DIRECT(VALUE, SHIFT)                                                                \
145    NORMALIZE_RANGE_CODER                                                                        \
146    m_RangeCoderInfo.range = m_RangeCoderInfo.range >> (SHIFT);                                    \
147    m_RangeCoderInfo.low += m_RangeCoderInfo.range * (VALUE);
148
149/************************************************************************************
150Directly encode bits to the bitstream
151************************************************************************************/
152int CBitArray::EncodeBits(unsigned int nValue, int nBits)
153{
154    // make sure there is room for the data
155    // this is a little slower than ensuring a huge block to start with, but it's safer
156    if (m_nCurrentBitIndex > REFILL_BIT_THRESHOLD)
157    {
158        RETURN_ON_ERROR(OutputBitArray())
159    }
160
161    ENCODE_DIRECT(nValue, nBits);
162    return 0;
163}
164
165/************************************************************************************
166Encodes an unsigned int to the bit array (no rice coding)
167************************************************************************************/
168int CBitArray::EncodeUnsignedLong(unsigned int n)
169{
170    // make sure there are at least 8 bytes in the buffer
171    if (m_nCurrentBitIndex > (BIT_ARRAY_BYTES - 8))
172    {
173        RETURN_ON_ERROR(OutputBitArray())
174    }
175
176    // encode the value
177    uint32 nBitArrayIndex = m_nCurrentBitIndex >> 5;
178    int nBitIndex = m_nCurrentBitIndex & 31;
179
180    if (nBitIndex == 0)
181    {
182        m_pBitArray[nBitArrayIndex] = n;
183    }
184    else
185    {
186        m_pBitArray[nBitArrayIndex] |= n >> nBitIndex;
187        m_pBitArray[nBitArrayIndex + 1] = n << (32 - nBitIndex);
188    }
189
190    m_nCurrentBitIndex += 32;
191
192    return 0;
193}
194
195/************************************************************************************
196Advance to a byte boundary (for frame alignment)
197************************************************************************************/
198void CBitArray::AdvanceToByteBoundary()
199{
200    while (m_nCurrentBitIndex % 8)
201        m_nCurrentBitIndex++;
202}
203
204/************************************************************************************
205Encode a value
206************************************************************************************/
207int CBitArray::EncodeValue(int nEncode, BIT_ARRAY_STATE & BitArrayState)
208{
209    // make sure there is room for the data
210    // this is a little slower than ensuring a huge block to start with, but it's safer
211    if (m_nCurrentBitIndex > REFILL_BIT_THRESHOLD)
212    {
213        RETURN_ON_ERROR(OutputBitArray())
214    }
215
216    // convert to unsigned
217    nEncode = (nEncode > 0) ? nEncode * 2 - 1 : -nEncode * 2;
218
219    int nOriginalKSum = BitArrayState.nKSum;
220
221    // get the working k
222//    int nTempK = (BitArrayState.k) ? BitArrayState.k - 1 : 0;
223
224    // update nKSum
225    BitArrayState.nKSum += ((nEncode + 1) / 2) - ((BitArrayState.nKSum + 16) >> 5);
226
227    // update k
228    if (BitArrayState.nKSum < K_SUM_MIN_BOUNDARY[BitArrayState.k])
229        BitArrayState.k--;
230    else if (BitArrayState.nKSum >= K_SUM_MIN_BOUNDARY[BitArrayState.k + 1])
231        BitArrayState.k++;
232
233    // figure the pivot value
234    int nPivotValue = max(nOriginalKSum / 32, 1);
235    int nOverflow = nEncode / nPivotValue;
236    int nBase = nEncode - (nOverflow * nPivotValue);
237
238    // store the overflow
239    if (nOverflow < (MODEL_ELEMENTS - 1))
240    {
241        ENCODE_FAST(RANGE_WIDTH[nOverflow], RANGE_TOTAL[nOverflow], RANGE_OVERFLOW_SHIFT);
242
243        #ifdef BUILD_RANGE_TABLE
244            g_aryOverflows[nOverflow]++;
245            g_nTotalOverflow++;
246        #endif
247    }
248    else
249    {
250        // store the "special" overflow (tells that perfect k is encoded next)
251        ENCODE_FAST(RANGE_WIDTH[MODEL_ELEMENTS - 1], RANGE_TOTAL[MODEL_ELEMENTS - 1], RANGE_OVERFLOW_SHIFT);
252
253        #ifdef BUILD_RANGE_TABLE
254            g_aryOverflows[MODEL_ELEMENTS - 1]++;
255            g_nTotalOverflow++;
256        #endif
257
258        // code the overflow using straight bits
259        ENCODE_DIRECT((nOverflow >> 16) & 0xFFFF, 16);
260        ENCODE_DIRECT(nOverflow & 0xFFFF, 16);
261    }
262
263    // code the base
264    {
265        if (nPivotValue >= (1 << 16))
266        {
267            int nPivotValueBits = 0;
268            while ((nPivotValue >> nPivotValueBits) > 0) { nPivotValueBits++; }
269            int nSplitFactor = 1 << (nPivotValueBits - 16);
270
271            // we know that base is smaller than pivot coming into this
272            // however, after we divide both by an integer, they could be the same
273            // we account by adding one to the pivot, but this hurts compression
274            // by (1 / nSplitFactor) -- therefore we maximize the split factor
275            // that gets one added to it
276
277            // encode the pivot as two pieces
278            int nPivotValueA = (nPivotValue / nSplitFactor) + 1;
279            int nPivotValueB = nSplitFactor;
280
281            int nBaseA = nBase / nSplitFactor;
282            int nBaseB = nBase % nSplitFactor;
283
284            {
285                NORMALIZE_RANGE_CODER
286                const int nTemp = m_RangeCoderInfo.range / nPivotValueA;
287                m_RangeCoderInfo.range = nTemp;
288                m_RangeCoderInfo.low += nTemp * nBaseA;
289            }
290
291            {
292                NORMALIZE_RANGE_CODER
293                const int nTemp = m_RangeCoderInfo.range / nPivotValueB;
294                m_RangeCoderInfo.range = nTemp;
295                m_RangeCoderInfo.low += nTemp * nBaseB;
296            }
297        }
298        else
299        {
300
301            NORMALIZE_RANGE_CODER
302            const int nTemp = m_RangeCoderInfo.range / nPivotValue;
303            m_RangeCoderInfo.range = nTemp;
304            m_RangeCoderInfo.low += nTemp * nBase;
305        }
306    }
307
308    return 0;
309}
310
311/************************************************************************************
312Flush
313************************************************************************************/
314void CBitArray::FlushBitArray()
315{
316    // advance to a byte boundary (for alignment)
317    AdvanceToByteBoundary();
318
319    // the range coder
320    m_RangeCoderInfo.low = 0;  // full code range
321    m_RangeCoderInfo.range = TOP_VALUE;
322    m_RangeCoderInfo.buffer = 0;
323    m_RangeCoderInfo.help = 0;  // no bytes to follow
324}
325
326void CBitArray::FlushState(BIT_ARRAY_STATE & BitArrayState)
327{
328    // k and ksum
329    BitArrayState.k = 10;
330    BitArrayState.nKSum = (1 << BitArrayState.k) * 16;
331}
332
333/************************************************************************************
334Finalize
335************************************************************************************/
336void CBitArray::Finalize()
337{
338    NORMALIZE_RANGE_CODER
339
340    unsigned int nTemp = (m_RangeCoderInfo.low >> SHIFT_BITS) + 1;
341
342    if (nTemp > 0xFF) // we have a carry
343    {
344        PUTC(m_RangeCoderInfo.buffer + 1);
345        for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--)
346        {
347            PUTC(0);
348        }
349    }
350    else  // no carry
351    {
352        PUTC(m_RangeCoderInfo.buffer);
353        for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--)
354        {
355            PUTC(((unsigned char) 0xFF));
356        }
357    }
358
359    // we must output these bytes so the decoder can properly work at the end of the stream
360    PUTC(nTemp & 0xFF);
361    PUTC(0);
362    PUTC(0);
363    PUTC(0);
364}
365
366/************************************************************************************
367Build a range table (for development / debugging)
368************************************************************************************/
369#ifdef BUILD_RANGE_TABLE
370void CBitArray::OutputRangeTable()
371{
372    int z;
373
374    if (g_nTotalOverflow == 0) return;
375
376    int nTotal = 0;
377    int aryWidth[256]; ZeroMemory(aryWidth, 256 * 4);
378    for (z = 0; z < MODEL_ELEMENTS; z++)
379    {
380        aryWidth[z] = int(((float(g_aryOverflows[z]) * float(65536)) + (g_nTotalOverflow / 2)) / float(g_nTotalOverflow));
381        if (aryWidth[z] == 0) aryWidth[z] = 1;
382        nTotal += aryWidth[z];
383    }
384
385    z = 0;
386    while (nTotal > 65536)
387    {
388        if (aryWidth[z] != 1)
389        {
390            aryWidth[z]--;
391            nTotal--;
392        }
393        z++;
394        if (z == MODEL_ELEMENTS) z = 0;
395    }
396
397    z = 0;
398    while (nTotal < 65536)
399    {
400        aryWidth[z++]++;
401        nTotal++;
402        if (z == MODEL_ELEMENTS) z = 0;
403    }
404
405    int aryTotal[256]; ZeroMemory(aryTotal, 256 * 4);
406    for (z = 0; z < MODEL_ELEMENTS; z++)
407    {
408        for (int q = 0; q < z; q++)
409        {
410            aryTotal[z] += aryWidth[q];
411        }
412    }
413
414    TCHAR buf[1024];
415    _stprintf(buf, _T("const uint32 RANGE_TOTAL[%d] = {"), MODEL_ELEMENTS);
416    ODS(buf);
417    for (z = 0; z < MODEL_ELEMENTS; z++)
418    {
419        _stprintf(buf, _T("%d,"), aryTotal[z]);
420        OutputDebugString(buf);
421    }
422    ODS(_T("};\n"));
423
424    _stprintf(buf, _T("const uint32 RANGE_WIDTH[%d] = {"), MODEL_ELEMENTS);
425    ODS(buf);
426    for (z = 0; z < MODEL_ELEMENTS; z++)
427    {
428        _stprintf(buf, _T("%d,"), aryWidth[z]);
429        OutputDebugString(buf);
430    }
431    ODS(_T("};\n\n"));
432}
433#endif // #ifdef BUILD_RANGE_TABLE
434