1///////////////////////////////////////////////////////////////////////////
2//
3// Copyright (c) 2006, Industrial Light & Magic, a division of Lucas
4// Digital Ltd. LLC
5//
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11// *       Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// *       Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following disclaimer
15// in the documentation and/or other materials provided with the
16// distribution.
17// *       Neither the name of Industrial Light & Magic nor the names of
18// its contributors may be used to endorse or promote products derived
19// from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//
33///////////////////////////////////////////////////////////////////////////
34
35
36//-----------------------------------------------------------------------------
37//
38//	class B44Compressor
39//
40//	This compressor is lossy for HALF channels; the compression rate
//	is fixed at 32/14 (approximately 2.29).  FLOAT and UINT channels
42//	are not compressed; their data are preserved exactly.
43//
44//	Each HALF channel is split into blocks of 4 by 4 pixels.  An
45//	uncompressed block occupies 32 bytes, which are re-interpreted
46//	as sixteen 16-bit unsigned integers, t[0] ... t[15].  Compression
47//	shrinks the block to 14 bytes.  The compressed 14-byte block
48//	contains
49//
50//	 - t[0]
51//
52//	 - a 6-bit shift value
53//
54//	 - 15 densely packed 6-bit values, r[0] ... r[14], which are
55//         computed by subtracting adjacent pixel values and right-
56//	   shifting the differences according to the stored shift value.
57//
58//	   Differences between adjacent pixels are computed according
59//	   to the following diagram:
60//
61//		 0 -------->  1 -------->  2 -------->  3
62//               |     3            7           11
63//               |
64//               | 0
65//               |
66//               v
67//		 4 -------->  5 -------->  6 -------->  7
68//               |     4            8           12
69//               |
70//               | 1
71//               |
72//               v
73//		 8 -------->  9 --------> 10 --------> 11
74//               |     5            9           13
75//               |
76//               | 2
77//               |
78//               v
79//		12 --------> 13 --------> 14 --------> 15
80//                     6           10           14
81//
82//	    Here
83//
84//               5 ---------> 6
85//                     8
86//
87//	    means that r[8] is the difference between t[5] and t[6].
88//
89//	 - optionally, a 4-by-4 pixel block where all pixels have the
90//	   same value can be treated as a special case, where the
91//	   compressed block contains only 3 instead of 14 bytes:
92//	   t[0], followed by an "impossible" 6-bit shift value and
93//	   two padding bits.
94//
95//	This compressor can handle positive and negative pixel values.
96//	NaNs and infinities are replaced with zeroes before compression.
97//
98//-----------------------------------------------------------------------------
99
100#include <ImfB44Compressor.h>
101#include <ImfHeader.h>
102#include <ImfChannelList.h>
103#include <ImfMisc.h>
104#include <ImathFun.h>
105#include <ImathBox.h>
106#include <Iex.h>
107#include <ImfIO.h>
108#include <ImfXdr.h>
109#include <string.h>
110#include <assert.h>
111#include <algorithm>
112
113namespace Imf {
114
115using Imath::divp;
116using Imath::modp;
117using Imath::Box2i;
118using Imath::V2i;
119using std::min;
120
121namespace {
122
123//
124// Lookup tables for
125//	y = exp (x / 8)
126// and
127//	x = 8 * log (y)
128//
129
130#include "b44ExpLogTable.h"
131
132
133inline void
134convertFromLinear (unsigned short s[16])
135{
136    for (int i = 0; i < 16; ++i)
137	s[i] = expTable[s[i]];
138}
139
140
141inline void
142convertToLinear (unsigned short s[16])
143{
144    for (int i = 0; i < 16; ++i)
145	s[i] = logTable[s[i]];
146}
147
148
inline int
shiftAndRound (int x, int shift)
{
    //
    // Returns x * pow (2, -shift), rounded to the nearest integer.
    // A value that lies exactly halfway between two integers is
    // rounded to the even neighbor.
    //
    // The computation is done on 2*x with one extra bit of shift,
    // so that the "half" position is available as an integer bit:
    // adding (2^shift - 1) rounds everything above the halfway
    // point up, and adding the lowest bit of the truncated result
    // pushes exact ties up only when the truncated result is odd.
    //

    const int doubled    = x << 1;
    const int totalShift = shift + 1;
    const int almostHalf = (1 << shift) - 1;
    const int oddBit     = (doubled >> totalShift) & 1;

    return (doubled + almostHalf + oddBit) >> totalShift;
}
169
170
int
pack (const unsigned short s[16],
      unsigned char b[14],
      bool optFlatFields,
      bool exactMax)
{
    //
    // Pack a block of 4 by 4 16-bit pixels (32 bytes) into
    // either 14 or 3 bytes.
    //
    // Parameters:
    //
    //	s		the 16 input pixels, as raw 16-bit patterns
    //
    //	b		receives the packed block; 14 bytes are
    //			written in the general case, 3 bytes in
    //			the flat-field case
    //
    //	optFlatFields	if true, a block whose running differences
    //			are all zero is stored in 3 bytes
    //
    //	exactMax	if true, the stored t[0] is adjusted so
    //			that the pixel equal to tMax is reproduced
    //			as accurately as possible
    //
    // Returns the number of bytes written to b (14 or 3).
    //

    //
    // Integers s[0] ... s[15] represent floating-point numbers
    // in what is essentially a sign-magnitude format.  Convert
    // s[0] .. s[15] into a new set of integers, t[0] ... t[15],
    // such that if t[i] is greater than t[j], the floating-point
    // number that corresponds to s[i] is always greater than
    // the floating-point number that corresponds to s[j].
    //
    // Also, replace any bit patterns that represent NaNs or
    // infinities with bit patterns that represent floating-point
    // zeroes.
    //
    //	bit pattern	floating-point		bit pattern
    //	in s[i]		value			in t[i]
    //
    //  0x7fff		NAN			0x8000
    //  0x7ffe		NAN			0x8000
    //	  ...					  ...
    //  0x7c01		NAN			0x8000
    //  0x7c00		+infinity		0x8000
    //  0x7bff		+HALF_MAX		0xfbff
    //  0x7bfe					0xfbfe
    //  0x7bfd					0xfbfd
    //	  ...					  ...
    //  0x0002		+2 * HALF_MIN		0x8002
    //  0x0001		+HALF_MIN		0x8001
    //  0x0000		+0.0			0x8000
    //  0x8000		-0.0			0x7fff
    //  0x8001		-HALF_MIN		0x7ffe
    //  0x8002		-2 * HALF_MIN		0x7ffd
    //	  ...					  ...
    //  0xfbfd					0x0402
    //  0xfbfe					0x0401
    //  0xfbff		-HALF_MAX		0x0400
    //  0xfc00		-infinity		0x8000
    //  0xfc01		NAN			0x8000
    //	  ...					  ...
    //  0xfffe		NAN			0x8000
    //  0xffff		NAN			0x8000
    //

    unsigned short t[16];

    for (int i = 0; i < 16; ++i)
    {
	if ((s[i] & 0x7c00) == 0x7c00)
	    t[i] = 0x8000;		// NaN or infinity: store as zero
	else if (s[i] & 0x8000)
	    t[i] = ~s[i];		// negative: invert all bits
	else
	    t[i] = s[i] | 0x8000;	// positive: set the top bit
    }

    //
    // Find the maximum, tMax, of t[0] ... t[15].
    //

    unsigned short tMax = 0;

    for (int i = 0; i < 16; ++i)
	if (tMax < t[i])
	    tMax = t[i];

    //
    // Compute a set of running differences, r[0] ... r[14]:
    // Find a shift value such that after rounding off the
    // rightmost bits and shifting all differences are between
    // -32 and +31.  Then bias the differences so that they
    // end up between 0 and 63.
    //
    // The loop below starts with shift 0 and increments the
    // shift until all biased differences fit into 6 bits.
    //

    int shift = -1;
    int d[16];
    int r[15];
    int rMin;
    int rMax;

    const int bias = 0x20;

    do
    {
	shift += 1;

	//
	// Compute absolute differences, d[0] ... d[15],
	// between tMax and t[0] ... t[15].
	//
	// Shift and round the absolute differences.
	//

	for (int i = 0; i < 16; ++i)
	    d[i] = shiftAndRound (tMax - t[i], shift);

	//
	// Convert d[0] .. d[15] into running differences
	//

	// first column, top to bottom
	r[ 0] = d[ 0] - d[ 4] + bias;
	r[ 1] = d[ 4] - d[ 8] + bias;
	r[ 2] = d[ 8] - d[12] + bias;

	// column 0 to column 1, one difference per row
	r[ 3] = d[ 0] - d[ 1] + bias;
	r[ 4] = d[ 4] - d[ 5] + bias;
	r[ 5] = d[ 8] - d[ 9] + bias;
	r[ 6] = d[12] - d[13] + bias;

	// column 1 to column 2
	r[ 7] = d[ 1] - d[ 2] + bias;
	r[ 8] = d[ 5] - d[ 6] + bias;
	r[ 9] = d[ 9] - d[10] + bias;
	r[10] = d[13] - d[14] + bias;

	// column 2 to column 3
	r[11] = d[ 2] - d[ 3] + bias;
	r[12] = d[ 6] - d[ 7] + bias;
	r[13] = d[10] - d[11] + bias;
	r[14] = d[14] - d[15] + bias;

	rMin = r[0];
	rMax = r[0];

	for (int i = 1; i < 15; ++i)
	{
	    if (rMin > r[i])
		rMin = r[i];

	    if (rMax < r[i])
		rMax = r[i];
	}
    }
    while (rMin < 0 || rMax > 0x3f);

    if (rMin == bias && rMax == bias && optFlatFields)
    {
	//
	// Special case - all pixels have the same value.
	// We encode this in 3 instead of 14 bytes by
	// storing the value 0xfc in the third output byte,
	// which cannot occur in the 14-byte encoding.
	//

	b[0] = (t[0] >> 8);
	b[1] =  t[0];
	b[2] = 0xfc;

	return 3;
    }

    if (exactMax)
    {
	//
	// Adjust t[0] so that the pixel whose value is equal
	// to tMax gets represented as accurately as possible.
	//

	t[0] = tMax - (d[0] << shift);
    }

    //
    // Pack t[0], shift and r[0] ... r[14] into 14 bytes:
    // t[0] goes into the first two bytes, high byte first.
    // The 6-bit shift and the fifteen 6-bit differences
    // follow as one densely packed stream of 96 bits.
    //

    b[ 0] = (t[0] >> 8);
    b[ 1] =  t[0];

    b[ 2] = (unsigned char) ((shift << 2) | (r[ 0] >> 4));
    b[ 3] = (unsigned char) ((r[ 0] << 4) | (r[ 1] >> 2));
    b[ 4] = (unsigned char) ((r[ 1] << 6) |  r[ 2]      );

    b[ 5] = (unsigned char) ((r[ 3] << 2) | (r[ 4] >> 4));
    b[ 6] = (unsigned char) ((r[ 4] << 4) | (r[ 5] >> 2));
    b[ 7] = (unsigned char) ((r[ 5] << 6) |  r[ 6]      );

    b[ 8] = (unsigned char) ((r[ 7] << 2) | (r[ 8] >> 4));
    b[ 9] = (unsigned char) ((r[ 8] << 4) | (r[ 9] >> 2));
    b[10] = (unsigned char) ((r[ 9] << 6) |  r[10]      );

    b[11] = (unsigned char) ((r[11] << 2) | (r[12] >> 4));
    b[12] = (unsigned char) ((r[12] << 4) | (r[13] >> 2));
    b[13] = (unsigned char) ((r[13] << 6) |  r[14]      );

    return 14;
}
363
364
inline
void
unpack14 (const unsigned char b[14], unsigned short s[16])
{
    //
    // Unpack a 14-byte block into 4 by 4 16-bit pixels.
    //
    //	b	the packed block: a 16-bit value t[0] (high byte
    //		first), followed by a 6-bit shift and fifteen
    //		6-bit biased running differences
    //
    //	s	receives the 16 unpacked pixels
    //
    // All pixel arithmetic is meant to wrap around modulo 2^16;
    // the results are stored in 16-bit unsigned integers.
    //

    #if defined (DEBUG)
        assert (b[2] != 0xfc);
    #endif

    s[ 0] = (b[0] << 8) | b[1];

    //
    // The shift value is read directly from the compressed data,
    // which may be damaged or malicious; it can be anything from
    // 0 to 62.  Shifting a 32-bit integer by 32 or more bits, or
    // shifting a signed integer so that it overflows, is undefined,
    // so large shift values must not reach the shift operators.
    //

    const unsigned int shift = (b[ 2] >> 2);

    if (shift >= 16)
    {
        //
        // Every shifted difference and the shifted bias are
        // multiples of 2^16 and vanish in 16-bit arithmetic:
        // all pixels are equal to the first one.
        //

        for (int i = 1; i < 16; ++i)
            s[i] = s[0];
    }
    else
    {
        //
        // Unsigned arithmetic; the largest intermediate value,
        // 0x3f << 15, easily fits into an unsigned int.  Each
        // pixel is its left or upper neighbor plus the shifted
        // 6-bit difference minus the shifted bias.
        //

        const unsigned int bias = (0x20u << shift);

        s[ 4] = s[ 0] + ((((b[ 2] << 4) | (b[ 3] >> 4)) & 0x3fu) << shift) - bias;
        s[ 8] = s[ 4] + ((((b[ 3] << 2) | (b[ 4] >> 6)) & 0x3fu) << shift) - bias;
        s[12] = s[ 8] +   ((b[ 4]                       & 0x3fu) << shift) - bias;

        s[ 1] = s[ 0] +  (((b[ 5] >> 2)                 & 0x3fu) << shift) - bias;
        s[ 5] = s[ 4] + ((((b[ 5] << 4) | (b[ 6] >> 4)) & 0x3fu) << shift) - bias;
        s[ 9] = s[ 8] + ((((b[ 6] << 2) | (b[ 7] >> 6)) & 0x3fu) << shift) - bias;
        s[13] = s[12] +   ((b[ 7]                       & 0x3fu) << shift) - bias;

        s[ 2] = s[ 1] +  (((b[ 8] >> 2)                 & 0x3fu) << shift) - bias;
        s[ 6] = s[ 5] + ((((b[ 8] << 4) | (b[ 9] >> 4)) & 0x3fu) << shift) - bias;
        s[10] = s[ 9] + ((((b[ 9] << 2) | (b[10] >> 6)) & 0x3fu) << shift) - bias;
        s[14] = s[13] +   ((b[10]                       & 0x3fu) << shift) - bias;

        s[ 3] = s[ 2] +  (((b[11] >> 2)                 & 0x3fu) << shift) - bias;
        s[ 7] = s[ 6] + ((((b[11] << 4) | (b[12] >> 4)) & 0x3fu) << shift) - bias;
        s[11] = s[10] + ((((b[12] << 2) | (b[13] >> 6)) & 0x3fu) << shift) - bias;
        s[15] = s[14] +   ((b[13]                       & 0x3fu) << shift) - bias;
    }

    //
    // Convert the ordered integers back into sign-magnitude
    // bit patterns: values with the top bit set are positive
    // (clear the top bit), all others are negative (invert).
    //

    for (int i = 0; i < 16; ++i)
    {
        if (s[i] & 0x8000)
            s[i] &= 0x7fff;
        else
            s[i] = ~s[i];
    }
}
409
410
inline
void
unpack3 (const unsigned char b[3], unsigned short s[16])
{
    //
    // Expand a 3-byte "flat field" block: the first two bytes
    // hold one 16-bit value, high byte first, and all 16 output
    // pixels receive the pixel that this value decodes to.
    //

    #if defined (DEBUG)
        assert (b[2] == 0xfc);
    #endif

    const unsigned short t = (unsigned short) ((b[0] << 8) | b[1]);

    //
    // Top bit set: clear it.  Top bit clear: invert all bits.
    //

    const unsigned short pixel = (t & 0x8000)?
                                     (unsigned short) (t & 0x7fff):
                                     (unsigned short) ~t;

    for (unsigned short *out = s; out != s + 16; ++out)
        *out = pixel;
}
433
434
435void
436notEnoughData ()
437{
438    throw Iex::InputExc ("Error decompressing data "
439			 "(input data are shorter than expected).");
440}
441
442
443void
444tooMuchData ()
445{
446    throw Iex::InputExc ("Error decompressing data "
447			 "(input data are longer than expected).");
448}
449
450} // namespace
451
452
//
// Per-channel bookkeeping, one entry per image channel.
// ys, type, pLinear and size are filled in once by the
// constructor; start, end, nx and ny are recomputed at the
// beginning of every compress() or uncompress() call.
//

struct B44Compressor::ChannelData
{
    unsigned short *	start;	    // first element of this channel's
				    // region in _tmpBuffer
    unsigned short *	end;	    // cursor into the region; starts at
				    // "start" and advances as scan lines
				    // are copied in or out
    int			nx;	    // number of samples per row in the
				    // current pixel range
    int			ny;	    // number of rows in the current
				    // pixel range
    int			ys;	    // the channel's ySampling
    PixelType		type;	    // the channel's pixel type
    bool		pLinear;    // the channel's pLinear flag; selects
				    // the exp/log table conversion around
				    // pack and unpack
    int			size;	    // size of one pixel, in units of
				    // HALF-sized (16-bit) words
};
464
465
466B44Compressor::B44Compressor
467    (const Header &hdr,
468     int maxScanLineSize,
469     int numScanLines,
470     bool optFlatFields)
471:
472    Compressor (hdr),
473    _maxScanLineSize (maxScanLineSize),
474    _optFlatFields (optFlatFields),
475    _format (XDR),
476    _numScanLines (numScanLines),
477    _tmpBuffer (0),
478    _outBuffer (0),
479    _numChans (0),
480    _channels (hdr.channels()),
481    _channelData (0)
482{
483    //
484    // Allocate buffers for compressed an uncompressed pixel data,
485    // allocate a set of ChannelData structs to help speed up the
486    // compress() and uncompress() functions, below, and determine
487    // if uncompressed pixel data should be in native or Xdr format.
488    //
489
490    _tmpBuffer = new unsigned short [maxScanLineSize * numScanLines];
491
492    const ChannelList &channels = header().channels();
493    int numHalfChans = 0;
494
495    for (ChannelList::ConstIterator c = channels.begin();
496	 c != channels.end();
497	 ++c)
498    {
499	assert (pixelTypeSize (c.channel().type) % pixelTypeSize (HALF) == 0);
500	++_numChans;
501
502	if (c.channel().type == HALF)
503	    ++numHalfChans;
504    }
505
506    //
507    // Compressed data may be larger than the input data
508    //
509
510    int padding = 12 * numHalfChans * (numScanLines + 3) / 4;
511
512    _outBuffer = new char [maxScanLineSize * numScanLines + padding];
513    _channelData = new ChannelData[_numChans];
514
515    int i = 0;
516
517    for (ChannelList::ConstIterator c = channels.begin();
518	 c != channels.end();
519	 ++c, ++i)
520    {
521	_channelData[i].ys = c.channel().ySampling;
522	_channelData[i].type = c.channel().type;
523	_channelData[i].pLinear = c.channel().pLinear;
524	_channelData[i].size =
525	    pixelTypeSize (c.channel().type) / pixelTypeSize (HALF);
526    }
527
528    const Box2i &dataWindow = hdr.dataWindow();
529
530    _minX = dataWindow.min.x;
531    _maxX = dataWindow.max.x;
532    _maxY = dataWindow.max.y;
533
534    //
535    // We can support uncompressed data in the machine's native
536    // format only if all image channels are of type HALF.
537    //
538
539    assert (sizeof (unsigned short) == pixelTypeSize (HALF));
540
541    if (_numChans == numHalfChans)
542	_format = NATIVE;
543}
544
545
546B44Compressor::~B44Compressor ()
547{
548    delete [] _tmpBuffer;
549    delete [] _outBuffer;
550    delete [] _channelData;
551}
552
553
554int
555B44Compressor::numScanLines () const
556{
557    return _numScanLines;
558}
559
560
561Compressor::Format
562B44Compressor::format () const
563{
564    return _format;
565}
566
567
568int
569B44Compressor::compress (const char *inPtr,
570			 int inSize,
571			 int minY,
572			 const char *&outPtr)
573{
574    return compress (inPtr,
575		     inSize,
576		     Box2i (V2i (_minX, minY),
577			    V2i (_maxX, minY + numScanLines() - 1)),
578		     outPtr);
579}
580
581
582int
583B44Compressor::compressTile (const char *inPtr,
584			     int inSize,
585			     Imath::Box2i range,
586			     const char *&outPtr)
587{
588    return compress (inPtr, inSize, range, outPtr);
589}
590
591
592int
593B44Compressor::uncompress (const char *inPtr,
594			   int inSize,
595			   int minY,
596			   const char *&outPtr)
597{
598    return uncompress (inPtr,
599		       inSize,
600		       Box2i (V2i (_minX, minY),
601			      V2i (_maxX, minY + numScanLines() - 1)),
602		       outPtr);
603}
604
605
606int
607B44Compressor::uncompressTile (const char *inPtr,
608			       int inSize,
609			       Imath::Box2i range,
610			       const char *&outPtr)
611{
612    return uncompress (inPtr, inSize, range, outPtr);
613}
614
615
int
B44Compressor::compress (const char *inPtr,
			 int inSize,
			 Imath::Box2i range,
			 const char *&outPtr)
{
    //
    // Compress a block of pixel data:  First copy the input pixels
    // from the input buffer into _tmpBuffer, rearranging them such
    // that blocks of 4x4 pixels of a single channel can be accessed
    // conveniently.  Then compress each 4x4 block of HALF pixel data
    // and append the result to the output buffer.  Copy UINT and
    // FLOAT data to the output buffer without compressing them.
    //
    // Parameters:
    //
    //	inPtr	    the uncompressed pixel data, scan line by scan line
    //
    //	inSize	    size of the input; only tested against zero here
    //
    //	range	    the pixel range covered by the input; its maximum
    //		    is clipped to _maxX and _maxY
    //
    //	outPtr	    set to point to _outBuffer, which receives the
    //		    compressed data
    //
    // Returns the number of bytes that were written to _outBuffer.
    //

    outPtr = _outBuffer;

    if (inSize == 0)
    {
	//
	// Special case - empty input buffer.
	//

	return 0;
    }

    //
    // For each channel, determine how many pixels are stored
    // in the input buffer, and where those pixels will be
    // placed in _tmpBuffer.
    //

    int minX = range.min.x;
    int maxX = min (range.max.x, _maxX);
    int minY = range.min.y;
    int maxY = min (range.max.y, _maxY);

    unsigned short *tmpBufferEnd = _tmpBuffer;
    int i = 0;

    for (ChannelList::ConstIterator c = _channels.begin();
	 c != _channels.end();
	 ++c, ++i)
    {
	ChannelData &cd = _channelData[i];

	cd.start = tmpBufferEnd;
	cd.end = cd.start;

	cd.nx = numSamples (c.channel().xSampling, minX, maxX);
	cd.ny = numSamples (c.channel().ySampling, minY, maxY);

	// cd.size is the pixel size in units of 16-bit words
	tmpBufferEnd += cd.nx * cd.ny * cd.size;
    }

    if (_format == XDR)
    {
	//
	// The data in the input buffer are in the machine-independent
	// Xdr format.  Copy the HALF channels into _tmpBuffer and
	// convert them back into native format for compression.
	// Copy UINT and FLOAT channels verbatim into _tmpBuffer.
	//

	for (int y = minY; y <= maxY; ++y)
	{
	    for (int i = 0; i < _numChans; ++i)
	    {
		ChannelData &cd = _channelData[i];

		// skip channels that have no samples in this scan line
		if (modp (y, cd.ys) != 0)
		    continue;

		if (cd.type == HALF)
		{
		    for (int x = cd.nx; x > 0; --x)
		    {
			Xdr::read <CharPtrIO> (inPtr, *cd.end);
			++cd.end;
		    }
		}
		else
		{
		    int n = cd.nx * cd.size;
		    memcpy (cd.end, inPtr, n * sizeof (unsigned short));
		    inPtr += n * sizeof (unsigned short);
		    cd.end += n;
		}
	    }
	}
    }
    else
    {
	//
	// The input buffer contains only HALF channels, and they
	// are in native, machine-dependent format.  Copy the pixels
	// into _tmpBuffer.
	//

	for (int y = minY; y <= maxY; ++y)
	{
	    for (int i = 0; i < _numChans; ++i)
	    {
		ChannelData &cd = _channelData[i];

		#if defined (DEBUG)
		    assert (cd.type == HALF);
		#endif

		// skip channels that have no samples in this scan line
		if (modp (y, cd.ys) != 0)
		    continue;

		int n = cd.nx * cd.size;
		memcpy (cd.end, inPtr, n * sizeof (unsigned short));
		inPtr  += n * sizeof (unsigned short);
		cd.end += n;
	    }
	}
    }

    //
    // The pixels for each channel have been packed into a contiguous
    // block in _tmpBuffer.  HALF channels are in native format; UINT
    // and FLOAT channels are in Xdr format.
    //

    #if defined (DEBUG)

	for (int i = 1; i < _numChans; ++i)
	    assert (_channelData[i-1].end == _channelData[i].start);

	assert (_channelData[_numChans-1].end == tmpBufferEnd);

    #endif

    //
    // For each HALF channel, split the data in _tmpBuffer into 4x4
    // pixel blocks.  Compress each block and append the compressed
    // data to the output buffer.
    //
    // UINT and FLOAT channels are copied from _tmpBuffer into the
    // output buffer without further processing.
    //

    char *outEnd = _outBuffer;

    for (int i = 0; i < _numChans; ++i)
    {
	ChannelData &cd = _channelData[i];

	if (cd.type != HALF)
	{
	    //
	    // UINT or FLOAT channel.
	    //

	    int n = cd.nx * cd.ny * cd.size * sizeof (unsigned short);
	    memcpy (outEnd, cd.start, n);
	    outEnd += n;

	    continue;
	}

	//
	// HALF channel
	//

	for (int y = 0; y < cd.ny; y += 4)
	{
	    //
	    // Copy the next 4x4 pixel block into array s.
	    // If the width, cd.nx, or the height, cd.ny, of
	    // the pixel data in _tmpBuffer is not divisible
	    // by 4, then pad the data by repeating the
	    // rightmost column and the bottom row.
	    //

	    unsigned short *row0 = cd.start + y * cd.nx;
	    unsigned short *row1 = row0 + cd.nx;
	    unsigned short *row2 = row1 + cd.nx;
	    unsigned short *row3 = row2 + cd.nx;

	    if (y + 3 >= cd.ny)
	    {
		//
		// Fewer than 4 rows are left; make the pointers
		// for the missing rows refer to the last row
		// that actually exists.
		//

		if (y + 1 >= cd.ny)
		    row1 = row0;

		if (y + 2 >= cd.ny)
		    row2 = row1;

		row3 = row2;
	    }

	    for (int x = 0; x < cd.nx; x += 4)
	    {
		unsigned short s[16];

		if (x + 3 >= cd.nx)
		{
		    //
		    // Fewer than 4 columns are left; n is the
		    // number of columns that actually exist,
		    // and the last one is repeated.
		    //

		    int n = cd.nx - x;

		    for (int i = 0; i < 4; ++i)
		    {
			int j = min (i, n - 1);

			s[i +  0] = row0[j];
			s[i +  4] = row1[j];
			s[i +  8] = row2[j];
			s[i + 12] = row3[j];
		    }
		}
		else
		{
		    memcpy (&s[ 0], row0, 4 * sizeof (unsigned short));
		    memcpy (&s[ 4], row1, 4 * sizeof (unsigned short));
		    memcpy (&s[ 8], row2, 4 * sizeof (unsigned short));
		    memcpy (&s[12], row3, 4 * sizeof (unsigned short));
		}

		row0 += 4;
		row1 += 4;
		row2 += 4;
		row3 += 4;

		//
		// Compress the contents of array s and append the
		// results to the output buffer.
		//

		if (cd.pLinear)
		    convertFromLinear (s);

		// pack() returns the number of bytes written (14 or 3)
		outEnd += pack (s, (unsigned char *) outEnd,
				_optFlatFields, !cd.pLinear);
	    }
	}
    }

    return outEnd - _outBuffer;
}
856
857
int
B44Compressor::uncompress (const char *inPtr,
			   int inSize,
			   Imath::Box2i range,
			   const char *&outPtr)
{
    //
    // This function is the reverse of the compress() function,
    // above.  First all pixels are moved from the input buffer
    // into _tmpBuffer.  UINT and FLOAT channels are copied
    // verbatim; HALF channels are uncompressed in blocks of
    // 4x4 pixels.  Then the pixels in _tmpBuffer are copied
    // into the output buffer and rearranged such that the data
    // for each scan line form a contiguous block.
    //
    // Parameters:
    //
    //	inPtr	    the compressed data
    //
    //	inSize	    number of bytes of compressed data
    //
    //	range	    the pixel range covered by the data; its maximum
    //		    is clipped to _maxX and _maxY
    //
    //	outPtr	    set to point to _outBuffer, which receives the
    //		    uncompressed pixels
    //
    // Returns the number of bytes that were written to _outBuffer.
    //
    // Throws an Iex::InputExc if the input ends before all pixels
    // have been read, or if input is left over after all pixels
    // have been read.
    //

    outPtr = _outBuffer;

    if (inSize == 0)
    {
	// empty input buffer, nothing to uncompress
	return 0;
    }

    //
    // For each channel, determine how many pixels the pixel
    // range contains, and where in _tmpBuffer they are placed.
    //

    int minX = range.min.x;
    int maxX = min (range.max.x, _maxX);
    int minY = range.min.y;
    int maxY = min (range.max.y, _maxY);

    unsigned short *tmpBufferEnd = _tmpBuffer;
    int i = 0;

    for (ChannelList::ConstIterator c = _channels.begin();
	 c != _channels.end();
	 ++c, ++i)
    {
	ChannelData &cd = _channelData[i];

	cd.start = tmpBufferEnd;
	cd.end = cd.start;

	cd.nx = numSamples (c.channel().xSampling, minX, maxX);
	cd.ny = numSamples (c.channel().ySampling, minY, maxY);

	// cd.size is the pixel size in units of 16-bit words
	tmpBufferEnd += cd.nx * cd.ny * cd.size;
    }

    //
    // Move the pixels of each channel from the input buffer
    // into the channel's region in _tmpBuffer.
    //

    for (int i = 0; i < _numChans; ++i)
    {
	ChannelData &cd = _channelData[i];

	if (cd.type != HALF)
	{
	    //
	    // UINT or FLOAT channel.
	    //

	    int n = cd.nx * cd.ny * cd.size * sizeof (unsigned short);

	    if (inSize < n)
		notEnoughData();

	    memcpy (cd.start, inPtr, n);
	    inPtr += n;
	    inSize -= n;

	    continue;
	}

	//
	// HALF channel
	//

	for (int y = 0; y < cd.ny; y += 4)
	{
	    unsigned short *row0 = cd.start + y * cd.nx;
	    unsigned short *row1 = row0 + cd.nx;
	    unsigned short *row2 = row1 + cd.nx;
	    unsigned short *row3 = row2 + cd.nx;

	    for (int x = 0; x < cd.nx; x += 4)
	    {
		unsigned short s[16];

		if (inSize < 3)
		    notEnoughData();

		//
		// A third byte of 0xfc marks a 3-byte block in
		// which all pixels are the same; everything else
		// is a regular 14-byte block.
		//

		if (((const unsigned char *)inPtr)[2] == 0xfc)
		{
		    unpack3 ((const unsigned char *)inPtr, s);
		    inPtr += 3;
		    inSize -= 3;
		}
		else
		{
		    if (inSize < 14)
			notEnoughData();

		    unpack14 ((const unsigned char *)inPtr, s);
		    inPtr += 14;
		    inSize -= 14;
		}

		if (cd.pLinear)
		    convertToLinear (s);

		//
		// n is the number of bytes to copy per row:
		// 4 pixels, or fewer if the block extends
		// beyond the right edge of the pixel data.
		//

		int n = (x + 3 < cd.nx)?
			    4 * sizeof (unsigned short) :
			    (cd.nx - x) * sizeof (unsigned short);

		if (y + 3 < cd.ny)
		{
		    memcpy (row0, &s[ 0], n);
		    memcpy (row1, &s[ 4], n);
		    memcpy (row2, &s[ 8], n);
		    memcpy (row3, &s[12], n);
		}
		else
		{
		    //
		    // The block extends beyond the bottom edge
		    // of the pixel data; copy only the rows
		    // that exist.
		    //

		    memcpy (row0, &s[ 0], n);

		    if (y + 1 < cd.ny)
			memcpy (row1, &s[ 4], n);

		    if (y + 2 < cd.ny)
			memcpy (row2, &s[ 8], n);
		}

		row0 += 4;
		row1 += 4;
		row2 += 4;
		row3 += 4;
	    }
	}
    }

    //
    // Copy the pixels from _tmpBuffer into the output buffer,
    // scan line by scan line.
    //

    char *outEnd = _outBuffer;

    if (_format == XDR)
    {
	//
	// HALF pixels are written in Xdr format; UINT and
	// FLOAT pixels are copied verbatim.
	//

	for (int y = minY; y <= maxY; ++y)
	{
	    for (int i = 0; i < _numChans; ++i)
	    {
		ChannelData &cd = _channelData[i];

		// skip channels that have no samples in this scan line
		if (modp (y, cd.ys) != 0)
		    continue;

		if (cd.type == HALF)
		{
		    for (int x = cd.nx; x > 0; --x)
		    {
			Xdr::write <CharPtrIO> (outEnd, *cd.end);
			++cd.end;
		    }
		}
		else
		{
		    int n = cd.nx * cd.size;
		    memcpy (outEnd, cd.end, n * sizeof (unsigned short));
		    outEnd += n * sizeof (unsigned short);
		    cd.end += n;
		}
	    }
	}
    }
    else
    {
	//
	// Native format; all channels are of type HALF
	// and the pixels are copied without conversion.
	//

	for (int y = minY; y <= maxY; ++y)
	{
	    for (int i = 0; i < _numChans; ++i)
	    {
		ChannelData &cd = _channelData[i];

		#if defined (DEBUG)
		    assert (cd.type == HALF);
		#endif

		// skip channels that have no samples in this scan line
		if (modp (y, cd.ys) != 0)
		    continue;

		int n = cd.nx * cd.size;
		memcpy (outEnd, cd.end, n * sizeof (unsigned short));
		outEnd += n * sizeof (unsigned short);
		cd.end += n;
	    }
	}
    }

    #if defined (DEBUG)

	for (int i = 1; i < _numChans; ++i)
	    assert (_channelData[i-1].end == _channelData[i].start);

	assert (_channelData[_numChans-1].end == tmpBufferEnd);

    #endif

    // all pixels have been read; any remaining input is an error
    if (inSize > 0)
	tooMuchData();

    outPtr = _outBuffer;
    return outEnd - _outBuffer;
}
1062
1063
1064} // namespace Imf
1065