openexr/ilmimf/ImfB44Compressor.cpp

///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2006, Industrial Light & Magic, a division of Lucas
// Digital Ltd. LLC
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of Industrial Light & Magic nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////


//-----------------------------------------------------------------------------
//
//	class B44Compressor
//
//	This compressor is lossy for HALF channels; the compression rate
//	is fixed at 32/14 (approximately 2.28).  FLOAT and UINT channels
//	are not compressed; their data are preserved exactly.
//
//	Each HALF channel is split into blocks of 4 by 4 pixels.  An
//	uncompressed block occupies 32 bytes, which are re-interpreted
//	as sixteen 16-bit unsigned integers, t[0] ... t[15].  Compression
//	shrinks the block to 14 bytes.  The compressed 14-byte block
//	contains
//
//	 - t[0]
//
//	 - a 6-bit shift value
//
//	 - 15 densely packed 6-bit values, r[0] ... r[14], which are
//         computed by subtracting adjacent pixel values and right-
//	   shifting the differences according to the stored shift value.
//
//	   Differences between adjacent pixels are computed according
//	   to the following diagram:
//
//		 0 -------->  1 -------->  2 -------->  3
//               |     3            7           11
//               |
//               | 0
//               |
//               v
//		 4 -------->  5 -------->  6 -------->  7
//               |     4            8           12
//               |
//               | 1
//               |
//               v
//		 8 -------->  9 --------> 10 --------> 11
//               |     5            9           13
//               |
//               | 2
//               |
//               v
//		12 --------> 13 --------> 14 --------> 15
//                     6           10           14
//
//	    Here
//
//               5 ---------> 6
//                     8
//
//	    means that r[8] is the difference between t[5] and t[6].
//
//	 - optionally, a 4-by-4 pixel block where all pixels have the
//	   same value can be treated as a special case, where the
//	   compressed block contains only 3 instead of 14 bytes:
//	   t[0], followed by an "impossible" 6-bit shift value and
//	   two padding bits.
//
//	This compressor can handle positive and negative pixel values.
//	NaNs and infinities are replaced with zeroes before compression.
//
//-----------------------------------------------------------------------------

#include <ImfB44Compressor.h>
#include <ImfHeader.h>
#include <ImfChannelList.h>
#include <ImfMisc.h>
#include <ImathFun.h>
#include <ImathBox.h>
#include <Iex.h>
#include <ImfIO.h>
#include <ImfXdr.h>
#include <string.h>
#include <assert.h>
#include <algorithm>

namespace Imf {

using Imath::divp;
using Imath::modp;
using Imath::Box2i;
using Imath::V2i;
using std::min;

namespace {

//
// Lookup tables for
//	y = exp (x / 8)
// and
//	x = 8 * log (y)
//

#include "b44ExpLogTable.h"


inline void
convertFromLinear (unsigned short s[16])
{
    for (int i = 0; i < 16; ++i)
	s[i] = expTable[s[i]];
}


inline void
convertToLinear (unsigned short s[16])
{
    for (int i = 0; i < 16; ++i)
	s[i] = logTable[s[i]];
}


inline int
shiftAndRound (int x, int shift)
{
    //
    // Compute
    //
    //     y = x * pow (2, -shift),
    //
    // then round y to the nearest integer.
    // In case of a tie, where y is exactly
    // halfway between two integers, round
    // to the even one.
    //

    x <<= 1;
    int a = (1 << shift) - 1;
    shift += 1;
    int b = (x >> shift) & 1;
    return (x + a + b) >> shift;
}


int
pack (const unsigned short s[16],
      unsigned char b[14],
      bool optFlatFields,
      bool exactMax)
{
    //
    // Pack a block of 4 by 4 16-bit pixels (32 bytes) into
    // either 14 or 3 bytes.
    //

    //
    // Integers s[0] ... s[15] represent floating-point numbers
    // in what is essentially a sign-magnitude format.  Convert
    // s[0] .. s[15] into a new set of integers, t[0] ... t[15],
    // such that if t[i] is greater than t[j], the floating-point
    // number that corresponds to s[i] is always greater than
    // the floating-point number that corresponds to s[j].
    //
    // Also, replace any bit patterns that represent NaNs or
    // infinities with bit patterns that represent floating-point
    // zeroes.
    //
    //	bit pattern	floating-point		bit pattern
    //	in s[i]		value			in t[i]
    //
    //  0x7fff		NAN			0x8000
    //  0x7ffe		NAN			0x8000
    //	  ...					  ...
    //  0x7c01		NAN			0x8000
    //  0x7c00		+infinity		0x8000
    //  0x7bff		+HALF_MAX		0xfbff
    //  0x7bfe					0xfbfe
    //  0x7bfd					0xfbfd
    //	  ...					  ...
    //  0x0002		+2 * HALF_MIN		0x8002
    //  0x0001		+HALF_MIN		0x8001
    //  0x0000		+0.0			0x8000
    //  0x8000		-0.0			0x7fff
    //  0x8001		-HALF_MIN		0x7ffe
    //  0x8002		-2 * HALF_MIN		0x7ffd
    //	  ...					  ...
    //  0xfbfd					0x0f02
    //  0xfbfe					0x0401
    //  0xfbff		-HALF_MAX		0x0400
    //  0xfc00		-infinity		0x8000
    //  0xfc01		NAN			0x8000
    //	  ...					  ...
    //  0xfffe		NAN			0x8000
    //  0xffff		NAN			0x8000
    //

    unsigned short t[16];

    for (int i = 0; i < 16; ++i)
    {
	if ((s[i] & 0x7c00) == 0x7c00)
	    t[i] = 0x8000;
	else if (s[i] & 0x8000)
	    t[i] = ~s[i];
	else
	    t[i] = s[i] | 0x8000;
    }

    //
    // Find the maximum, tMax, of t[0] ... t[15].
    //

    unsigned short tMax = 0;

    for (int i = 0; i < 16; ++i)
	if (tMax < t[i])
	    tMax = t[i];

    //
    // Compute a set of running differences, r[0] ... r[14]:
    // Find a shift value such that after rounding off the
    // rightmost bits and shifting all differenes are between
    // -32 and +31.  Then bias the differences so that they
    // end up between 0 and 63.
    //

    int shift = -1;
    int d[16];
    int r[15];
    int rMin;
    int rMax;

    const int bias = 0x20;

    do
    {
	shift += 1;

	//
	// Compute absolute differences, d[0] ... d[15],
	// between tMax and t[0] ... t[15].
	//
	// Shift and round the absolute differences.
	//

	for (int i = 0; i < 16; ++i)
	    d[i] = shiftAndRound (tMax - t[i], shift);

	//
	// Convert d[0] .. d[15] into running differences
	//

	r[ 0] = d[ 0] - d[ 4] + bias;
	r[ 1] = d[ 4] - d[ 8] + bias;
	r[ 2] = d[ 8] - d[12] + bias;

	r[ 3] = d[ 0] - d[ 1] + bias;
	r[ 4] = d[ 4] - d[ 5] + bias;
	r[ 5] = d[ 8] - d[ 9] + bias;
	r[ 6] = d[12] - d[13] + bias;

	r[ 7] = d[ 1] - d[ 2] + bias;
	r[ 8] = d[ 5] - d[ 6] + bias;
	r[ 9] = d[ 9] - d[10] + bias;
	r[10] = d[13] - d[14] + bias;

	r[11] = d[ 2] - d[ 3] + bias;
	r[12] = d[ 6] - d[ 7] + bias;
	r[13] = d[10] - d[11] + bias;
	r[14] = d[14] - d[15] + bias;

	rMin = r[0];
	rMax = r[0];

	for (int i = 1; i < 15; ++i)
	{
	    if (rMin > r[i])
		rMin = r[i];

	    if (rMax < r[i])
		rMax = r[i];
	}
    }
    while (rMin < 0 || rMax > 0x3f);

    if (rMin == bias && rMax == bias && optFlatFields)
    {
	//
	// Special case - all pixels have the same value.
	// We encode this in 3 instead of 14 bytes by
	// storing the value 0xfc in the third output byte,
	// which cannot occur in the 14-byte encoding.
	//

	b[0] = (t[0] >> 8);
	b[1] =  t[0];
	b[2] = 0xfc;

	return 3;
    }

    if (exactMax)
    {
	//
	// Adjust t[0] so that the pixel whose value is equal
	// to tMax gets represented as accurately as possible.
	//

	t[0] = tMax - (d[0] << shift);
    }

    //
    // Pack t[0], shift and r[0] ... r[14] into 14 bytes:
    //

    b[ 0] = (t[0] >> 8);
    b[ 1] =  t[0];

    b[ 2] = (unsigned char) ((shift << 2) | (r[ 0] >> 4));
    b[ 3] = (unsigned char) ((r[ 0] << 4) | (r[ 1] >> 2));
    b[ 4] = (unsigned char) ((r[ 1] << 6) |  r[ 2]      );

    b[ 5] = (unsigned char) ((r[ 3] << 2) | (r[ 4] >> 4));
    b[ 6] = (unsigned char) ((r[ 4] << 4) | (r[ 5] >> 2));
    b[ 7] = (unsigned char) ((r[ 5] << 6) |  r[ 6]      );

    b[ 8] = (unsigned char) ((r[ 7] << 2) | (r[ 8] >> 4));
    b[ 9] = (unsigned char) ((r[ 8] << 4) | (r[ 9] >> 2));
    b[10] = (unsigned char) ((r[ 9] << 6) |  r[10]      );

    b[11] = (unsigned char) ((r[11] << 2) | (r[12] >> 4));
    b[12] = (unsigned char) ((r[12] << 4) | (r[13] >> 2));
    b[13] = (unsigned char) ((r[13] << 6) |  r[14]      );

    return 14;
}


inline
void
unpack14 (const unsigned char b[14], unsigned short s[16])
{
    //
    // Unpack a 14-byte block into 4 by 4 16-bit pixels.
    //

    #if defined (DEBUG)
	assert (b[2] != 0xfc);
    #endif

    s[ 0] = (b[0] << 8) | b[1];

    unsigned short shift = (b[ 2] >> 2);
    unsigned short bias = (0x20 << shift);

    s[ 4] = s[ 0] + ((((b[ 2] << 4) | (b[ 3] >> 4)) & 0x3f) << shift) - bias;
    s[ 8] = s[ 4] + ((((b[ 3] << 2) | (b[ 4] >> 6)) & 0x3f) << shift) - bias;
    s[12] = s[ 8] +   ((b[ 4]                       & 0x3f) << shift) - bias;

    s[ 1] = s[ 0] +   ((b[ 5] >> 2)                         << shift) - bias;
    s[ 5] = s[ 4] + ((((b[ 5] << 4) | (b[ 6] >> 4)) & 0x3f) << shift) - bias;
    s[ 9] = s[ 8] + ((((b[ 6] << 2) | (b[ 7] >> 6)) & 0x3f) << shift) - bias;
    s[13] = s[12] +   ((b[ 7]                       & 0x3f) << shift) - bias;

    s[ 2] = s[ 1] +   ((b[ 8] >> 2)                         << shift) - bias;
    s[ 6] = s[ 5] + ((((b[ 8] << 4) | (b[ 9] >> 4)) & 0x3f) << shift) - bias;
    s[10] = s[ 9] + ((((b[ 9] << 2) | (b[10] >> 6)) & 0x3f) << shift) - bias;
    s[14] = s[13] +   ((b[10]                       & 0x3f) << shift) - bias;

    s[ 3] = s[ 2] +   ((b[11] >> 2)                         << shift) - bias;
    s[ 7] = s[ 6] + ((((b[11] << 4) | (b[12] >> 4)) & 0x3f) << shift) - bias;
    s[11] = s[10] + ((((b[12] << 2) | (b[13] >> 6)) & 0x3f) << shift) - bias;
    s[15] = s[14] +   ((b[13]                       & 0x3f) << shift) - bias;

    for (int i = 0; i < 16; ++i)
    {
	if (s[i] & 0x8000)
	    s[i] &= 0x7fff;
	else
	    s[i] = ~s[i];
    }
}


inline
void
unpack3 (const unsigned char b[3], unsigned short s[16])
{
    //
    // Unpack a 3-byte block into 4 by 4 identical 16-bit pixels.
    //

    #if defined (DEBUG)
	assert (b[2] == 0xfc);
    #endif

    s[0] = (b[0] << 8) | b[1];

    if (s[0] & 0x8000)
	s[0] &= 0x7fff;
    else
	s[0] = ~s[0];

    for (int i = 1; i < 16; ++i)
	s[i] = s[0];
}


void
notEnoughData ()
{
    throw Iex::InputExc ("Error decompressing data "
			 "(input data are shorter than expected).");
}


void
tooMuchData ()
{
    throw Iex::InputExc ("Error decompressing data "
			 "(input data are longer than expected).");
}

} // namespace


struct B44Compressor::ChannelData
{
    unsigned short *	start;
    unsigned short *	end;
    int			nx;
    int			ny;
    int			ys;
    PixelType		type;
    bool		pLinear;
    int			size;
};


B44Compressor::B44Compressor
    (const Header &hdr,
     int maxScanLineSize,
     int numScanLines,
     bool optFlatFields)
:
    Compressor (hdr),
    _maxScanLineSize (maxScanLineSize),
    _optFlatFields (optFlatFields),
    _format (XDR),
    _numScanLines (numScanLines),
    _tmpBuffer (0),
    _outBuffer (0),
    _numChans (0),
    _channels (hdr.channels()),
    _channelData (0)
{
    //
    // Allocate buffers for compressed an uncompressed pixel data,
    // allocate a set of ChannelData structs to help speed up the
    // compress() and uncompress() functions, below, and determine
    // if uncompressed pixel data should be in native or Xdr format.
    //

    _tmpBuffer = new unsigned short [maxScanLineSize * numScanLines];

    const ChannelList &channels = header().channels();
    int numHalfChans = 0;

    for (ChannelList::ConstIterator c = channels.begin();
	 c != channels.end();
	 ++c)
    {
	assert (pixelTypeSize (c.channel().type) % pixelTypeSize (HALF) == 0);
	++_numChans;

	if (c.channel().type == HALF)
	    ++numHalfChans;
    }

    //
    // Compressed data may be larger than the input data
    //

    int padding = 12 * numHalfChans * (numScanLines + 3) / 4;

    _outBuffer = new char [maxScanLineSize * numScanLines + padding];
    _channelData = new ChannelData[_numChans];

    int i = 0;

    for (ChannelList::ConstIterator c = channels.begin();
	 c != channels.end();
	 ++c, ++i)
    {
	_channelData[i].ys = c.channel().ySampling;
	_channelData[i].type = c.channel().type;
	_channelData[i].pLinear = c.channel().pLinear;
	_channelData[i].size =
	    pixelTypeSize (c.channel().type) / pixelTypeSize (HALF);
    }

    const Box2i &dataWindow = hdr.dataWindow();

    _minX = dataWindow.min.x;
    _maxX = dataWindow.max.x;
    _maxY = dataWindow.max.y;

    //
    // We can support uncompressed data in the machine's native
    // format only if all image channels are of type HALF.
    //

    assert (sizeof (unsigned short) == pixelTypeSize (HALF));

    if (_numChans == numHalfChans)
	_format = NATIVE;
}


B44Compressor::~B44Compressor ()
{
    delete [] _tmpBuffer;
    delete [] _outBuffer;
    delete [] _channelData;
}


int
B44Compressor::numScanLines () const
{
    return _numScanLines;
}


Compressor::Format
B44Compressor::format () const
{
    return _format;
}


int
B44Compressor::compress (const char *inPtr,
			 int inSize,
			 int minY,
			 const char *&outPtr)
{
    return compress (inPtr,
		     inSize,
		     Box2i (V2i (_minX, minY),
			    V2i (_maxX, minY + numScanLines() - 1)),
		     outPtr);
}


int
B44Compressor::compressTile (const char *inPtr,
			     int inSize,
			     Imath::Box2i range,
			     const char *&outPtr)
{
    return compress (inPtr, inSize, range, outPtr);
}


int
B44Compressor::uncompress (const char *inPtr,
			   int inSize,
			   int minY,
			   const char *&outPtr)
{
    return uncompress (inPtr,
		       inSize,
		       Box2i (V2i (_minX, minY),
			      V2i (_maxX, minY + numScanLines() - 1)),
		       outPtr);
}


int
B44Compressor::uncompressTile (const char *inPtr,
			       int inSize,
			       Imath::Box2i range,
			       const char *&outPtr)
{
    return uncompress (inPtr, inSize, range, outPtr);
}


int
B44Compressor::compress (const char *inPtr,
			 int inSize,
			 Imath::Box2i range,
			 const char *&outPtr)
{
    //
    // Compress a block of pixel data:  First copy the input pixels
    // from the input buffer into _tmpBuffer, rearranging them such
    // that blocks of 4x4 pixels of a single channel can be accessed
    // conveniently.  Then compress each 4x4 block of HALF pixel data
    // and append the result to the output buffer.  Copy UINT and
    // FLOAT data to the output buffer without compressing them.
    //

    outPtr = _outBuffer;

    if (inSize == 0)
    {
	//
	// Special case - empty input buffer.
	//

	return 0;
    }

    //
    // For each channel, detemine how many pixels are stored
    // in the input buffer, and where those pixels will be
    // placed in _tmpBuffer.
    //

    int minX = range.min.x;
    int maxX = min (range.max.x, _maxX);
    int minY = range.min.y;
    int maxY = min (range.max.y, _maxY);

    unsigned short *tmpBufferEnd = _tmpBuffer;
    int i = 0;

    for (ChannelList::ConstIterator c = _channels.begin();
	 c != _channels.end();
	 ++c, ++i)
    {
	ChannelData &cd = _channelData[i];

	cd.start = tmpBufferEnd;
	cd.end = cd.start;

	cd.nx = numSamples (c.channel().xSampling, minX, maxX);
	cd.ny = numSamples (c.channel().ySampling, minY, maxY);

	tmpBufferEnd += cd.nx * cd.ny * cd.size;
    }

    if (_format == XDR)
    {
	//
	// The data in the input buffer are in the machine-independent
	// Xdr format.  Copy the HALF channels into _tmpBuffer and
	// convert them back into native format for compression.
	// Copy UINT and FLOAT channels verbatim into _tmpBuffer.
	//

	for (int y = minY; y <= maxY; ++y)
	{
	    for (int i = 0; i < _numChans; ++i)
	    {
		ChannelData &cd = _channelData[i];

		if (modp (y, cd.ys) != 0)
		    continue;

		if (cd.type == HALF)
		{
		    for (int x = cd.nx; x > 0; --x)
		    {
			Xdr::read <CharPtrIO> (inPtr, *cd.end);
			++cd.end;
		    }
		}
		else
		{
		    int n = cd.nx * cd.size;
		    memcpy (cd.end, inPtr, n * sizeof (unsigned short));
		    inPtr += n * sizeof (unsigned short);
		    cd.end += n;
		}
	    }
	}
    }
    else
    {
	//
	// The input buffer contains only HALF channels, and they
	// are in native, machine-dependent format.  Copy the pixels
	// into _tmpBuffer.
	//

	for (int y = minY; y <= maxY; ++y)
	{
	    for (int i = 0; i < _numChans; ++i)
	    {
		ChannelData &cd = _channelData[i];

		#if defined (DEBUG)
		    assert (cd.type == HALF);
		#endif

		if (modp (y, cd.ys) != 0)
		    continue;

		int n = cd.nx * cd.size;
		memcpy (cd.end, inPtr, n * sizeof (unsigned short));
		inPtr  += n * sizeof (unsigned short);
		cd.end += n;
	    }
	}
    }

    //
    // The pixels for each channel have been packed into a contiguous
    // block in _tmpBuffer.  HALF channels are in native format; UINT
    // and FLOAT channels are in Xdr format.
    //

    #if defined (DEBUG)

	for (int i = 1; i < _numChans; ++i)
	    assert (_channelData[i-1].end == _channelData[i].start);

	assert (_channelData[_numChans-1].end == tmpBufferEnd);

    #endif

    //
    // For each HALF channel, split the data in _tmpBuffer into 4x4
    // pixel blocks.  Compress each block and append the compressed
    // data to the output buffer.
    //
    // UINT and FLOAT channels are copied from _tmpBuffer into the
    // output buffer without further processing.
    //

    char *outEnd = _outBuffer;

    for (int i = 0; i < _numChans; ++i)
    {
	ChannelData &cd = _channelData[i];

	if (cd.type != HALF)
	{
	    //
	    // UINT or FLOAT channel.
	    //

	    int n = cd.nx * cd.ny * cd.size * sizeof (unsigned short);
	    memcpy (outEnd, cd.start, n);
	    outEnd += n;

	    continue;
	}

	//
	// HALF channel
	//

	for (int y = 0; y < cd.ny; y += 4)
	{
	    //
	    // Copy the next 4x4 pixel block into array s.
	    // If the width, cd.nx, or the height, cd.ny, of
	    // the pixel data in _tmpBuffer is not divisible
	    // by 4, then pad the data by repeating the
	    // rightmost column and the bottom row.
	    //

	    unsigned short *row0 = cd.start + y * cd.nx;
	    unsigned short *row1 = row0 + cd.nx;
	    unsigned short *row2 = row1 + cd.nx;
	    unsigned short *row3 = row2 + cd.nx;

	    if (y + 3 >= cd.ny)
	    {
		if (y + 1 >= cd.ny)
		    row1 = row0;

		if (y + 2 >= cd.ny)
		    row2 = row1;

		row3 = row2;
	    }

	    for (int x = 0; x < cd.nx; x += 4)
	    {
		unsigned short s[16];

		if (x + 3 >= cd.nx)
		{
		    int n = cd.nx - x;

		    for (int i = 0; i < 4; ++i)
		    {
			int j = min (i, n - 1);

			s[i +  0] = row0[j];
			s[i +  4] = row1[j];
			s[i +  8] = row2[j];
			s[i + 12] = row3[j];
		    }
		}
		else
		{
		    memcpy (&s[ 0], row0, 4 * sizeof (unsigned short));
		    memcpy (&s[ 4], row1, 4 * sizeof (unsigned short));
		    memcpy (&s[ 8], row2, 4 * sizeof (unsigned short));
		    memcpy (&s[12], row3, 4 * sizeof (unsigned short));
		}

		row0 += 4;
		row1 += 4;
		row2 += 4;
		row3 += 4;

		//
		// Compress the contents of array s and append the
		// results to the output buffer.
		//

		if (cd.pLinear)
		    convertFromLinear (s);

		outEnd += pack (s, (unsigned char *) outEnd,
				_optFlatFields, !cd.pLinear);
	    }
	}
    }

    return outEnd - _outBuffer;
}


int
B44Compressor::uncompress (const char *inPtr,
			   int inSize,
			   Imath::Box2i range,
			   const char *&outPtr)
{
    //
    // This function is the reverse of the compress() function,
    // above.  First all pixels are moved from the input buffer
    // into _tmpBuffer.  UINT and FLOAT channels are copied
    // verbatim; HALF channels are uncompressed in blocks of
    // 4x4 pixels.  Then the pixels in _tmpBuffer are copied
    // into the output buffer and rearranged such that the data
    // for for each scan line form a contiguous block.
    //

    outPtr = _outBuffer;

    if (inSize == 0)
    {
	return 0;
    }

    int minX = range.min.x;
    int maxX = min (range.max.x, _maxX);
    int minY = range.min.y;
    int maxY = min (range.max.y, _maxY);

    unsigned short *tmpBufferEnd = _tmpBuffer;
    int i = 0;

    for (ChannelList::ConstIterator c = _channels.begin();
	 c != _channels.end();
	 ++c, ++i)
    {
	ChannelData &cd = _channelData[i];

	cd.start = tmpBufferEnd;
	cd.end = cd.start;

	cd.nx = numSamples (c.channel().xSampling, minX, maxX);
	cd.ny = numSamples (c.channel().ySampling, minY, maxY);

	tmpBufferEnd += cd.nx * cd.ny * cd.size;
    }

    for (int i = 0; i < _numChans; ++i)
    {
	ChannelData &cd = _channelData[i];

	if (cd.type != HALF)
	{
	    //
	    // UINT or FLOAT channel.
	    //

	    int n = cd.nx * cd.ny * cd.size * sizeof (unsigned short);

	    if (inSize < n)
		notEnoughData();

	    memcpy (cd.start, inPtr, n);
	    inPtr += n;
	    inSize -= n;

	    continue;
	}

	//
	// HALF channel
	//

	for (int y = 0; y < cd.ny; y += 4)
	{
	    unsigned short *row0 = cd.start + y * cd.nx;
	    unsigned short *row1 = row0 + cd.nx;
	    unsigned short *row2 = row1 + cd.nx;
	    unsigned short *row3 = row2 + cd.nx;

	    for (int x = 0; x < cd.nx; x += 4)
	    {
		unsigned short s[16];

		if (inSize < 3)
		    notEnoughData();

		if (((const unsigned char *)inPtr)[2] == 0xfc)
		{
		    unpack3 ((const unsigned char *)inPtr, s);
		    inPtr += 3;
		    inSize -= 3;
		}
		else
		{
		    if (inSize < 14)
			notEnoughData();

		    unpack14 ((const unsigned char *)inPtr, s);
		    inPtr += 14;
		    inSize -= 14;
		}

		if (cd.pLinear)
		    convertToLinear (s);

		int n = (x + 3 < cd.nx)?
			    4 * sizeof (unsigned short) :
			    (cd.nx - x) * sizeof (unsigned short);

		if (y + 3 < cd.ny)
		{
		    memcpy (row0, &s[ 0], n);
		    memcpy (row1, &s[ 4], n);
		    memcpy (row2, &s[ 8], n);
		    memcpy (row3, &s[12], n);
		}
		else
		{
		    memcpy (row0, &s[ 0], n);

		    if (y + 1 < cd.ny)
			memcpy (row1, &s[ 4], n);

		    if (y + 2 < cd.ny)
			memcpy (row2, &s[ 8], n);
		}

		row0 += 4;
		row1 += 4;
		row2 += 4;
		row3 += 4;
	    }
	}
    }

    char *outEnd = _outBuffer;

    if (_format == XDR)
    {
	for (int y = minY; y <= maxY; ++y)
	{
	    for (int i = 0; i < _numChans; ++i)
	    {
		ChannelData &cd = _channelData[i];

		if (modp (y, cd.ys) != 0)
		    continue;

		if (cd.type == HALF)
		{
		    for (int x = cd.nx; x > 0; --x)
		    {
			Xdr::write <CharPtrIO> (outEnd, *cd.end);
			++cd.end;
		    }
		}
		else
		{
		    int n = cd.nx * cd.size;
		    memcpy (outEnd, cd.end, n * sizeof (unsigned short));
		    outEnd += n * sizeof (unsigned short);
		    cd.end += n;
		}
	    }
	}
    }
    else
    {
	for (int y = minY; y <= maxY; ++y)
	{
	    for (int i = 0; i < _numChans; ++i)
	    {
		ChannelData &cd = _channelData[i];

		#if defined (DEBUG)
		    assert (cd.type == HALF);
		#endif

		if (modp (y, cd.ys) != 0)
		    continue;

		int n = cd.nx * cd.size;
		memcpy (outEnd, cd.end, n * sizeof (unsigned short));
		outEnd += n * sizeof (unsigned short);
		cd.end += n;
	    }
	}
    }

    #if defined (DEBUG)

	for (int i = 1; i < _numChans; ++i)
	    assert (_channelData[i-1].end == _channelData[i].start);

	assert (_channelData[_numChans-1].end == tmpBufferEnd);

    #endif

    if (inSize > 0)
	tooMuchData();

    outPtr = _outBuffer;
    return outEnd - _outBuffer;
}


} // namespace Imf