// Written in the D programming language. /** * Compress/decompress data using the $(HTTP www._zlib.net, _zlib library). * * Examples: * * If you have a small buffer you can use $(LREF compress) and * $(LREF uncompress) directly. * * ------- * import std.zlib; * * auto src = * "the quick brown fox jumps over the lazy dog\r * the quick brown fox jumps over the lazy dog\r"; * * ubyte[] dst; * ubyte[] result; * * dst = compress(src); * result = cast(ubyte[]) uncompress(dst); * assert(result == src); * ------- * * When the data to be compressed doesn't fit in one buffer, use * $(LREF Compress) and $(LREF UnCompress). * * ------- * import std.zlib; * import std.stdio; * import std.conv : to; * import std.algorithm.iteration : map; * * UnCompress decmp = new UnCompress; * foreach (chunk; stdin.byChunk(4096).map!(x => decmp.uncompress(x))) * { * chunk.to!string.write; * } * ------- * * References: * $(HTTP en.wikipedia.org/wiki/Zlib, Wikipedia) * * Copyright: Copyright Digital Mars 2000 - 2011. * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). * Authors: $(HTTP digitalmars.com, Walter Bright) * Source: $(PHOBOSSRC std/_zlib.d) */ /* Copyright Digital Mars 2000 - 2011. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) */ module std.zlib; //debug=zlib; // uncomment to turn on debugging printf's import etc.c.zlib; // Values for 'mode' enum { Z_NO_FLUSH = 0, Z_SYNC_FLUSH = 2, Z_FULL_FLUSH = 3, Z_FINISH = 4, } /************************************* * Errors throw a ZlibException. */ class ZlibException : Exception { this(int errnum) { string msg; switch (errnum) { case Z_STREAM_END: msg = "stream end"; break; case Z_NEED_DICT: msg = "need dict"; break; case Z_ERRNO: msg = "errno"; break; case Z_STREAM_ERROR: msg = "stream error"; break; case Z_DATA_ERROR: msg = "data error"; break; case Z_MEM_ERROR: msg = "mem error"; break; case Z_BUF_ERROR: msg = "buf error"; break; case Z_VERSION_ERROR: msg = "version error"; break; default: msg = "unknown error"; break; } super(msg); } } /** * $(P Compute the Adler-32 checksum of a buffer's worth of data.) * * Params: * adler = the starting checksum for the computation. Use 1 * for a new checksum. Use the output of this function * for a cumulative checksum. * buf = buffer containing input data * * Returns: * A $(D uint) checksum for the provided input data and starting checksum * * See_Also: * $(LINK http://en.wikipedia.org/wiki/Adler-32) */ uint adler32(uint adler, const(void)[] buf) { import std.range : chunks; foreach (chunk; (cast(ubyte[]) buf).chunks(0xFFFF0000)) { adler = etc.c.zlib.adler32(adler, chunk.ptr, cast(uint) chunk.length); } return adler; } /// @system unittest { static ubyte[] data = [1,2,3,4,5,6,7,8,9,10]; uint adler = adler32(0u, data); assert(adler == 0xdc0037); } @system unittest { static string data = "test"; uint adler = adler32(1, data); assert(adler == 0x045d01c1); } /** * $(P Compute the CRC32 checksum of a buffer's worth of data.) * * Params: * crc = the starting checksum for the computation. Use 0 * for a new checksum. Use the output of this function * for a cumulative checksum. * buf = buffer containing input data * * Returns: * A $(D uint) checksum for the provided input data and starting checksum * * See_Also: * $(LINK http://en.wikipedia.org/wiki/Cyclic_redundancy_check) */ uint crc32(uint crc, const(void)[] buf) { import std.range : chunks; foreach (chunk; (cast(ubyte[]) buf).chunks(0xFFFF0000)) { crc = etc.c.zlib.crc32(crc, chunk.ptr, cast(uint) chunk.length); } return crc; } @system unittest { static ubyte[] data = [1,2,3,4,5,6,7,8,9,10]; uint crc; debug(zlib) printf("D.zlib.crc32.unittest\n"); crc = crc32(0u, cast(void[]) data); debug(zlib) printf("crc = %x\n", crc); assert(crc == 0x2520577b); } /** * $(P Compress data) * * Params: * srcbuf = buffer containing the data to compress * level = compression level. Legal values are -1 .. 9, with -1 indicating * the default level (6), 0 indicating no compression, 1 being the * least compression and 9 being the most. * * Returns: * the compressed data */ ubyte[] compress(const(void)[] srcbuf, int level) in { assert(-1 <= level && level <= 9); } body { import core.memory : GC; auto destlen = srcbuf.length + ((srcbuf.length + 1023) / 1024) + 12; auto destbuf = new ubyte[destlen]; auto err = etc.c.zlib.compress2(destbuf.ptr, &destlen, cast(ubyte *) srcbuf.ptr, srcbuf.length, level); if (err) { GC.free(destbuf.ptr); throw new ZlibException(err); } destbuf.length = destlen; return destbuf; } /********************************************* * ditto */ ubyte[] compress(const(void)[] srcbuf) { return compress(srcbuf, Z_DEFAULT_COMPRESSION); } /********************************************* * Decompresses the data in srcbuf[]. * Params: * srcbuf = buffer containing the compressed data. * destlen = size of the uncompressed data. * It need not be accurate, but the decompression will be faster * if the exact size is supplied. * winbits = the base two logarithm of the maximum window size. * Returns: the decompressed data. */ void[] uncompress(const(void)[] srcbuf, size_t destlen = 0u, int winbits = 15) { import std.conv : to; int err; ubyte[] destbuf; if (!destlen) destlen = srcbuf.length * 2 + 1; etc.c.zlib.z_stream zs; zs.next_in = cast(typeof(zs.next_in)) srcbuf.ptr; zs.avail_in = to!uint(srcbuf.length); err = etc.c.zlib.inflateInit2(&zs, winbits); if (err) { throw new ZlibException(err); } size_t olddestlen = 0u; loop: while (true) { destbuf.length = destlen; zs.next_out = cast(typeof(zs.next_out)) &destbuf[olddestlen]; zs.avail_out = to!uint(destlen - olddestlen); olddestlen = destlen; err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH); switch (err) { case Z_OK: destlen = destbuf.length * 2; continue loop; case Z_STREAM_END: destbuf.length = zs.total_out; err = etc.c.zlib.inflateEnd(&zs); if (err != Z_OK) throw new ZlibException(err); return destbuf; default: etc.c.zlib.inflateEnd(&zs); throw new ZlibException(err); } } assert(0); } @system unittest { auto src = "the quick brown fox jumps over the lazy dog\r the quick brown fox jumps over the lazy dog\r "; ubyte[] dst; ubyte[] result; //arrayPrint(src); dst = compress(src); //arrayPrint(dst); result = cast(ubyte[]) uncompress(dst); //arrayPrint(result); assert(result == src); } @system unittest { ubyte[] src = new ubyte[1000000]; ubyte[] dst; ubyte[] result; src[] = 0x80; dst = compress(src); assert(dst.length*2 + 1 < src.length); result = cast(ubyte[]) uncompress(dst); assert(result == src); } /+ void arrayPrint(ubyte[] array) { //printf("array %p,%d\n", cast(void*) array, array.length); for (size_t i = 0; i < array.length; i++) { printf("%02x ", array[i]); if (((i + 1) & 15) == 0) printf("\n"); } printf("\n\n"); } +/ /// the header format the compressed stream is wrapped in enum HeaderFormat { deflate, /// a standard zlib header gzip, /// a gzip file format header determineFromData /// used when decompressing. Try to automatically detect the stream format by looking at the data } /********************************************* * Used when the data to be compressed is not all in one buffer. */ class Compress { import std.conv : to; private: z_stream zs; int level = Z_DEFAULT_COMPRESSION; int inited; immutable bool gzip; void error(int err) { if (inited) { deflateEnd(&zs); inited = 0; } throw new ZlibException(err); } public: /** * Constructor. * * Params: * level = compression level. Legal values are 1 .. 9, with 1 being the least * compression and 9 being the most. The default value is 6. * header = sets the compression type to one of the options available * in $(LREF HeaderFormat). Defaults to HeaderFormat.deflate. * * See_Also: * $(LREF compress), $(LREF HeaderFormat) */ this(int level, HeaderFormat header = HeaderFormat.deflate) in { assert(1 <= level && level <= 9); } body { this.level = level; this.gzip = header == HeaderFormat.gzip; } /// ditto this(HeaderFormat header = HeaderFormat.deflate) { this.gzip = header == HeaderFormat.gzip; } ~this() { int err; if (inited) { inited = 0; deflateEnd(&zs); } } /** * Compress the data in buf and return the compressed data. * Params: * buf = data to compress * * Returns: * the compressed data. The buffers returned from successive calls to this should be concatenated together. * */ const(void)[] compress(const(void)[] buf) { import core.memory : GC; int err; ubyte[] destbuf; if (buf.length == 0) return null; if (!inited) { err = deflateInit2(&zs, level, Z_DEFLATED, 15 + (gzip ? 16 : 0), 8, Z_DEFAULT_STRATEGY); if (err) error(err); inited = 1; } destbuf = new ubyte[zs.avail_in + buf.length]; zs.next_out = destbuf.ptr; zs.avail_out = to!uint(destbuf.length); if (zs.avail_in) buf = zs.next_in[0 .. zs.avail_in] ~ cast(ubyte[]) buf; zs.next_in = cast(typeof(zs.next_in)) buf.ptr; zs.avail_in = to!uint(buf.length); err = deflate(&zs, Z_NO_FLUSH); if (err != Z_STREAM_END && err != Z_OK) { GC.free(destbuf.ptr); error(err); } destbuf.length = destbuf.length - zs.avail_out; return destbuf; } /*** * Compress and return any remaining data. * The returned data should be appended to that returned by compress(). * Params: * mode = one of the following: * $(DL $(DT Z_SYNC_FLUSH ) $(DD Syncs up flushing to the next byte boundary. Used when more data is to be compressed later on.) $(DT Z_FULL_FLUSH ) $(DD Syncs up flushing to the next byte boundary. Used when more data is to be compressed later on, and the decompressor needs to be restartable at this point.) $(DT Z_FINISH) $(DD (default) Used when finished compressing the data. ) ) */ void[] flush(int mode = Z_FINISH) in { assert(mode == Z_FINISH || mode == Z_SYNC_FLUSH || mode == Z_FULL_FLUSH); } body { import core.memory : GC; ubyte[] destbuf; ubyte[512] tmpbuf = void; int err; if (!inited) return null; /* may be zs.avail_out+ * zs.avail_out is set nonzero by deflate in previous compress() */ //tmpbuf = new void[zs.avail_out]; zs.next_out = tmpbuf.ptr; zs.avail_out = tmpbuf.length; while ( (err = deflate(&zs, mode)) != Z_STREAM_END) { if (err == Z_OK) { if (zs.avail_out != 0 && mode != Z_FINISH) break; else if (zs.avail_out == 0) { destbuf ~= tmpbuf; zs.next_out = tmpbuf.ptr; zs.avail_out = tmpbuf.length; continue; } err = Z_BUF_ERROR; } GC.free(destbuf.ptr); error(err); } destbuf ~= tmpbuf[0 .. (tmpbuf.length - zs.avail_out)]; if (mode == Z_FINISH) { err = deflateEnd(&zs); inited = 0; if (err) error(err); } return destbuf; } } /****** * Used when the data to be decompressed is not all in one buffer. */ class UnCompress { import std.conv : to; private: z_stream zs; int inited; int done; size_t destbufsize; HeaderFormat format; void error(int err) { if (inited) { inflateEnd(&zs); inited = 0; } throw new ZlibException(err); } public: /** * Construct. destbufsize is the same as for D.zlib.uncompress(). */ this(uint destbufsize) { this.destbufsize = destbufsize; } /** ditto */ this(HeaderFormat format = HeaderFormat.determineFromData) { this.format = format; } ~this() { int err; if (inited) { inited = 0; inflateEnd(&zs); } done = 1; } /** * Decompress the data in buf and return the decompressed data. * The buffers returned from successive calls to this should be concatenated * together. */ const(void)[] uncompress(const(void)[] buf) in { assert(!done); } body { import core.memory : GC; int err; ubyte[] destbuf; if (buf.length == 0) return null; if (!inited) { int windowBits = 15; if (format == HeaderFormat.gzip) windowBits += 16; else if (format == HeaderFormat.determineFromData) windowBits += 32; err = inflateInit2(&zs, windowBits); if (err) error(err); inited = 1; } if (!destbufsize) destbufsize = to!uint(buf.length) * 2; destbuf = new ubyte[zs.avail_in * 2 + destbufsize]; zs.next_out = destbuf.ptr; zs.avail_out = to!uint(destbuf.length); if (zs.avail_in) buf = zs.next_in[0 .. zs.avail_in] ~ cast(ubyte[]) buf; zs.next_in = cast(ubyte*) buf.ptr; zs.avail_in = to!uint(buf.length); err = inflate(&zs, Z_NO_FLUSH); if (err != Z_STREAM_END && err != Z_OK) { GC.free(destbuf.ptr); error(err); } destbuf.length = destbuf.length - zs.avail_out; return destbuf; } /** * Decompress and return any remaining data. * The returned data should be appended to that returned by uncompress(). * The UnCompress object cannot be used further. */ void[] flush() in { assert(!done); } out { assert(done); } body { import core.memory : GC; ubyte[] extra; ubyte[] destbuf; int err; done = 1; if (!inited) return null; L1: destbuf = new ubyte[zs.avail_in * 2 + 100]; zs.next_out = destbuf.ptr; zs.avail_out = to!uint(destbuf.length); err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH); if (err == Z_OK && zs.avail_out == 0) { extra ~= destbuf; goto L1; } if (err != Z_STREAM_END) { GC.free(destbuf.ptr); if (err == Z_OK) err = Z_BUF_ERROR; error(err); } destbuf = destbuf.ptr[0 .. zs.next_out - destbuf.ptr]; err = etc.c.zlib.inflateEnd(&zs); inited = 0; if (err) error(err); if (extra.length) destbuf = extra ~ destbuf; return destbuf; } } /* ========================== unittest ========================= */ import std.random; import std.stdio; @system unittest // by Dave { debug(zlib) writeln("std.zlib.unittest"); bool CompressThenUncompress (void[] src) { ubyte[] dst = std.zlib.compress(src); double ratio = (dst.length / cast(double) src.length); debug(zlib) writef("src.length: %1$d, dst: %2$d, Ratio = %3$f", src.length, dst.length, ratio); ubyte[] uncompressedBuf; uncompressedBuf = cast(ubyte[]) std.zlib.uncompress(dst); assert(src.length == uncompressedBuf.length); assert(src == uncompressedBuf); return true; } // smallish buffers for (int idx = 0; idx < 25; idx++) { char[] buf = new char[uniform(0, 100)]; // Alternate between more & less compressible foreach (ref char c; buf) c = cast(char) (' ' + (uniform(0, idx % 2 ? 91 : 2))); if (CompressThenUncompress(buf)) { debug(zlib) writeln("; Success."); } else { return; } } // larger buffers for (int idx = 0; idx < 25; idx++) { char[] buf = new char[uniform(0, 1000/*0000*/)]; // Alternate between more & less compressible foreach (ref char c; buf) c = cast(char) (' ' + (uniform(0, idx % 2 ? 91 : 10))); if (CompressThenUncompress(buf)) { debug(zlib) writefln("; Success."); } else { return; } } debug(zlib) writefln("PASSED std.zlib.unittest"); } @system unittest // by Artem Rebrov { Compress cmp = new Compress; UnCompress decmp = new UnCompress; const(void)[] input; input = "tesatdffadf"; const(void)[] buf = cmp.compress(input); buf ~= cmp.flush(); const(void)[] output = decmp.uncompress(buf); //writefln("input = '%s'", cast(char[]) input); //writefln("output = '%s'", cast(char[]) output); assert( output[] == input[] ); } @system unittest { static assert(__traits(compiles, etc.c.zlib.gzclose(null))); // bugzilla 15457 }