compressedStream.cpp revision 6412:53a41e7cbe05
139215Sgibbs/* 2107178Snjl * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 339215Sgibbs * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4107178Snjl * 539215Sgibbs * This code is free software; you can redistribute it and/or modify it 639215Sgibbs * under the terms of the GNU General Public License version 2 only, as 739215Sgibbs * published by the Free Software Foundation. 839215Sgibbs * 939215Sgibbs * This code is distributed in the hope that it will be useful, but WITHOUT 1039215Sgibbs * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1139215Sgibbs * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1239215Sgibbs * version 2 for more details (a copy is included in the LICENSE file that 1339215Sgibbs * accompanied this code). 1439215Sgibbs * 1539215Sgibbs * You should have received a copy of the GNU General Public License version 1639215Sgibbs * 2 along with this work; if not, write to the Free Software Foundation, 1739215Sgibbs * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1839215Sgibbs * 1939215Sgibbs * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 2039215Sgibbs * or visit www.oracle.com if you need additional information or have any 2139215Sgibbs * questions. 2239215Sgibbs * 2339215Sgibbs */ 2439215Sgibbs 2539215Sgibbs#include "precompiled.hpp" 2639215Sgibbs#include "code/compressedStream.hpp" 2739215Sgibbs#include "utilities/ostream.hpp" 2850476Speter 2939215Sgibbs// 32-bit one-to-one sign encoding taken from Pack200 3039215Sgibbs// converts leading sign bits into leading zeroes with trailing sign bit 3139215Sgibbsinline juint CompressedStream::encode_sign(jint value) { 32121184Ssimokawa return (value << 1) ^ (value >> 31); 3344498Sgibbs} 34107178Snjlinline jint CompressedStream::decode_sign(juint value) { 3539215Sgibbs return (value >> 1) ^ -(jint)(value & 1); 3644498Sgibbs} 3739215Sgibbs 3839215Sgibbs// 32-bit self-inverse encoding of float bits 3939215Sgibbs// converts trailing zeroes (common in floats) to leading zeroes 40107178Snjlinline juint CompressedStream::reverse_int(juint i) { 4139215Sgibbs // Hacker's Delight, Figure 7-1 4239215Sgibbs i = (i & 0x55555555) << 1 | (i >> 1) & 0x55555555; 43107178Snjl i = (i & 0x33333333) << 2 | (i >> 2) & 0x33333333; 44109161Snjl i = (i & 0x0f0f0f0f) << 4 | (i >> 4) & 0x0f0f0f0f; 45107178Snjl i = (i << 24) | ((i & 0xff00) << 8) | ((i >> 8) & 0xff00) | (i >> 24); 46107178Snjl return i; 47107178Snjl} 48107178Snjl 49120428Ssimokawa 50107178Snjljint CompressedReadStream::read_signed_int() { 5139215Sgibbs return decode_sign(read_int()); 52107178Snjl} 5339215Sgibbs 54107178Snjl// Compressing floats is simple, because the only common pattern 5539215Sgibbs// is trailing zeroes. (Compare leading sign bits on ints.) 56107178Snjl// Since floats are left-justified, as opposed to right-justified 57107178Snjl// ints, we can bit-reverse them in order to take advantage of int 58107178Snjl// compression. 59162704Smjacob 60107178Snjljfloat CompressedReadStream::read_float() { 61107178Snjl int rf = read_int(); 62107178Snjl int f = reverse_int(rf); 63107178Snjl return jfloat_cast(f); 64107178Snjl} 65162704Smjacob 66121184Ssimokawajdouble CompressedReadStream::read_double() { 67121184Ssimokawa jint rh = read_int(); 68107178Snjl jint rl = read_int(); 69107178Snjl jint h = reverse_int(rh); 70107178Snjl jint l = reverse_int(rl); 71107178Snjl return jdouble_cast(jlong_from(h, l)); 72107178Snjl} 73107178Snjl 74107178Snjljlong CompressedReadStream::read_long() { 75107178Snjl jint low = read_signed_int(); 76107178Snjl jint high = read_signed_int(); 77107178Snjl return jlong_from(high, low); 7844498Sgibbs} 7944498Sgibbs 8044498SgibbsCompressedWriteStream::CompressedWriteStream(int initial_size) : CompressedStream(NULL, 0) { 8144498Sgibbs _buffer = NEW_RESOURCE_ARRAY(u_char, initial_size); 8239215Sgibbs _size = initial_size; 83107178Snjl _position = 0; 84107178Snjl} 85107178Snjl 86107178Snjlvoid CompressedWriteStream::grow() { 87107178Snjl u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, _size * 2); 88107178Snjl memcpy(_new_buffer, _buffer, _position); 89107178Snjl _buffer = _new_buffer; 90162704Smjacob _size = _size * 2; 91237601Sken} 92107178Snjl 93107178Snjlvoid CompressedWriteStream::write_signed_int(jint value) { 94107178Snjl // this encoding, called SIGNED5, is taken from Pack200 95107178Snjl write_int(encode_sign(value)); 96107178Snjl} 97107178Snjl 98107178Snjlvoid CompressedWriteStream::write_float(jfloat value) { 9939215Sgibbs juint f = jint_cast(value); 10039215Sgibbs juint rf = reverse_int(f); 10139215Sgibbs assert(f == reverse_int(rf), "can re-read same bits"); 10239215Sgibbs write_int(rf); 103228481Sed} 104228481Sed 105107178Snjlvoid CompressedWriteStream::write_double(jdouble value) { 106107178Snjl juint h = high(jlong_cast(value)); 10739215Sgibbs juint l = low( jlong_cast(value)); 108107178Snjl juint rh = reverse_int(h); 109107178Snjl juint rl = reverse_int(l); 110107178Snjl assert(h == reverse_int(rh), "can re-read same bits"); 111107178Snjl assert(l == reverse_int(rl), "can re-read same bits"); 112107178Snjl write_int(rh); 113107178Snjl write_int(rl); 114107178Snjl} 115107178Snjl 116107178Snjlvoid CompressedWriteStream::write_long(jlong value) { 117107178Snjl write_signed_int(low(value)); 118107178Snjl write_signed_int(high(value)); 119107178Snjl} 120107178Snjl 121162704Smjacob 12239215Sgibbs/// The remaining details 123107178Snjl 124107178Snjl#ifndef PRODUCT 12539215Sgibbs// set this to trigger unit test 126107178Snjlvoid test_compressed_stream(int trace); 127107178Snjlbool test_compressed_stream_enabled = false; 12839215Sgibbs#endif 129107178Snjl 130107178Snjl// This encoding, called UNSIGNED5, is taken from J2SE Pack200. 13144498Sgibbs// It assumes that most values have lots of leading zeroes. 132107178Snjl// Very small values, in the range [0..191], code in one byte. 133107178Snjl// Any 32-bit value (including negatives) can be coded, in 13444498Sgibbs// up to five bytes. The grammar is: 135107178Snjl// low_byte = [0..191] 136107178Snjl// high_byte = [192..255] 137107178Snjl// any_byte = low_byte | high_byte 138107178Snjl// coding = low_byte 13944498Sgibbs// | high_byte low_byte 140107178Snjl// | high_byte high_byte low_byte 141107178Snjl// | high_byte high_byte high_byte low_byte 142107178Snjl// | high_byte high_byte high_byte high_byte any_byte 143107178Snjl// Each high_byte contributes six bits of payload. 14463185Smjacob// The encoding is one-to-one (except for integer overflow) 145107178Snjl// and easy to parse and unparse. 146121184Ssimokawa 147121184Ssimokawajint CompressedReadStream::read_int_mb(jint b0) { 148121184Ssimokawa int pos = position() - 1; 149121184Ssimokawa u_char* buf = buffer() + pos; 150121184Ssimokawa assert(buf[0] == b0 && b0 >= L, "correctly called"); 151121184Ssimokawa jint sum = b0; 152121184Ssimokawa // must collect more bytes: b[1]...b[4] 153121184Ssimokawa int lg_H_i = lg_H; 154121184Ssimokawa for (int i = 0; ; ) { 155121184Ssimokawa jint b_i = buf[++i]; // b_i = read(); ++i; 156121184Ssimokawa sum += b_i << lg_H_i; // sum += b[i]*(64**i) 157121184Ssimokawa if (b_i < L || i == MAX_i) { 158121184Ssimokawa set_position(pos+i+1); 159121184Ssimokawa return sum; 160121184Ssimokawa } 161121184Ssimokawa lg_H_i += lg_H; 162121184Ssimokawa } 163121184Ssimokawa} 164121184Ssimokawa 165121184Ssimokawavoid CompressedWriteStream::write_int_mb(jint value) { 166121184Ssimokawa debug_only(int pos1 = position()); 167121184Ssimokawa juint sum = value; 168121184Ssimokawa for (int i = 0; ; ) { 169121184Ssimokawa if (sum < L || i == MAX_i) { 170121184Ssimokawa // remainder is either a "low code" or the 5th byte 171121184Ssimokawa assert(sum == (u_char)sum, "valid byte"); 172121184Ssimokawa write((u_char)sum); 173107178Snjl break; 174121184Ssimokawa } 175107178Snjl sum -= L; 176107178Snjl int b_i = L + (sum % H); // this is a "high code" 177107178Snjl sum >>= lg_H; // extracted 6 bits 178121184Ssimokawa write(b_i); ++i; 179107178Snjl } 180107178Snjl 181107178Snjl#ifndef PRODUCT 182107178Snjl if (test_compressed_stream_enabled) { // hack to enable this stress test 183107178Snjl test_compressed_stream_enabled = false; 184107178Snjl test_compressed_stream(0); 185107178Snjl } 186107178Snjl#endif 187107178Snjl} 188107178Snjl 189107178Snjl 190107178Snjl#ifndef PRODUCT 191107178Snjl/// a unit test (can be run by hand from a debugger) 192107178Snjl 193107178Snjl// Avoid a VS2005 compiler stack overflow w/ fastdebug build. 194107178Snjl// The following pragma optimize turns off optimization ONLY 195107178Snjl// for this block (a matching directive turns it back on later). 196107178Snjl// These directives can be removed once the MS VS.NET 2005 197162704Smjacob// compiler stack overflow is fixed. 198162704Smjacob#if defined(_MSC_VER) && _MSC_VER >=1400 && !defined(_WIN64) 199162704Smjacob#pragma optimize("", off) 20039215Sgibbs#pragma warning(disable: 4748) 20139215Sgibbs#endif 20239215Sgibbs 20339215Sgibbs// generator for an "interesting" set of critical values 20439215Sgibbsenum { stretch_limit = (1<<16) * (64-16+1) }; 20539215Sgibbsstatic jlong stretch(jint x, int bits) { 20639215Sgibbs // put x[high 4] into place 207107178Snjl jlong h = (jlong)((x >> (16-4))) << (bits - 4); 208107178Snjl // put x[low 12] into place, sign extended 20939215Sgibbs jlong l = ((jlong)x << (64-12)) >> (64-12); 21039215Sgibbs // move l upwards, maybe 211107178Snjl l <<= (x >> 16); 212107178Snjl return h ^ l; 213107178Snjl} 214107178Snjl 215107178SnjlPRAGMA_DIAG_PUSH 216107178SnjlPRAGMA_FORMAT_IGNORED // Someone needs to deal with this. 217107178Snjlvoid test_compressed_stream(int trace) { 218107178Snjl CompressedWriteStream bytes(stretch_limit * 100); 21944498Sgibbs jint n; 22044498Sgibbs int step = 0, fails = 0; 22144498Sgibbs#define CHECKXY(x, y, fmt) { \ 222107178Snjl ++step; \ 223107178Snjl int xlen = (pos = decode.position()) - lastpos; lastpos = pos; \ 224107178Snjl if (trace > 0 && (step % trace) == 0) { \ 22544498Sgibbs tty->print_cr("step %d, n=%08x: value=" fmt " (len=%d)", \ 226107178Snjl step, n, x, xlen); } \ 227107178Snjl if (x != y) { \ 228107178Snjl tty->print_cr("step %d, n=%d: " fmt " != " fmt, step, n, x, y); \ 229196955Ssbruno fails++; \ 23044498Sgibbs } } 231107178Snjl for (n = 0; n < (1<<8); n++) { 232107178Snjl jbyte x = (jbyte)n; 233107178Snjl bytes.write_byte(x); ++step; 234107178Snjl } 235107178Snjl for (n = 0; n < stretch_limit; n++) { 236107178Snjl jint x = (jint)stretch(n, 32); 237120428Ssimokawa bytes.write_int(x); ++step; 238120428Ssimokawa bytes.write_signed_int(x); ++step; 239120428Ssimokawa bytes.write_float(jfloat_cast(x)); ++step; 240120428Ssimokawa } 241120428Ssimokawa for (n = 0; n < stretch_limit; n++) { 242120428Ssimokawa jlong x = stretch(n, 64); 243120428Ssimokawa bytes.write_long(x); ++step; 244120428Ssimokawa bytes.write_double(jdouble_cast(x)); ++step; 245120428Ssimokawa } 246120428Ssimokawa int length = bytes.position(); 247120428Ssimokawa if (trace != 0) 248120428Ssimokawa tty->print_cr("set up test of %d stream values, size %d", step, length); 249107178Snjl step = 0; 250107178Snjl // now decode it all 25144498Sgibbs CompressedReadStream decode(bytes.buffer()); 252121184Ssimokawa int pos, lastpos = decode.position(); 253162704Smjacob for (n = 0; n < (1<<8); n++) { 254121184Ssimokawa jbyte x = (jbyte)n; 255121184Ssimokawa jbyte y = decode.read_byte(); 256107178Snjl CHECKXY(x, y, "%db"); 257107178Snjl } 25844498Sgibbs for (n = 0; n < stretch_limit; n++) { 259162704Smjacob jint x = (jint)stretch(n, 32); 260109161Snjl jint y1 = decode.read_int(); 261109161Snjl CHECKXY(x, y1, "%du"); 262162704Smjacob jint y2 = decode.read_signed_int(); 263109161Snjl CHECKXY(x, y2, "%di"); 264109161Snjl jint y3 = jint_cast(decode.read_float()); 265109161Snjl CHECKXY(x, y3, "%df"); 266109161Snjl } 267109161Snjl for (n = 0; n < stretch_limit; n++) { 268109161Snjl jlong x = stretch(n, 64); 269109161Snjl jlong y1 = decode.read_long(); 270109161Snjl CHECKXY(x, y1, INT64_FORMAT "l"); 271109161Snjl jlong y2 = jlong_cast(decode.read_double()); 272162704Smjacob CHECKXY(x, y2, INT64_FORMAT "d"); 273162704Smjacob } 274162704Smjacob int length2 = decode.position(); 275162704Smjacob if (trace != 0) 276162704Smjacob tty->print_cr("finished test of %d stream values, size %d", step, length2); 277162704Smjacob guarantee(length == length2, "bad length"); 278162704Smjacob guarantee(fails == 0, "test failures"); 279162704Smjacob} 280109161SnjlPRAGMA_DIAG_POP 281109161Snjl 282162704Smjacob#if defined(_MSC_VER) &&_MSC_VER >=1400 && !defined(_WIN64) 283109161Snjl#pragma warning(default: 4748) 284109161Snjl#pragma optimize("", on) 285109161Snjl#endif 286228481Sed 287107178Snjl#endif // PRODUCT 288228481Sed