compressedStream.cpp revision 6412:53a41e7cbe05
139215Sgibbs/*
2107178Snjl * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
339215Sgibbs * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4107178Snjl *
539215Sgibbs * This code is free software; you can redistribute it and/or modify it
639215Sgibbs * under the terms of the GNU General Public License version 2 only, as
739215Sgibbs * published by the Free Software Foundation.
839215Sgibbs *
939215Sgibbs * This code is distributed in the hope that it will be useful, but WITHOUT
1039215Sgibbs * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1139215Sgibbs * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1239215Sgibbs * version 2 for more details (a copy is included in the LICENSE file that
1339215Sgibbs * accompanied this code).
1439215Sgibbs *
1539215Sgibbs * You should have received a copy of the GNU General Public License version
1639215Sgibbs * 2 along with this work; if not, write to the Free Software Foundation,
1739215Sgibbs * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1839215Sgibbs *
1939215Sgibbs * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2039215Sgibbs * or visit www.oracle.com if you need additional information or have any
2139215Sgibbs * questions.
2239215Sgibbs *
2339215Sgibbs */
2439215Sgibbs
2539215Sgibbs#include "precompiled.hpp"
2639215Sgibbs#include "code/compressedStream.hpp"
2739215Sgibbs#include "utilities/ostream.hpp"
2850476Speter
2939215Sgibbs// 32-bit one-to-one sign encoding taken from Pack200
3039215Sgibbs// converts leading sign bits into leading zeroes with trailing sign bit
3139215Sgibbsinline juint CompressedStream::encode_sign(jint  value) {
32121184Ssimokawa  return (value << 1) ^ (value >> 31);
3344498Sgibbs}
34107178Snjlinline jint  CompressedStream::decode_sign(juint value) {
3539215Sgibbs  return (value >> 1) ^ -(jint)(value & 1);
3644498Sgibbs}
3739215Sgibbs
3839215Sgibbs// 32-bit self-inverse encoding of float bits
3939215Sgibbs// converts trailing zeroes (common in floats) to leading zeroes
40107178Snjlinline juint CompressedStream::reverse_int(juint i) {
4139215Sgibbs  // Hacker's Delight, Figure 7-1
4239215Sgibbs  i = (i & 0x55555555) << 1 | (i >> 1) & 0x55555555;
43107178Snjl  i = (i & 0x33333333) << 2 | (i >> 2) & 0x33333333;
44109161Snjl  i = (i & 0x0f0f0f0f) << 4 | (i >> 4) & 0x0f0f0f0f;
45107178Snjl  i = (i << 24) | ((i & 0xff00) << 8) | ((i >> 8) & 0xff00) | (i >> 24);
46107178Snjl  return i;
47107178Snjl}
48107178Snjl
49120428Ssimokawa
50107178Snjljint CompressedReadStream::read_signed_int() {
5139215Sgibbs  return decode_sign(read_int());
52107178Snjl}
5339215Sgibbs
54107178Snjl// Compressing floats is simple, because the only common pattern
5539215Sgibbs// is trailing zeroes.  (Compare leading sign bits on ints.)
56107178Snjl// Since floats are left-justified, as opposed to right-justified
57107178Snjl// ints, we can bit-reverse them in order to take advantage of int
58107178Snjl// compression.
59162704Smjacob
60107178Snjljfloat CompressedReadStream::read_float() {
61107178Snjl  int rf = read_int();
62107178Snjl  int f  = reverse_int(rf);
63107178Snjl  return jfloat_cast(f);
64107178Snjl}
65162704Smjacob
66121184Ssimokawajdouble CompressedReadStream::read_double() {
67121184Ssimokawa  jint rh = read_int();
68107178Snjl  jint rl = read_int();
69107178Snjl  jint h  = reverse_int(rh);
70107178Snjl  jint l  = reverse_int(rl);
71107178Snjl  return jdouble_cast(jlong_from(h, l));
72107178Snjl}
73107178Snjl
74107178Snjljlong CompressedReadStream::read_long() {
75107178Snjl  jint low  = read_signed_int();
76107178Snjl  jint high = read_signed_int();
77107178Snjl  return jlong_from(high, low);
7844498Sgibbs}
7944498Sgibbs
8044498SgibbsCompressedWriteStream::CompressedWriteStream(int initial_size) : CompressedStream(NULL, 0) {
8144498Sgibbs  _buffer   = NEW_RESOURCE_ARRAY(u_char, initial_size);
8239215Sgibbs  _size     = initial_size;
83107178Snjl  _position = 0;
84107178Snjl}
85107178Snjl
86107178Snjlvoid CompressedWriteStream::grow() {
87107178Snjl  u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, _size * 2);
88107178Snjl  memcpy(_new_buffer, _buffer, _position);
89107178Snjl  _buffer = _new_buffer;
90162704Smjacob  _size   = _size * 2;
91237601Sken}
92107178Snjl
93107178Snjlvoid CompressedWriteStream::write_signed_int(jint value) {
94107178Snjl  // this encoding, called SIGNED5, is taken from Pack200
95107178Snjl  write_int(encode_sign(value));
96107178Snjl}
97107178Snjl
98107178Snjlvoid CompressedWriteStream::write_float(jfloat value) {
9939215Sgibbs  juint f = jint_cast(value);
10039215Sgibbs  juint rf = reverse_int(f);
10139215Sgibbs  assert(f == reverse_int(rf), "can re-read same bits");
10239215Sgibbs  write_int(rf);
103228481Sed}
104228481Sed
105107178Snjlvoid CompressedWriteStream::write_double(jdouble value) {
106107178Snjl  juint h  = high(jlong_cast(value));
10739215Sgibbs  juint l  = low( jlong_cast(value));
108107178Snjl  juint rh = reverse_int(h);
109107178Snjl  juint rl = reverse_int(l);
110107178Snjl  assert(h == reverse_int(rh), "can re-read same bits");
111107178Snjl  assert(l == reverse_int(rl), "can re-read same bits");
112107178Snjl  write_int(rh);
113107178Snjl  write_int(rl);
114107178Snjl}
115107178Snjl
116107178Snjlvoid CompressedWriteStream::write_long(jlong value) {
117107178Snjl  write_signed_int(low(value));
118107178Snjl  write_signed_int(high(value));
119107178Snjl}
120107178Snjl
121162704Smjacob
12239215Sgibbs/// The remaining details
123107178Snjl
124107178Snjl#ifndef PRODUCT
12539215Sgibbs// set this to trigger unit test
126107178Snjlvoid test_compressed_stream(int trace);
127107178Snjlbool test_compressed_stream_enabled = false;
12839215Sgibbs#endif
129107178Snjl
130107178Snjl// This encoding, called UNSIGNED5, is taken from J2SE Pack200.
13144498Sgibbs// It assumes that most values have lots of leading zeroes.
132107178Snjl// Very small values, in the range [0..191], code in one byte.
133107178Snjl// Any 32-bit value (including negatives) can be coded, in
13444498Sgibbs// up to five bytes.  The grammar is:
135107178Snjl//    low_byte  = [0..191]
136107178Snjl//    high_byte = [192..255]
137107178Snjl//    any_byte  = low_byte | high_byte
138107178Snjl//    coding = low_byte
13944498Sgibbs//           | high_byte low_byte
140107178Snjl//           | high_byte high_byte low_byte
141107178Snjl//           | high_byte high_byte high_byte low_byte
142107178Snjl//           | high_byte high_byte high_byte high_byte any_byte
143107178Snjl// Each high_byte contributes six bits of payload.
14463185Smjacob// The encoding is one-to-one (except for integer overflow)
145107178Snjl// and easy to parse and unparse.
146121184Ssimokawa
147121184Ssimokawajint CompressedReadStream::read_int_mb(jint b0) {
148121184Ssimokawa  int     pos = position() - 1;
149121184Ssimokawa  u_char* buf = buffer() + pos;
150121184Ssimokawa  assert(buf[0] == b0 && b0 >= L, "correctly called");
151121184Ssimokawa  jint    sum = b0;
152121184Ssimokawa  // must collect more bytes:  b[1]...b[4]
153121184Ssimokawa  int lg_H_i = lg_H;
154121184Ssimokawa  for (int i = 0; ; ) {
155121184Ssimokawa    jint b_i = buf[++i]; // b_i = read(); ++i;
156121184Ssimokawa    sum += b_i << lg_H_i;  // sum += b[i]*(64**i)
157121184Ssimokawa    if (b_i < L || i == MAX_i) {
158121184Ssimokawa      set_position(pos+i+1);
159121184Ssimokawa      return sum;
160121184Ssimokawa    }
161121184Ssimokawa    lg_H_i += lg_H;
162121184Ssimokawa  }
163121184Ssimokawa}
164121184Ssimokawa
165121184Ssimokawavoid CompressedWriteStream::write_int_mb(jint value) {
166121184Ssimokawa  debug_only(int pos1 = position());
167121184Ssimokawa  juint sum = value;
168121184Ssimokawa  for (int i = 0; ; ) {
169121184Ssimokawa    if (sum < L || i == MAX_i) {
170121184Ssimokawa      // remainder is either a "low code" or the 5th byte
171121184Ssimokawa      assert(sum == (u_char)sum, "valid byte");
172121184Ssimokawa      write((u_char)sum);
173107178Snjl      break;
174121184Ssimokawa    }
175107178Snjl    sum -= L;
176107178Snjl    int b_i = L + (sum % H);  // this is a "high code"
177107178Snjl    sum >>= lg_H;             // extracted 6 bits
178121184Ssimokawa    write(b_i); ++i;
179107178Snjl  }
180107178Snjl
181107178Snjl#ifndef PRODUCT
182107178Snjl  if (test_compressed_stream_enabled) {  // hack to enable this stress test
183107178Snjl    test_compressed_stream_enabled = false;
184107178Snjl    test_compressed_stream(0);
185107178Snjl  }
186107178Snjl#endif
187107178Snjl}
188107178Snjl
189107178Snjl
190107178Snjl#ifndef PRODUCT
191107178Snjl/// a unit test (can be run by hand from a debugger)
192107178Snjl
193107178Snjl// Avoid a VS2005 compiler stack overflow w/ fastdebug build.
194107178Snjl// The following pragma optimize turns off optimization ONLY
195107178Snjl// for this block (a matching directive turns it back on later).
196107178Snjl// These directives can be removed once the MS VS.NET 2005
197162704Smjacob// compiler stack overflow is fixed.
198162704Smjacob#if defined(_MSC_VER) && _MSC_VER >=1400 && !defined(_WIN64)
199162704Smjacob#pragma optimize("", off)
20039215Sgibbs#pragma warning(disable: 4748)
20139215Sgibbs#endif
20239215Sgibbs
20339215Sgibbs// generator for an "interesting" set of critical values
20439215Sgibbsenum { stretch_limit = (1<<16) * (64-16+1) };
20539215Sgibbsstatic jlong stretch(jint x, int bits) {
20639215Sgibbs  // put x[high 4] into place
207107178Snjl  jlong h = (jlong)((x >> (16-4))) << (bits - 4);
208107178Snjl  // put x[low 12] into place, sign extended
20939215Sgibbs  jlong l = ((jlong)x << (64-12)) >> (64-12);
21039215Sgibbs  // move l upwards, maybe
211107178Snjl  l <<= (x >> 16);
212107178Snjl  return h ^ l;
213107178Snjl}
214107178Snjl
215107178SnjlPRAGMA_DIAG_PUSH
216107178SnjlPRAGMA_FORMAT_IGNORED // Someone needs to deal with this.
217107178Snjlvoid test_compressed_stream(int trace) {
218107178Snjl  CompressedWriteStream bytes(stretch_limit * 100);
21944498Sgibbs  jint n;
22044498Sgibbs  int step = 0, fails = 0;
22144498Sgibbs#define CHECKXY(x, y, fmt) { \
222107178Snjl    ++step; \
223107178Snjl    int xlen = (pos = decode.position()) - lastpos; lastpos = pos; \
224107178Snjl    if (trace > 0 && (step % trace) == 0) { \
22544498Sgibbs      tty->print_cr("step %d, n=%08x: value=" fmt " (len=%d)", \
226107178Snjl                    step, n, x, xlen); } \
227107178Snjl    if (x != y) {                                                     \
228107178Snjl      tty->print_cr("step %d, n=%d: " fmt " != " fmt, step, n, x, y); \
229196955Ssbruno      fails++; \
23044498Sgibbs    } }
231107178Snjl  for (n = 0; n < (1<<8); n++) {
232107178Snjl    jbyte x = (jbyte)n;
233107178Snjl    bytes.write_byte(x); ++step;
234107178Snjl  }
235107178Snjl  for (n = 0; n < stretch_limit; n++) {
236107178Snjl    jint x = (jint)stretch(n, 32);
237120428Ssimokawa    bytes.write_int(x); ++step;
238120428Ssimokawa    bytes.write_signed_int(x); ++step;
239120428Ssimokawa    bytes.write_float(jfloat_cast(x)); ++step;
240120428Ssimokawa  }
241120428Ssimokawa  for (n = 0; n < stretch_limit; n++) {
242120428Ssimokawa    jlong x = stretch(n, 64);
243120428Ssimokawa    bytes.write_long(x); ++step;
244120428Ssimokawa    bytes.write_double(jdouble_cast(x)); ++step;
245120428Ssimokawa  }
246120428Ssimokawa  int length = bytes.position();
247120428Ssimokawa  if (trace != 0)
248120428Ssimokawa    tty->print_cr("set up test of %d stream values, size %d", step, length);
249107178Snjl  step = 0;
250107178Snjl  // now decode it all
25144498Sgibbs  CompressedReadStream decode(bytes.buffer());
252121184Ssimokawa  int pos, lastpos = decode.position();
253162704Smjacob  for (n = 0; n < (1<<8); n++) {
254121184Ssimokawa    jbyte x = (jbyte)n;
255121184Ssimokawa    jbyte y = decode.read_byte();
256107178Snjl    CHECKXY(x, y, "%db");
257107178Snjl  }
25844498Sgibbs  for (n = 0; n < stretch_limit; n++) {
259162704Smjacob    jint x = (jint)stretch(n, 32);
260109161Snjl    jint y1 = decode.read_int();
261109161Snjl    CHECKXY(x, y1, "%du");
262162704Smjacob    jint y2 = decode.read_signed_int();
263109161Snjl    CHECKXY(x, y2, "%di");
264109161Snjl    jint y3 = jint_cast(decode.read_float());
265109161Snjl    CHECKXY(x, y3, "%df");
266109161Snjl  }
267109161Snjl  for (n = 0; n < stretch_limit; n++) {
268109161Snjl    jlong x = stretch(n, 64);
269109161Snjl    jlong y1 = decode.read_long();
270109161Snjl    CHECKXY(x, y1, INT64_FORMAT "l");
271109161Snjl    jlong y2 = jlong_cast(decode.read_double());
272162704Smjacob    CHECKXY(x, y2, INT64_FORMAT "d");
273162704Smjacob  }
274162704Smjacob  int length2 = decode.position();
275162704Smjacob  if (trace != 0)
276162704Smjacob    tty->print_cr("finished test of %d stream values, size %d", step, length2);
277162704Smjacob  guarantee(length == length2, "bad length");
278162704Smjacob  guarantee(fails == 0, "test failures");
279162704Smjacob}
280109161SnjlPRAGMA_DIAG_POP
281109161Snjl
282162704Smjacob#if defined(_MSC_VER) &&_MSC_VER >=1400 && !defined(_WIN64)
283109161Snjl#pragma warning(default: 4748)
284109161Snjl#pragma optimize("", on)
285109161Snjl#endif
286228481Sed
287107178Snjl#endif // PRODUCT
288228481Sed