1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 2000,2008 Oracle.  All rights reserved.
5 *
6 * $Id: TupleInput.java,v 12.7 2008/01/08 20:58:36 bostic Exp $
7 */
8
9package com.sleepycat.bind.tuple;
10
11import java.math.BigInteger;
12
13import com.sleepycat.util.FastInputStream;
14import com.sleepycat.util.PackedInteger;
15import com.sleepycat.util.UtfOps;
16
17/**
18 * An <code>InputStream</code> with <code>DataInput</code>-like methods for
19 * reading tuple fields.  It is used by <code>TupleBinding</code>.
20 *
21 * <p>This class has many methods that have the same signatures as methods in
22 * the {@link java.io.DataInput} interface.  The reason this class does not
23 * implement {@link java.io.DataInput} is because it would break the interface
24 * contract for those methods because of data format differences.</p>
25 *
26 * <p>Signed numbers are stored in the buffer in MSB (most significant byte
27 * first) order with their sign bit (high-order bit) inverted to cause negative
28 * numbers to be sorted first when comparing values as unsigned byte arrays,
29 * as done in a database.  Unsigned numbers, including characters, are stored
30 * in MSB order with no change to their sign bit.  BigInteger values are stored
31 * with a preceding length having the same sign as the value.</p>
32 *
33 * <p>Strings and character arrays are stored either as a fixed length array of
34 * unicode characters, where the length must be known by the application, or as
35 * a null-terminated UTF byte array.</p>
36 * <ul>
37 * <li>Null strings are UTF encoded as { 0xFF }, which is not allowed in a
38 * standard UTF encoding.  This allows null strings, as distinct from empty or
39 * zero length strings, to be represented in a tuple.  Using the default
40 * comparator, null strings will be ordered last.</li>
41 * <li>Zero (0x0000) character values are UTF encoded as non-zero values, and
42 * therefore embedded zeros in the string are supported.  The sequence { 0xC0,
43 * 0x80 } is used to encode a zero character.  This UTF encoding is the same
44 * one used by native Java UTF libraries.  However, this encoding of zero does
45 * impact the lexicographical ordering, and zeros will not be sorted first (the
46 * natural order) or last.  For all character values other than zero, the
47 * default UTF byte ordering is the same as the Unicode lexicographical
48 * character ordering.</li>
49 * </ul>
50 *
51 * <p>Floats and doubles are stored using two different representations: sorted
52 * representation and integer-bit (IEEE 754) representation.  If you use
53 * negative floating point numbers in a key, you should use sorted
54 * representation; alternatively you may use integer-bit representation but you
55 * will need to implement and configure a custom comparator to get correct
56 * numeric ordering for negative numbers.</p>
57 *
58 * <p>To use sorted representation use this set of methods:</p>
59 * <ul>
60 * <li>{@link TupleOutput#writeSortedFloat}</li>
61 * <li>{@link TupleInput#readSortedFloat}</li>
62 * <li>{@link TupleOutput#writeSortedDouble}</li>
63 * <li>{@link TupleInput#readSortedDouble}</li>
64 * </ul>
65 *
66 * <p>To use integer-bit representation use this set of methods:</p>
67 * <ul>
68 * <li>{@link TupleOutput#writeFloat}</li>
69 * <li>{@link TupleInput#readFloat}</li>
70 * <li>{@link TupleOutput#writeDouble}</li>
71 * <li>{@link TupleInput#readDouble}</li>
72 * </ul>
73 *
74 * @author Mark Hayes
75 */
76public class TupleInput extends FastInputStream {
77
78    /**
79     * Creates a tuple input object for reading a byte array of tuple data.  A
80     * reference to the byte array will be kept by this object (it will not be
81     * copied) and therefore the byte array should not be modified while this
82     * object is in use.
83     *
84     * @param buffer is the byte array to be read and should contain data in
85     * tuple format.
86     */
87    public TupleInput(byte[] buffer) {
88
89        super(buffer);
90    }
91
92    /**
93     * Creates a tuple input object for reading a byte array of tuple data at
94     * a given offset for a given length.  A reference to the byte array will
95     * be kept by this object (it will not be copied) and therefore the byte
96     * array should not be modified while this object is in use.
97     *
98     * @param buffer is the byte array to be read and should contain data in
99     * tuple format.
100     *
101     * @param offset is the byte offset at which to begin reading.
102     *
103     * @param length is the number of bytes to be read.
104     */
105    public TupleInput(byte[] buffer, int offset, int length) {
106
107        super(buffer, offset, length);
108    }
109
110    /**
111     * Creates a tuple input object from the data contained in a tuple output
112     * object.  A reference to the tuple output's byte array will be kept by
113     * this object (it will not be copied) and therefore the tuple output
114     * object should not be modified while this object is in use.
115     *
116     * @param output is the tuple output object containing the data to be read.
117     */
118    public TupleInput(TupleOutput output) {
119
120        super(output.getBufferBytes(), output.getBufferOffset(),
121              output.getBufferLength());
122    }
123
124    // --- begin DataInput compatible methods ---
125
126    /**
127     * Reads a null-terminated UTF string from the data buffer and converts
128     * the data from UTF to Unicode.
129     * Reads values that were written using {@link
130     * TupleOutput#writeString(String)}.
131     *
132     * @return the converted string.
133     *
134     * @throws IndexOutOfBoundsException if no null terminating byte is found
135     * in the buffer.
136     *
137     * @throws IllegalArgumentException malformed UTF data is encountered.
138     */
139    public final String readString()
140        throws IndexOutOfBoundsException, IllegalArgumentException  {
141
142        byte[] myBuf = buf;
143        int myOff = off;
144        if (available() >= 2 &&
145            myBuf[myOff] == TupleOutput.NULL_STRING_UTF_VALUE &&
146            myBuf[myOff + 1] == 0) {
147            skip(2);
148            return null;
149        } else {
150            int byteLen = UtfOps.getZeroTerminatedByteLength(myBuf, myOff);
151            skip(byteLen + 1);
152            return UtfOps.bytesToString(myBuf, myOff, byteLen);
153        }
154    }
155
156    /**
157     * Reads a char (two byte) unsigned value from the buffer.
158     * Reads values that were written using {@link TupleOutput#writeChar}.
159     *
160     * @return the value read from the buffer.
161     *
162     * @throws IndexOutOfBoundsException if not enough bytes are available in
163     * the buffer.
164     */
165    public final char readChar()
166        throws IndexOutOfBoundsException {
167
168        return (char) readUnsignedShort();
169    }
170
171    /**
172     * Reads a boolean (one byte) unsigned value from the buffer and returns
173     * true if it is non-zero and false if it is zero.
174     * Reads values that were written using {@link TupleOutput#writeBoolean}.
175     *
176     * @return the value read from the buffer.
177     *
178     * @throws IndexOutOfBoundsException if not enough bytes are available in
179     * the buffer.
180     */
181    public final boolean readBoolean()
182        throws IndexOutOfBoundsException {
183
184        int c = readFast();
185        if (c < 0) {
186            throw new IndexOutOfBoundsException();
187        }
188        return (c != 0);
189    }
190
191    /**
192     * Reads a signed byte (one byte) value from the buffer.
193     * Reads values that were written using {@link TupleOutput#writeByte}.
194     *
195     * @return the value read from the buffer.
196     *
197     * @throws IndexOutOfBoundsException if not enough bytes are available in
198     * the buffer.
199     */
200    public final byte readByte()
201        throws IndexOutOfBoundsException {
202
203        return (byte) (readUnsignedByte() ^ 0x80);
204    }
205
206    /**
207     * Reads a signed short (two byte) value from the buffer.
208     * Reads values that were written using {@link TupleOutput#writeShort}.
209     *
210     * @return the value read from the buffer.
211     *
212     * @throws IndexOutOfBoundsException if not enough bytes are available in
213     * the buffer.
214     */
215    public final short readShort()
216        throws IndexOutOfBoundsException {
217
218        return (short) (readUnsignedShort() ^ 0x8000);
219    }
220
221    /**
222     * Reads a signed int (four byte) value from the buffer.
223     * Reads values that were written using {@link TupleOutput#writeInt}.
224     *
225     * @return the value read from the buffer.
226     *
227     * @throws IndexOutOfBoundsException if not enough bytes are available in
228     * the buffer.
229     */
230    public final int readInt()
231        throws IndexOutOfBoundsException {
232
233        return (int) (readUnsignedInt() ^ 0x80000000);
234    }
235
236    /**
237     * Reads a signed long (eight byte) value from the buffer.
238     * Reads values that were written using {@link TupleOutput#writeLong}.
239     *
240     * @return the value read from the buffer.
241     *
242     * @throws IndexOutOfBoundsException if not enough bytes are available in
243     * the buffer.
244     */
245    public final long readLong()
246        throws IndexOutOfBoundsException {
247
248        return readUnsignedLong() ^ 0x8000000000000000L;
249    }
250
251    /**
252     * Reads a signed float (four byte) value from the buffer.
253     * Reads values that were written using {@link TupleOutput#writeFloat}.
254     * <code>Float.intBitsToFloat</code> is used to convert the signed int
255     * value.
256     *
257     * <p><em>Note:</em> This method operations on byte array values that by
258     * default (without a custom comparator) do <em>not</em> sort correctly for
259     * negative values.  Only non-negative values are sorted correctly by
260     * default.  To sort all values correctly by default, use {@link
261     * #readSortedFloat}.</p>
262     *
263     * @return the value read from the buffer.
264     *
265     * @throws IndexOutOfBoundsException if not enough bytes are available in
266     * the buffer.
267     */
268    public final float readFloat()
269        throws IndexOutOfBoundsException {
270
271        return Float.intBitsToFloat((int) readUnsignedInt());
272    }
273
274    /**
275     * Reads a signed double (eight byte) value from the buffer.
276     * Reads values that were written using {@link TupleOutput#writeDouble}.
277     * <code>Double.longBitsToDouble</code> is used to convert the signed long
278     * value.
279     *
280     * <p><em>Note:</em> This method operations on byte array values that by
281     * default (without a custom comparator) do <em>not</em> sort correctly for
282     * negative values.  Only non-negative values are sorted correctly by
283     * default.  To sort all values correctly by default, use {@link
284     * #readSortedDouble}.</p>
285     *
286     * @return the value read from the buffer.
287     *
288     * @throws IndexOutOfBoundsException if not enough bytes are available in
289     * the buffer.
290     */
291    public final double readDouble()
292        throws IndexOutOfBoundsException {
293
294        return Double.longBitsToDouble(readUnsignedLong());
295    }
296
297    /**
298     * Reads a signed float (four byte) value from the buffer, with support
299     * for correct default sorting of all values.
300     * Reads values that were written using {@link
301     * TupleOutput#writeSortedFloat}.
302     *
303     * <p><code>Float.intBitsToFloat</code> and the following bit
304     * manipulations are used to convert the stored representation to a signed
305     * float value.</p>
306     * <pre>
307     *  int val = ... // get stored bits
308     *  val ^= (val &lt; 0) ? 0x80000000 : 0xffffffff;
309     *  return Float.intBitsToFloat(val);
310     * </pre>
311     *
312     * @return the value read from the buffer.
313     *
314     * @throws IndexOutOfBoundsException if not enough bytes are available in
315     * the buffer.
316     */
317    public final float readSortedFloat()
318        throws IndexOutOfBoundsException {
319
320        int val = (int) readUnsignedInt();
321        val ^= (val < 0) ? 0x80000000 : 0xffffffff;
322        return Float.intBitsToFloat(val);
323    }
324
325    /**
326     * Reads a signed double (eight byte) value from the buffer, with support
327     * for correct default sorting of all values.
328     * Reads values that were written using {@link
329     * TupleOutput#writeSortedDouble}.
330     *
331     * <p><code>Float.longBitsToDouble</code> and the following bit
332     * manipulations are used to convert the stored representation to a signed
333     * double value.</p>
334     * <pre>
335     *  int val = ... // get stored bits
336        val ^= (val &lt; 0) ? 0x8000000000000000L : 0xffffffffffffffffL;
337        return Double.longBitsToDouble(val);
338     * </pre>
339     *
340     * @return the value read from the buffer.
341     *
342     * @throws IndexOutOfBoundsException if not enough bytes are available in
343     * the buffer.
344     */
345    public final double readSortedDouble()
346        throws IndexOutOfBoundsException {
347
348        long val = readUnsignedLong();
349        val ^= (val < 0) ? 0x8000000000000000L : 0xffffffffffffffffL;
350        return Double.longBitsToDouble(val);
351    }
352
353    /**
354     * Reads an unsigned byte (one byte) value from the buffer.
355     * Reads values that were written using {@link
356     * TupleOutput#writeUnsignedByte}.
357     *
358     * @return the value read from the buffer.
359     *
360     * @throws IndexOutOfBoundsException if not enough bytes are available in
361     * the buffer.
362     */
363    public final int readUnsignedByte()
364        throws IndexOutOfBoundsException {
365
366        int c = readFast();
367        if (c < 0) {
368            throw new IndexOutOfBoundsException();
369        }
370        return c;
371    }
372
373    /**
374     * Reads an unsigned short (two byte) value from the buffer.
375     * Reads values that were written using {@link
376     * TupleOutput#writeUnsignedShort}.
377     *
378     * @return the value read from the buffer.
379     *
380     * @throws IndexOutOfBoundsException if not enough bytes are available in
381     * the buffer.
382     */
383    public final int readUnsignedShort()
384        throws IndexOutOfBoundsException {
385
386        int c1 = readFast();
387        int c2 = readFast();
388        if ((c1 | c2) < 0) {
389             throw new IndexOutOfBoundsException();
390        }
391        return ((c1 << 8) | c2);
392    }
393
394    // --- end DataInput compatible methods ---
395
396    /**
397     * Reads an unsigned int (four byte) value from the buffer.
398     * Reads values that were written using {@link
399     * TupleOutput#writeUnsignedInt}.
400     *
401     * @return the value read from the buffer.
402     *
403     * @throws IndexOutOfBoundsException if not enough bytes are available in
404     * the buffer.
405     */
406    public final long readUnsignedInt()
407        throws IndexOutOfBoundsException {
408
409        long c1 = readFast();
410        long c2 = readFast();
411        long c3 = readFast();
412        long c4 = readFast();
413        if ((c1 | c2 | c3 | c4) < 0) {
414	    throw new IndexOutOfBoundsException();
415        }
416        return ((c1 << 24) | (c2 << 16) | (c3 << 8) | c4);
417    }
418
419    /**
420     * This method is private since an unsigned long cannot be treated as
421     * such in Java, nor converted to a BigInteger of the same value.
422     */
423    private final long readUnsignedLong()
424        throws IndexOutOfBoundsException {
425
426        long c1 = readFast();
427        long c2 = readFast();
428        long c3 = readFast();
429        long c4 = readFast();
430        long c5 = readFast();
431        long c6 = readFast();
432        long c7 = readFast();
433        long c8 = readFast();
434        if ((c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8) < 0) {
435             throw new IndexOutOfBoundsException();
436        }
437        return ((c1 << 56) | (c2 << 48) | (c3 << 40) | (c4 << 32) |
438                (c5 << 24) | (c6 << 16) | (c7 << 8)  | c8);
439    }
440
441    /**
442     * Reads the specified number of bytes from the buffer, converting each
443     * unsigned byte value to a character of the resulting string.
444     * Reads values that were written using {@link TupleOutput#writeBytes}.
445     * Only characters with values below 0x100 may be read using this method.
446     *
447     * @param length is the number of bytes to be read.
448     *
449     * @return the value read from the buffer.
450     *
451     * @throws IndexOutOfBoundsException if not enough bytes are available in
452     * the buffer.
453     */
454    public final String readBytes(int length)
455        throws IndexOutOfBoundsException {
456
457        StringBuffer buf = new StringBuffer(length);
458        for (int i = 0; i < length; i++) {
459            int c = readFast();
460            if (c < 0) {
461                throw new IndexOutOfBoundsException();
462            }
463            buf.append((char) c);
464        }
465        return buf.toString();
466    }
467
468    /**
469     * Reads the specified number of characters from the buffer, converting
470     * each two byte unsigned value to a character of the resulting string.
471     * Reads values that were written using {@link TupleOutput#writeChars}.
472     *
473     * @param length is the number of characters to be read.
474     *
475     * @return the value read from the buffer.
476     *
477     * @throws IndexOutOfBoundsException if not enough bytes are available in
478     * the buffer.
479     */
480    public final String readChars(int length)
481        throws IndexOutOfBoundsException {
482
483        StringBuffer buf = new StringBuffer(length);
484        for (int i = 0; i < length; i++) {
485            buf.append(readChar());
486        }
487        return buf.toString();
488    }
489
490    /**
491     * Reads the specified number of bytes from the buffer, converting each
492     * unsigned byte value to a character of the resulting array.
493     * Reads values that were written using {@link TupleOutput#writeBytes}.
494     * Only characters with values below 0x100 may be read using this method.
495     *
496     * @param chars is the array to receive the data and whose length is used
497     * to determine the number of bytes to be read.
498     *
499     * @throws IndexOutOfBoundsException if not enough bytes are available in
500     * the buffer.
501     */
502    public final void readBytes(char[] chars)
503        throws IndexOutOfBoundsException {
504
505        for (int i = 0; i < chars.length; i++) {
506            int c = readFast();
507            if (c < 0) {
508                throw new IndexOutOfBoundsException();
509            }
510            chars[i] = (char) c;
511        }
512    }
513
514    /**
515     * Reads the specified number of characters from the buffer, converting
516     * each two byte unsigned value to a character of the resulting array.
517     * Reads values that were written using {@link TupleOutput#writeChars}.
518     *
519     * @param chars is the array to receive the data and whose length is used
520     * to determine the number of characters to be read.
521     *
522     * @throws IndexOutOfBoundsException if not enough bytes are available in
523     * the buffer.
524     */
525    public final void readChars(char[] chars)
526        throws IndexOutOfBoundsException {
527
528        for (int i = 0; i < chars.length; i++) {
529            chars[i] = readChar();
530        }
531    }
532
533    /**
534     * Reads the specified number of UTF characters string from the data
535     * buffer and converts the data from UTF to Unicode.
536     * Reads values that were written using {@link
537     * TupleOutput#writeString(char[])}.
538     *
539     * @param length is the number of characters to be read.
540     *
541     * @return the converted string.
542     *
543     * @throws IndexOutOfBoundsException if no null terminating byte is found
544     * in the buffer.
545     *
546     * @throws IllegalArgumentException malformed UTF data is encountered.
547     */
548    public final String readString(int length)
549        throws IndexOutOfBoundsException, IllegalArgumentException  {
550
551        char[] chars = new char[length];
552        readString(chars);
553        return new String(chars);
554    }
555
556    /**
557     * Reads the specified number of UTF characters string from the data
558     * buffer and converts the data from UTF to Unicode.
559     * Reads values that were written using {@link
560     * TupleOutput#writeString(char[])}.
561     *
562     * @param chars is the array to receive the data and whose length is used
563     * to determine the number of characters to be read.
564     *
565     * @throws IndexOutOfBoundsException if no null terminating byte is found
566     * in the buffer.
567     *
568     * @throws IllegalArgumentException malformed UTF data is encountered.
569     */
570    public final void readString(char[] chars)
571        throws IndexOutOfBoundsException, IllegalArgumentException  {
572
573        off = UtfOps.bytesToChars(buf, off, chars, 0, chars.length, false);
574    }
575
576    /**
577     * Returns the byte length of a null-terminated UTF string in the data
578     * buffer, including the terminator.  Used with string values that were
579     * written using {@link TupleOutput#writeString(String)}.
580     *
581     * @throws IndexOutOfBoundsException if no null terminating byte is found
582     * in the buffer.
583     *
584     * @throws IllegalArgumentException malformed UTF data is encountered.
585     */
586    public final int getStringByteLength()
587        throws IndexOutOfBoundsException, IllegalArgumentException  {
588
589        if (available() >= 2 &&
590            buf[off] == TupleOutput.NULL_STRING_UTF_VALUE &&
591            buf[off + 1] == 0) {
592            return 2;
593        } else {
594            return UtfOps.getZeroTerminatedByteLength(buf, off) + 1;
595        }
596    }
597
598    /**
599     * Reads a packed integer.  Note that packed integers are not appropriate
600     * for sorted values (keys) unless a custom comparator is used.
601     *
602     * @see PackedInteger
603     */
604    public final int readPackedInt() {
605
606        int len = PackedInteger.getReadIntLength(buf, off);
607        int val = PackedInteger.readInt(buf, off);
608
609        off += len;
610        return val;
611    }
612
613    /**
614     * Returns the byte length of a packed integer.
615     *
616     * @see PackedInteger
617     */
618    public final int getPackedIntByteLength() {
619        return PackedInteger.getReadIntLength(buf, off);
620    }
621
622    /**
623     * Reads a packed long integer.  Note that packed integers are not
624     * appropriate for sorted values (keys) unless a custom comparator is used.
625     *
626     * @see PackedInteger
627     */
628    public final long readPackedLong() {
629
630        int len = PackedInteger.getReadLongLength(buf, off);
631        long val = PackedInteger.readLong(buf, off);
632
633        off += len;
634        return val;
635    }
636
637    /**
638     * Returns the byte length of a packed long integer.
639     *
640     * @see PackedInteger
641     */
642    public final int getPackedLongByteLength() {
643        return PackedInteger.getReadLongLength(buf, off);
644    }
645
646    /**
647     * Reads a {@code BigInteger}.
648     *
649     * @see TupleOutput#writeBigInteger
650     */
651    public final BigInteger readBigInteger() {
652        int len = readShort();
653        if (len < 0) {
654            len = (- len);
655        }
656        byte[] a = new byte[len];
657        a[0] = readByte();
658        readFast(a, 1, a.length - 1);
659        return new BigInteger(a);
660    }
661
662    /**
663     * Returns the byte length of a {@code BigInteger}.
664     *
665     * @see TupleOutput#writeBigInteger
666     */
667    public final int getBigIntegerByteLength() {
668        int saveOff = off;
669        int len = readShort();
670        off = saveOff;
671        if (len < 0) {
672            len = (- len);
673        }
674        return len + 2;
675    }
676}
677