/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2000,2008 Oracle.  All rights reserved.
 *
 * $Id: TupleInput.java,v 12.7 2008/01/08 20:58:36 bostic Exp $
 */

package com.sleepycat.bind.tuple;

import java.math.BigInteger;

import com.sleepycat.util.FastInputStream;
import com.sleepycat.util.PackedInteger;
import com.sleepycat.util.UtfOps;

/**
 * An <code>InputStream</code> with <code>DataInput</code>-like methods for
 * reading tuple fields.  It is used by <code>TupleBinding</code>.
 *
 * <p>This class has many methods that have the same signatures as methods in
 * the {@link java.io.DataInput} interface.  The reason this class does not
 * implement {@link java.io.DataInput} is because it would break the interface
 * contract for those methods because of data format differences.</p>
 *
 * <p>Signed numbers are stored in the buffer in MSB (most significant byte
 * first) order with their sign bit (high-order bit) inverted to cause negative
 * numbers to be sorted first when comparing values as unsigned byte arrays,
 * as done in a database.  Unsigned numbers, including characters, are stored
 * in MSB order with no change to their sign bit.  BigInteger values are stored
 * with a preceding length having the same sign as the value.</p>
 *
 * <p>Strings and character arrays are stored either as a fixed length array of
 * unicode characters, where the length must be known by the application, or as
 * a null-terminated UTF byte array.</p>
 * <ul>
 * <li>Null strings are UTF encoded as { 0xFF }, which is not allowed in a
 * standard UTF encoding.  This allows null strings, as distinct from empty or
 * zero length strings, to be represented in a tuple.  Using the default
 * comparator, null strings will be ordered last.</li>
 * <li>Zero (0x0000) character values are UTF encoded as non-zero values, and
 * therefore embedded zeros in the string are supported.  The sequence { 0xC0,
 * 0x80 } is used to encode a zero character.  This UTF encoding is the same
 * one used by native Java UTF libraries.  However, this encoding of zero does
 * impact the lexicographical ordering, and zeros will not be sorted first (the
 * natural order) or last.  For all character values other than zero, the
 * default UTF byte ordering is the same as the Unicode lexicographical
 * character ordering.</li>
 * </ul>
 *
 * <p>Floats and doubles are stored using two different representations: sorted
 * representation and integer-bit (IEEE 754) representation.  If you use
 * negative floating point numbers in a key, you should use sorted
 * representation; alternatively you may use integer-bit representation but you
 * will need to implement and configure a custom comparator to get correct
 * numeric ordering for negative numbers.</p>
 *
 * <p>To use sorted representation use this set of methods:</p>
 * <ul>
 * <li>{@link TupleOutput#writeSortedFloat}</li>
 * <li>{@link TupleInput#readSortedFloat}</li>
 * <li>{@link TupleOutput#writeSortedDouble}</li>
 * <li>{@link TupleInput#readSortedDouble}</li>
 * </ul>
 *
 * <p>To use integer-bit representation use this set of methods:</p>
 * <ul>
 * <li>{@link TupleOutput#writeFloat}</li>
 * <li>{@link TupleInput#readFloat}</li>
 * <li>{@link TupleOutput#writeDouble}</li>
 * <li>{@link TupleInput#readDouble}</li>
 * </ul>
 *
 * @author Mark Hayes
 */
public class TupleInput extends FastInputStream {

    /**
     * Creates a tuple input object for reading a byte array of tuple data.  A
     * reference to the byte array will be kept by this object (it will not be
     * copied) and therefore the byte array should not be modified while this
     * object is in use.
     *
     * @param buffer is the byte array to be read and should contain data in
     * tuple format.
     */
    public TupleInput(byte[] buffer) {

        super(buffer);
    }

    /**
     * Creates a tuple input object for reading a byte array of tuple data at
     * a given offset for a given length.  A reference to the byte array will
     * be kept by this object (it will not be copied) and therefore the byte
     * array should not be modified while this object is in use.
     *
     * @param buffer is the byte array to be read and should contain data in
     * tuple format.
     *
     * @param offset is the byte offset at which to begin reading.
     *
     * @param length is the number of bytes to be read.
     */
    public TupleInput(byte[] buffer, int offset, int length) {

        super(buffer, offset, length);
    }

    /**
     * Creates a tuple input object from the data contained in a tuple output
     * object.  A reference to the tuple output's byte array will be kept by
     * this object (it will not be copied) and therefore the tuple output
     * object should not be modified while this object is in use.
     *
     * @param output is the tuple output object containing the data to be read.
     */
    public TupleInput(TupleOutput output) {

        super(output.getBufferBytes(), output.getBufferOffset(),
              output.getBufferLength());
    }

    // --- begin DataInput compatible methods ---

    /**
     * Reads a null-terminated UTF string from the data buffer and converts
     * the data from UTF to Unicode.
     * Reads values that were written using {@link
     * TupleOutput#writeString(String)}.
     *
     * @return the converted string, or null if the stored value is the
     * two-byte null-string marker.
     *
     * @throws IndexOutOfBoundsException if no null terminating byte is found
     * in the buffer.
     *
     * @throws IllegalArgumentException malformed UTF data is encountered.
     */
    public final String readString()
        throws IndexOutOfBoundsException, IllegalArgumentException {

        byte[] myBuf = buf;
        int myOff = off;
        // A null string is stored as the marker byte followed by the usual
        // zero terminator; the available() check keeps the two-byte peek
        // inside the readable data.
        if (available() >= 2 &&
            myBuf[myOff] == TupleOutput.NULL_STRING_UTF_VALUE &&
            myBuf[myOff + 1] == 0) {
            skip(2);
            return null;
        } else {
            int byteLen = UtfOps.getZeroTerminatedByteLength(myBuf, myOff);
            // Consume the string bytes plus the terminator, then decode from
            // the offset saved before the skip.
            skip(byteLen + 1);
            return UtfOps.bytesToString(myBuf, myOff, byteLen);
        }
    }

    /**
     * Reads a char (two byte) unsigned value from the buffer.
     * Reads values that were written using {@link TupleOutput#writeChar}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final char readChar()
        throws IndexOutOfBoundsException {

        return (char) readUnsignedShort();
    }

    /**
     * Reads a boolean (one byte) unsigned value from the buffer and returns
     * true if it is non-zero and false if it is zero.
     * Reads values that were written using {@link TupleOutput#writeBoolean}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final boolean readBoolean()
        throws IndexOutOfBoundsException {

        int c = readFast();
        // A negative result from readFast is treated as "no byte available".
        if (c < 0) {
            throw new IndexOutOfBoundsException();
        }
        return (c != 0);
    }

    /**
     * Reads a signed byte (one byte) value from the buffer.
     * Reads values that were written using {@link TupleOutput#writeByte}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final byte readByte()
        throws IndexOutOfBoundsException {

        // Flip the high-order bit back to recover the signed value.
        return (byte) (readUnsignedByte() ^ 0x80);
    }

    /**
     * Reads a signed short (two byte) value from the buffer.
     * Reads values that were written using {@link TupleOutput#writeShort}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final short readShort()
        throws IndexOutOfBoundsException {

        // Flip the high-order bit back to recover the signed value.
        return (short) (readUnsignedShort() ^ 0x8000);
    }

    /**
     * Reads a signed int (four byte) value from the buffer.
     * Reads values that were written using {@link TupleOutput#writeInt}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final int readInt()
        throws IndexOutOfBoundsException {

        // Flip the high-order bit back; only the low 32 bits survive the
        // narrowing cast.
        return (int) (readUnsignedInt() ^ 0x80000000);
    }

    /**
     * Reads a signed long (eight byte) value from the buffer.
     * Reads values that were written using {@link TupleOutput#writeLong}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final long readLong()
        throws IndexOutOfBoundsException {

        // Flip the high-order bit back to recover the signed value.
        return readUnsignedLong() ^ 0x8000000000000000L;
    }

    /**
     * Reads a signed float (four byte) value from the buffer.
     * Reads values that were written using {@link TupleOutput#writeFloat}.
     * <code>Float.intBitsToFloat</code> is used to convert the signed int
     * value.
     *
     * <p><em>Note:</em> This method operates on byte array values that by
     * default (without a custom comparator) do <em>not</em> sort correctly for
     * negative values.  Only non-negative values are sorted correctly by
     * default.  To sort all values correctly by default, use {@link
     * #readSortedFloat}.</p>
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final float readFloat()
        throws IndexOutOfBoundsException {

        return Float.intBitsToFloat((int) readUnsignedInt());
    }

    /**
     * Reads a signed double (eight byte) value from the buffer.
     * Reads values that were written using {@link TupleOutput#writeDouble}.
     * <code>Double.longBitsToDouble</code> is used to convert the signed long
     * value.
     *
     * <p><em>Note:</em> This method operates on byte array values that by
     * default (without a custom comparator) do <em>not</em> sort correctly for
     * negative values.  Only non-negative values are sorted correctly by
     * default.  To sort all values correctly by default, use {@link
     * #readSortedDouble}.</p>
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final double readDouble()
        throws IndexOutOfBoundsException {

        return Double.longBitsToDouble(readUnsignedLong());
    }

    /**
     * Reads a signed float (four byte) value from the buffer, with support
     * for correct default sorting of all values.
     * Reads values that were written using {@link
     * TupleOutput#writeSortedFloat}.
     *
     * <p><code>Float.intBitsToFloat</code> and the following bit
     * manipulations are used to convert the stored representation to a signed
     * float value.</p>
     * <pre>
     *  int val = ... // get stored bits
     *  val ^= (val &lt; 0) ? 0x80000000 : 0xffffffff;
     *  return Float.intBitsToFloat(val);
     * </pre>
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final float readSortedFloat()
        throws IndexOutOfBoundsException {

        int val = (int) readUnsignedInt();
        // Stored high bit set: flip only that bit; otherwise flip all bits.
        val ^= (val < 0) ? 0x80000000 : 0xffffffff;
        return Float.intBitsToFloat(val);
    }

    /**
     * Reads a signed double (eight byte) value from the buffer, with support
     * for correct default sorting of all values.
     * Reads values that were written using {@link
     * TupleOutput#writeSortedDouble}.
     *
     * <p><code>Double.longBitsToDouble</code> and the following bit
     * manipulations are used to convert the stored representation to a signed
     * double value.</p>
     * <pre>
     *  long val = ... // get stored bits
     *  val ^= (val &lt; 0) ? 0x8000000000000000L : 0xffffffffffffffffL;
     *  return Double.longBitsToDouble(val);
     * </pre>
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final double readSortedDouble()
        throws IndexOutOfBoundsException {

        long val = readUnsignedLong();
        // Stored high bit set: flip only that bit; otherwise flip all bits.
        val ^= (val < 0) ? 0x8000000000000000L : 0xffffffffffffffffL;
        return Double.longBitsToDouble(val);
    }

    /**
     * Reads an unsigned byte (one byte) value from the buffer.
     * Reads values that were written using {@link
     * TupleOutput#writeUnsignedByte}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final int readUnsignedByte()
        throws IndexOutOfBoundsException {

        int c = readFast();
        if (c < 0) {
            throw new IndexOutOfBoundsException();
        }
        return c;
    }

    /**
     * Reads an unsigned short (two byte) value from the buffer.
     * Reads values that were written using {@link
     * TupleOutput#writeUnsignedShort}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final int readUnsignedShort()
        throws IndexOutOfBoundsException {

        int c1 = readFast();
        int c2 = readFast();
        // OR-ing the results is negative if any single read was negative.
        if ((c1 | c2) < 0) {
            throw new IndexOutOfBoundsException();
        }
        return ((c1 << 8) | c2);
    }

    // --- end DataInput compatible methods ---

    /**
     * Reads an unsigned int (four byte) value from the buffer.
     * Reads values that were written using {@link
     * TupleOutput#writeUnsignedInt}.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final long readUnsignedInt()
        throws IndexOutOfBoundsException {

        long c1 = readFast();
        long c2 = readFast();
        long c3 = readFast();
        long c4 = readFast();
        // OR-ing the results is negative if any single read was negative.
        if ((c1 | c2 | c3 | c4) < 0) {
            throw new IndexOutOfBoundsException();
        }
        return ((c1 << 24) | (c2 << 16) | (c3 << 8) | c4);
    }

    /**
     * Reads eight bytes from the buffer in MSB order and returns them as the
     * raw bits of a long.
     *
     * <p>This method is private since an unsigned long cannot be treated as
     * such in Java, nor converted to a BigInteger of the same value.</p>
     *
     * @return the eight bytes read, packed into a long.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    private final long readUnsignedLong()
        throws IndexOutOfBoundsException {

        long c1 = readFast();
        long c2 = readFast();
        long c3 = readFast();
        long c4 = readFast();
        long c5 = readFast();
        long c6 = readFast();
        long c7 = readFast();
        long c8 = readFast();
        // OR-ing the results is negative if any single read was negative.
        if ((c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8) < 0) {
            throw new IndexOutOfBoundsException();
        }
        return ((c1 << 56) | (c2 << 48) | (c3 << 40) | (c4 << 32) |
                (c5 << 24) | (c6 << 16) | (c7 << 8)  | c8);
    }

    /**
     * Reads the specified number of bytes from the buffer, converting each
     * unsigned byte value to a character of the resulting string.
     * Reads values that were written using {@link TupleOutput#writeBytes}.
     * Only characters with values below 0x100 may be read using this method.
     *
     * @param length is the number of bytes to be read.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final String readBytes(int length)
        throws IndexOutOfBoundsException {

        // Same shape as readString(int): fill a char array through the
        // array overload, then wrap it.
        char[] chars = new char[length];
        readBytes(chars);
        return new String(chars);
    }

    /**
     * Reads the specified number of characters from the buffer, converting
     * each two byte unsigned value to a character of the resulting string.
     * Reads values that were written using {@link TupleOutput#writeChars}.
     *
     * @param length is the number of characters to be read.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final String readChars(int length)
        throws IndexOutOfBoundsException {

        // Same shape as readString(int): fill a char array through the
        // array overload, then wrap it.
        char[] chars = new char[length];
        readChars(chars);
        return new String(chars);
    }

    /**
     * Reads the specified number of bytes from the buffer, converting each
     * unsigned byte value to a character of the resulting array.
     * Reads values that were written using {@link TupleOutput#writeBytes}.
     * Only characters with values below 0x100 may be read using this method.
     *
     * @param chars is the array to receive the data and whose length is used
     * to determine the number of bytes to be read.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final void readBytes(char[] chars)
        throws IndexOutOfBoundsException {

        for (int i = 0; i < chars.length; i++) {
            int c = readFast();
            if (c < 0) {
                throw new IndexOutOfBoundsException();
            }
            chars[i] = (char) c;
        }
    }

    /**
     * Reads the specified number of characters from the buffer, converting
     * each two byte unsigned value to a character of the resulting array.
     * Reads values that were written using {@link TupleOutput#writeChars}.
     *
     * @param chars is the array to receive the data and whose length is used
     * to determine the number of characters to be read.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer.
     */
    public final void readChars(char[] chars)
        throws IndexOutOfBoundsException {

        for (int i = 0; i < chars.length; i++) {
            chars[i] = readChar();
        }
    }

    /**
     * Reads the specified number of UTF characters from the data buffer and
     * converts the data from UTF to Unicode.  The value is not
     * null-terminated; the caller supplies the character count.
     * Reads values that were written using {@link
     * TupleOutput#writeString(char[])}.
     *
     * @param length is the number of characters to be read.
     *
     * @return the converted string.
     *
     * @throws IndexOutOfBoundsException if decoding reads past the end of
     * the data.
     *
     * @throws IllegalArgumentException malformed UTF data is encountered.
     */
    public final String readString(int length)
        throws IndexOutOfBoundsException, IllegalArgumentException {

        char[] chars = new char[length];
        readString(chars);
        return new String(chars);
    }

    /**
     * Reads the specified number of UTF characters from the data buffer and
     * converts the data from UTF to Unicode.  The value is not
     * null-terminated; the array length supplies the character count.
     * Reads values that were written using {@link
     * TupleOutput#writeString(char[])}.
     *
     * @param chars is the array to receive the data and whose length is used
     * to determine the number of characters to be read.
     *
     * @throws IndexOutOfBoundsException if decoding reads past the end of
     * the data.
     *
     * @throws IllegalArgumentException malformed UTF data is encountered.
     */
    public final void readString(char[] chars)
        throws IndexOutOfBoundsException, IllegalArgumentException {

        // The decoder's return value becomes the new read offset.
        off = UtfOps.bytesToChars(buf, off, chars, 0, chars.length, false);
    }

    /**
     * Returns the byte length of a null-terminated UTF string in the data
     * buffer, including the terminator.  Used with string values that were
     * written using {@link TupleOutput#writeString(String)}.
     * The read offset is not changed.
     *
     * @return the number of bytes occupied by the string at the current
     * offset, including its terminator; 2 for a stored null string.
     *
     * @throws IndexOutOfBoundsException if no null terminating byte is found
     * in the buffer.
     *
     * @throws IllegalArgumentException malformed UTF data is encountered.
     */
    public final int getStringByteLength()
        throws IndexOutOfBoundsException, IllegalArgumentException {

        if (available() >= 2 &&
            buf[off] == TupleOutput.NULL_STRING_UTF_VALUE &&
            buf[off + 1] == 0) {
            return 2;
        } else {
            return UtfOps.getZeroTerminatedByteLength(buf, off) + 1;
        }
    }

    /**
     * Reads a packed integer.  Note that packed integers are not appropriate
     * for sorted values (keys) unless a custom comparator is used.
     * The read offset is advanced by the encoded length of the value.
     *
     * @return the value read from the buffer.
     *
     * @see PackedInteger
     */
    public final int readPackedInt() {

        int len = PackedInteger.getReadIntLength(buf, off);
        int val = PackedInteger.readInt(buf, off);

        off += len;
        return val;
    }

    /**
     * Returns the byte length of a packed integer.
     * The read offset is not changed.
     *
     * @return the encoded length, as reported by
     * {@link PackedInteger#getReadIntLength}, of the value at the current
     * offset.
     *
     * @see PackedInteger
     */
    public final int getPackedIntByteLength() {
        return PackedInteger.getReadIntLength(buf, off);
    }

    /**
     * Reads a packed long integer.  Note that packed integers are not
     * appropriate for sorted values (keys) unless a custom comparator is used.
     * The read offset is advanced by the encoded length of the value.
     *
     * @return the value read from the buffer.
     *
     * @see PackedInteger
     */
    public final long readPackedLong() {

        int len = PackedInteger.getReadLongLength(buf, off);
        long val = PackedInteger.readLong(buf, off);

        off += len;
        return val;
    }

    /**
     * Returns the byte length of a packed long integer.
     * The read offset is not changed.
     *
     * @return the encoded length, as reported by
     * {@link PackedInteger#getReadLongLength}, of the value at the current
     * offset.
     *
     * @see PackedInteger
     */
    public final int getPackedLongByteLength() {
        return PackedInteger.getReadLongLength(buf, off);
    }

    /**
     * Reads a {@code BigInteger}.  The stored form is a signed short length
     * prefix, whose absolute value is the number of value bytes, followed by
     * a first value byte read with {@link #readByte} and the remaining value
     * bytes copied as-is.
     *
     * @return the value read from the buffer.
     *
     * @throws IndexOutOfBoundsException if the length prefix is zero or if
     * fewer value bytes are available than the length prefix calls for.
     *
     * @see TupleOutput#writeBigInteger
     */
    public final BigInteger readBigInteger() {
        int len = readShort();
        // Only the magnitude of the prefix is the byte count.
        if (len < 0) {
            len = (- len);
        }
        // Fail up front rather than build a value from a partially filled
        // array: the result of the bulk readFast below is not checked, and a
        // zero length would otherwise only fail on the a[0] store.
        if (len == 0 || len > available()) {
            throw new IndexOutOfBoundsException
                ("BigInteger length " + len + " but only " + available() +
                 " bytes available");
        }
        byte[] a = new byte[len];
        // The first byte goes through readByte, which undoes the sign-bit
        // inversion; the rest are copied unchanged.
        a[0] = readByte();
        readFast(a, 1, a.length - 1);
        return new BigInteger(a);
    }

    /**
     * Returns the byte length of a {@code BigInteger}, including its two-byte
     * length prefix.  The read offset is not changed, even if the prefix
     * cannot be read.
     *
     * @return the absolute value of the stored length prefix plus two.
     *
     * @throws IndexOutOfBoundsException if not enough bytes are available in
     * the buffer to read the length prefix.
     *
     * @see TupleOutput#writeBigInteger
     */
    public final int getBigIntegerByteLength() {
        int saveOff = off;
        int len;
        try {
            len = readShort();
        } finally {
            // This is a peek: always put the offset back, including when
            // readShort fails after consuming part of the prefix.
            off = saveOff;
        }
        if (len < 0) {
            len = (- len);
        }
        return len + 2;
    }
}