/*
 * Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.io;

/**
 * The {@code DataInput} interface provides for reading bytes from a binary
 * stream and reconstructing from them data in any of the Java primitive
 * types. There is also a facility for reconstructing a {@code String} from
 * data in <a href="#modified-utf-8">modified UTF-8</a> format.
 * <p>
 * It is generally true of all the reading routines in this interface that
 * if end of file is reached before the desired number of bytes has been
 * read, an {@code EOFException} (which is a kind of {@code IOException})
 * is thrown. If any byte cannot be read for any reason other than end of
 * file, an {@code IOException} other than {@code EOFException} is thrown.
 * In particular, an {@code IOException} may be thrown if the input stream
 * has been closed.
 *
 * <h3><a id="modified-utf-8">Modified UTF-8</a></h3>
 * <p>
 * Implementations of the {@code DataInput} and {@code DataOutput}
 * interfaces represent Unicode strings in a format that is a slight
 * modification of UTF-8.
 * (For information regarding the standard UTF-8 format, see section
 * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
 * 4.0</i>).
 * Note that in the following table, the most significant bit appears in the
 * far left-hand column.
 *
 * <blockquote>
 *   <table class="plain">
 *     <caption style="display:none">Bit values and bytes</caption>
 *     <tbody>
 *     <tr>
 *       <th colspan="9"><span style="font-weight:normal">
 *         All characters in the range {@code '\u005Cu0001'} to
 *         {@code '\u005Cu007F'} are represented by a single byte:</span></th>
 *     </tr>
 *     <tr>
 *       <td></td>
 *       <th colspan="8" id="bit_a">Bit Values</th>
 *     </tr>
 *     <tr>
 *       <th id="byte1_a" style="text-align:left">Byte 1</th>
 *       <td style="text-align:center">0
 *       <td colspan="7" style="text-align:center">bits 6-0
 *     </tr>
 *     <tr>
 *       <th colspan="9"><span style="font-weight:normal">
 *         The null character {@code '\u005Cu0000'} and characters
 *         in the range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
 *         represented by a pair of bytes:</span></th>
 *     </tr>
 *     <tr>
 *       <td></td>
 *       <th colspan="8" id="bit_b">Bit Values</th>
 *     </tr>
 *     <tr>
 *       <th id="byte1_b" style="text-align:left">Byte 1</th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="5" style="text-align:center">bits 10-6
 *     </tr>
 *     <tr>
 *       <th id="byte2_a" style="text-align:left">Byte 2</th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:center">bits 5-0
 *     </tr>
 *     <tr>
 *       <th colspan="9"><span style="font-weight:normal">
 *         {@code char} values in the range {@code '\u005Cu0800'}
 *         to {@code '\u005CuFFFF'} are represented by three bytes:</span></th>
 *     </tr>
 *     <tr>
 *       <td></td>
 *       <th colspan="8" id="bit_c">Bit Values</th>
 *     </tr>
 *     <tr>
 *       <th id="byte1_c" style="text-align:left">Byte 1</th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="4" style="text-align:center">bits 15-12
 *     </tr>
 *     <tr>
 *       <th id="byte2_b" style="text-align:left">Byte 2</th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:center">bits 11-6
 *     </tr>
 *     <tr>
 *       <th id="byte3" style="text-align:left">Byte 3</th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:center">bits 5-0
 *     </tr>
 *     </tbody>
 *   </table>
 * </blockquote>
 * <p>
 * The differences between this format and the
 * standard UTF-8 format are the following:
 * <ul>
 * <li>The null byte {@code '\u005Cu0000'} is encoded in 2-byte format
 *     rather than 1-byte, so that the encoded strings never have
 *     embedded nulls.
 * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 *     are represented in the form of surrogate pairs.
 * </ul>
 * @author  Frank Yellin
 * @see     java.io.DataInputStream
 * @see     java.io.DataOutput
 * @since   1.0
 */
public interface DataInput {
    /**
     * Reads some bytes from an input stream and stores them into the buffer
     * array {@code b}. The number of bytes read is equal to the length of
     * {@code b}.
     * <p>
     * This method blocks until one of the following conditions occurs:
     * <ul>
     * <li>{@code b.length} bytes of input data are available, in which case
     * a normal return is made.
     *
     * <li>End of file is detected, in which case an {@code EOFException}
     * is thrown.
     *
     * <li>An I/O error occurs, in which case an {@code IOException} other
     * than {@code EOFException} is thrown.
     * </ul>
     * <p>
     * If {@code b} is {@code null}, a {@code NullPointerException} is thrown.
     * If {@code b.length} is zero, then no bytes are read. Otherwise, the
     * first byte read is stored into element {@code b[0]}, the next one into
     * {@code b[1]}, and so on.
     * If an exception is thrown from this method, then it may be that some
     * but not all bytes of {@code b} have been updated with data from the
     * input stream.
     *
     * @param   b   the buffer into which the data is read.
     * @throws  NullPointerException if {@code b} is {@code null}.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b) throws IOException;

    /**
     * Reads {@code len} bytes from an input stream.
     * <p>
     * This method blocks until one of the following conditions occurs:
     * <ul>
     * <li>{@code len} bytes of input data are available, in which case
     * a normal return is made.
     *
     * <li>End of file is detected, in which case an {@code EOFException}
     * is thrown.
     *
     * <li>An I/O error occurs, in which case an {@code IOException} other
     * than {@code EOFException} is thrown.
     * </ul>
     * <p>
     * If {@code b} is {@code null}, a {@code NullPointerException} is thrown.
     * If {@code off} is negative, or {@code len} is negative, or
     * {@code off+len} is greater than the length of the array {@code b},
     * then an {@code IndexOutOfBoundsException} is thrown.
     * If {@code len} is zero, then no bytes are read. Otherwise, the first
     * byte read is stored into element {@code b[off]}, the next one into
     * {@code b[off+1]}, and so on.
     * On a normal return exactly {@code len} bytes have been read; if an
     * exception is thrown from this method, then it may be that some but
     * not all of the elements {@code b[off]} through {@code b[off+len-1]}
     * have been updated with data from the input stream.
     *
     * @param   b    the buffer into which the data is read.
     * @param   off  an int specifying the offset in the data array {@code b}.
     * @param   len  an int specifying the number of bytes to read.
     * @throws  NullPointerException if {@code b} is {@code null}.
     * @throws  IndexOutOfBoundsException if {@code off} is negative,
     *          {@code len} is negative, or {@code len} is greater than
     *          {@code b.length - off}.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b, int off, int len) throws IOException;

    /**
     * Makes an attempt to skip over {@code n} bytes of data from the input
     * stream, discarding the skipped bytes. However, it may skip over some
     * smaller number of bytes, possibly zero. This may result from any of a
     * number of conditions; reaching end of file before {@code n} bytes
     * have been skipped is only one possibility.
     * This method never throws an {@code EOFException}.
     * The actual number of bytes skipped is returned.
     *
     * @param   n   the number of bytes to be skipped.
     * @return  the number of bytes actually skipped.
     * @throws  IOException   if an I/O error occurs.
     */
    int skipBytes(int n) throws IOException;

    /**
     * Reads one input byte and returns {@code true} if that byte is nonzero,
     * {@code false} if that byte is zero.
     * This method is suitable for reading the byte written by the
     * {@code writeBoolean} method of interface {@code DataOutput}.
     *
     * @return  the {@code boolean} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    boolean readBoolean() throws IOException;

    /**
     * Reads and returns one input byte.
     * The byte is treated as a signed value in the range {@code -128}
     * through {@code 127}, inclusive.
     * This method is suitable for reading the byte written by the
     * {@code writeByte} method of interface {@code DataOutput}.
     *
     * @return  the 8-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    byte readByte() throws IOException;

    /**
     * Reads one input byte, zero-extends it to type {@code int}, and returns
     * the result, which is therefore in the range {@code 0} through
     * {@code 255}.
     * This method is suitable for reading the byte written by the
     * {@code writeByte} method of interface {@code DataOutput} if the
     * argument to {@code writeByte} was intended to be a value in the range
     * {@code 0} through {@code 255}.
     *
     * @return  the unsigned 8-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readUnsignedByte() throws IOException;

    /**
     * Reads two input bytes and returns a {@code short} value. Let {@code a}
     * be the first byte read and {@code b} be the second byte. The value
     * returned is:
     * <pre>{@code (short)((a << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading the bytes written by the
     * {@code writeShort} method of interface {@code DataOutput}.
     *
     * @return  the 16-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    short readShort() throws IOException;

    /**
     * Reads two input bytes and returns an {@code int} value in the range
     * {@code 0} through {@code 65535}. Let {@code a} be the first byte read
     * and {@code b} be the second byte. The value returned is:
     * <pre>{@code (((a & 0xff) << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading the bytes written by the
     * {@code writeShort} method of interface {@code DataOutput} if the
     * argument to {@code writeShort} was intended to be a value in the range
     * {@code 0} through {@code 65535}.
     *
     * @return  the unsigned 16-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readUnsignedShort() throws IOException;

    /**
     * Reads two input bytes and returns a {@code char} value.
     * Let {@code a} be the first byte read and {@code b} be the second byte.
     * The value returned is:
     * <pre>{@code (char)((a << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading bytes written by the
     * {@code writeChar} method of interface {@code DataOutput}.
     *
     * @return  the {@code char} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    char readChar() throws IOException;

    /**
     * Reads four input bytes and returns an {@code int} value. Let
     * {@code a-d} be the first through fourth bytes read. The value
     * returned is:
     * <pre>{@code
     * (((a & 0xff) << 24) | ((b & 0xff) << 16) |
     *  ((c & 0xff) <<  8) | (d & 0xff))
     * }</pre>
     * This method is suitable for reading bytes written by the
     * {@code writeInt} method of interface {@code DataOutput}.
     *
     * @return  the {@code int} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readInt() throws IOException;

    /**
     * Reads eight input bytes and returns a {@code long} value. Let
     * {@code a-h} be the first through eighth bytes read.
     * The value returned is:
     * <pre>{@code
     * (((long)(a & 0xff) << 56) |
     *  ((long)(b & 0xff) << 48) |
     *  ((long)(c & 0xff) << 40) |
     *  ((long)(d & 0xff) << 32) |
     *  ((long)(e & 0xff) << 24) |
     *  ((long)(f & 0xff) << 16) |
     *  ((long)(g & 0xff) <<  8) |
     *  ((long)(h & 0xff)))
     * }</pre>
     * <p>
     * This method is suitable for reading bytes written by the
     * {@code writeLong} method of interface {@code DataOutput}.
     *
     * @return  the {@code long} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    long readLong() throws IOException;

    /**
     * Reads four input bytes and returns a {@code float} value. It does this
     * by first constructing an {@code int} value in exactly the manner of
     * the {@code readInt} method, then converting this {@code int} value to
     * a {@code float} in exactly the manner of the method
     * {@code Float.intBitsToFloat}.
     * This method is suitable for reading bytes written by the
     * {@code writeFloat} method of interface {@code DataOutput}.
     *
     * @return  the {@code float} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    float readFloat() throws IOException;

    /**
     * Reads eight input bytes and returns a {@code double} value. It does
     * this by first constructing a {@code long} value in exactly the manner
     * of the {@code readLong} method, then converting this {@code long}
     * value to a {@code double} in exactly the manner of the method
     * {@code Double.longBitsToDouble}.
     * This method is suitable for reading bytes written by the
     * {@code writeDouble} method of interface {@code DataOutput}.
     *
     * @return  the {@code double} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    double readDouble() throws IOException;

    /**
     * Reads the next line of text from the input stream.
     * It reads successive bytes, converting each byte separately into a
     * character, until it encounters a line terminator or end of file; the
     * characters read are then returned as a {@code String}. Note that
     * because this method processes bytes, it does not support input of the
     * full Unicode character set.
     * <p>
     * If end of file is encountered before even one byte can be read, then
     * {@code null} is returned. Otherwise, each byte that is read is
     * converted to type {@code char} by zero-extension. If the character
     * {@code '\n'} is encountered, it is discarded and reading ceases. If
     * the character {@code '\r'} is encountered, it is discarded and, if
     * the following byte converts to the character {@code '\n'}, then that
     * is discarded also; reading then ceases. If end of file is encountered
     * before either of the characters {@code '\n'} and {@code '\r'} is
     * encountered, reading ceases. Once reading has ceased, a
     * {@code String} is returned that contains all the characters read and
     * not discarded, taken in order.
     * Note that every character in this string will have a value less than
     * {@code \u005Cu0100}, that is, {@code (char)256}.
     *
     * @return  the next line of text from the input stream,
     *          or {@code null} if the end of file is
     *          encountered before a byte can be read.
     * @throws  IOException  if an I/O error occurs.
     */
    String readLine() throws IOException;

    /**
     * Reads in a string that has been encoded using a
     * <a href="#modified-utf-8">modified UTF-8</a>
     * format.
     * The general contract of {@code readUTF} is that it reads a
     * representation of a Unicode character string encoded in modified
     * UTF-8 format; this string of characters is then returned as a
     * {@code String}.
     * <p>
     * First, two bytes are read and used to construct an unsigned 16-bit
     * integer in exactly the manner of the {@code readUnsignedShort}
     * method. This integer value is called the <i>UTF length</i> and
     * specifies the number of additional bytes to be read. These bytes are
     * then converted to characters by considering them in groups. The
     * length of each group is computed from the value of the first byte of
     * the group. The byte following a group, if any, is the first byte of
     * the next group.
     * <p>
     * If the first byte of a group matches the bit pattern {@code 0xxxxxxx}
     * (where {@code x} means "may be {@code 0} or {@code 1}"), then the
     * group consists of just that byte. The byte is zero-extended to form a
     * character.
     * <p>
     * If the first byte of a group matches the bit pattern {@code 110xxxxx},
     * then the group consists of that byte {@code a} and a second byte
     * {@code b}. If there is no byte {@code b} (because byte {@code a} was
     * the last of the bytes to be read), or if byte {@code b} does not
     * match the bit pattern {@code 10xxxxxx}, then a
     * {@code UTFDataFormatException} is thrown. Otherwise, the group is
     * converted to the character:
     * <pre>{@code (char)(((a & 0x1F) << 6) | (b & 0x3F))
     * }</pre>
     * If the first byte of a group matches the bit pattern {@code 1110xxxx},
     * then the group consists of that byte {@code a} and two more bytes
     * {@code b} and {@code c}. If there is no byte {@code c} (because byte
     * {@code a} was one of the last two of the bytes to be read), or either
     * byte {@code b} or byte {@code c} does not match the bit pattern
     * {@code 10xxxxxx}, then a {@code UTFDataFormatException} is thrown.
     * Otherwise, the group is converted to the character:
     * <pre>{@code
     * (char)(((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F))
     * }</pre>
     * If the first byte of a group matches the pattern {@code 1111xxxx} or
     * the pattern {@code 10xxxxxx}, then a {@code UTFDataFormatException}
     * is thrown.
     * <p>
     * If end of file is encountered at any time during this entire process,
     * then an {@code EOFException} is thrown.
     * <p>
     * After every group has been converted to a character by this process,
     * the characters are gathered, in the same order in which their
     * corresponding groups were read from the input stream, to form a
     * {@code String}, which is returned.
     * <p>
     * The {@code writeUTF} method of interface {@code DataOutput} may be
     * used to write data that is suitable for reading by this method.
     *
     * @return  a Unicode string.
     * @throws  EOFException            if this stream reaches the end
     *          before reading all the bytes.
     * @throws  IOException             if an I/O error occurs.
     * @throws  UTFDataFormatException  if the bytes do not represent a
     *          valid modified UTF-8 encoding of a string.
     */
    String readUTF() throws IOException;
}