/*
 * Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25
26package java.io;
27
/**
 * The {@code DataInput} interface provides for reading bytes from a binary
 * stream and reconstructing from them data in any of the Java primitive
 * types. There is also a facility for reconstructing a {@code String} from
 * data in <a href="#modified-utf-8">modified UTF-8</a> format.
 * <p>
 * It is generally true of all the reading routines in this interface that
 * if end of file is reached before the desired number of bytes has been
 * read, an {@code EOFException} (which is a kind of {@code IOException})
 * is thrown. If any byte cannot be read for any reason other than end of
 * file, an {@code IOException} other than {@code EOFException} is thrown.
 * In particular, an {@code IOException} may be thrown if the input stream
 * has been closed.
 *
 * <h3><a id="modified-utf-8">Modified UTF-8</a></h3>
 * <p>
 * Implementations of the {@code DataInput} and {@code DataOutput}
 * interfaces represent Unicode strings in a format that is a slight
 * modification of UTF-8. (For information regarding the standard UTF-8
 * format, see section <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode
 * Standard, Version 4.0</i>.)
 *
 * <ul>
 * <li>Characters in the range {@code '\u005Cu0001'} to
 *         {@code '\u005Cu007F'} are represented by a single byte.
 * <li>The null character {@code '\u005Cu0000'} and characters
 *         in the range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
 *         represented by a pair of bytes.
 * <li>Characters in the range {@code '\u005Cu0800'}
 *         to {@code '\u005CuFFFF'} are represented by three bytes.
 * </ul>
 *
 *   <table class="plain" style="margin-left:2em;">
 *     <caption>Encoding of UTF-8 values</caption>
 *     <thead>
 *     <tr>
 *       <th scope="col" rowspan="2">Value</th>
 *       <th scope="col" rowspan="2">Byte</th>
 *       <th scope="col" colspan="8" id="bit_a">Bit Values</th>
 *     </tr>
 *     <tr>
 *       <!-- Value -->
 *       <!-- Byte -->
 *       <th scope="col" style="width:3em"> 7 </th>
 *       <th scope="col" style="width:3em"> 6 </th>
 *       <th scope="col" style="width:3em"> 5 </th>
 *       <th scope="col" style="width:3em"> 4 </th>
 *       <th scope="col" style="width:3em"> 3 </th>
 *       <th scope="col" style="width:3em"> 2 </th>
 *       <th scope="col" style="width:3em"> 1 </th>
 *       <th scope="col" style="width:3em"> 0 </th>
 *     </tr>
 *     </thead>
 *     <tbody>
 *     <tr>
 *       <th scope="row" style="text-align:left; font-weight:normal">
 *         {@code \u005Cu0001} to {@code \u005Cu007F} </th>
 *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
 *       <td style="text-align:center">0
 *       <td colspan="7" style="text-align:right; padding-right:6em">bits 6-0
 *     </tr>
 *     <tr>
 *       <th scope="row" rowspan="2" style="text-align:left; font-weight:normal">
 *           {@code \u005Cu0000},<br>
 *           {@code \u005Cu0080} to {@code \u005Cu07FF} </th>
 *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="5" style="text-align:right; padding-right:6em">bits 10-6
 *     </tr>
 *     <tr>
 *       <!-- (value) -->
 *       <th scope="row" style="font-weight:normal; text-align:center"> 2 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:right; padding-right:6em">bits 5-0
 *     </tr>
 *     <tr>
 *       <th scope="row" rowspan="3" style="text-align:left; font-weight:normal">
 *         {@code \u005Cu0800} to {@code \u005CuFFFF} </th>
 *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="4" style="text-align:right; padding-right:6em">bits 15-12
 *     </tr>
 *     <tr>
 *       <!-- (value) -->
 *       <th scope="row" style="font-weight:normal; text-align:center"> 2 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:right; padding-right:6em">bits 11-6
 *     </tr>
 *     <tr>
 *       <!-- (value) -->
 *       <th scope="row" style="font-weight:normal; text-align:center"> 3 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:right; padding-right:6em">bits 5-0
 *     </tr>
 *     </tbody>
 *   </table>
 *
 * <p>
 * The differences between this format and the
 * standard UTF-8 format are the following:
 * <ul>
 * <li>The null byte {@code '\u005Cu0000'} is encoded in 2-byte format
 *     rather than 1-byte, so that the encoded strings never have
 *     embedded nulls.
 * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 *     are represented in the form of surrogate pairs.
 * </ul>
 * @author  Frank Yellin
 * @see     java.io.DataInputStream
 * @see     java.io.DataOutput
 * @since   1.0
 */
public interface DataInput {
    /**
     * Reads some bytes from an input stream and stores them into the buffer
     * array {@code b}. The number of bytes read is equal to the length of
     * {@code b}.
     * <p>
     * This method blocks until one of the following conditions occurs:
     * <ul>
     * <li>{@code b.length} bytes of input data are available, in which case
     * a normal return is made.
     *
     * <li>End of file is detected, in which case an {@code EOFException} is
     * thrown.
     *
     * <li>An I/O error occurs, in which case an {@code IOException} other
     * than {@code EOFException} is thrown.
     * </ul>
     * <p>
     * If {@code b} is {@code null}, a {@code NullPointerException} is thrown.
     * If {@code b.length} is zero, then no bytes are read. Otherwise, the
     * first byte read is stored into element {@code b[0]}, the next one into
     * {@code b[1]}, and so on. If an exception is thrown from this method,
     * then it may be that some but not all bytes of {@code b} have been
     * updated with data from the input stream.
     *
     * @param   b   the buffer into which the data is read.
     * @throws  NullPointerException if {@code b} is {@code null}.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b) throws IOException;

    /**
     * Reads {@code len} bytes from an input stream and stores them into the
     * buffer array {@code b}, starting at index {@code off}.
     * <p>
     * This method blocks until one of the following conditions occurs:
     * <ul>
     * <li>{@code len} bytes of input data are available, in which case a
     * normal return is made.
     *
     * <li>End of file is detected, in which case an {@code EOFException} is
     * thrown.
     *
     * <li>An I/O error occurs, in which case an {@code IOException} other
     * than {@code EOFException} is thrown.
     * </ul>
     * <p>
     * If {@code b} is {@code null}, a {@code NullPointerException} is thrown.
     * If {@code off} is negative, or {@code len} is negative, or {@code len}
     * is greater than {@code b.length - off}, then an
     * {@code IndexOutOfBoundsException} is thrown. If {@code len} is zero,
     * then no bytes are read. Otherwise, the first byte read is stored into
     * element {@code b[off]}, the next one into {@code b[off+1]}, and so on.
     * On a normal return, exactly {@code len} bytes have been read.
     *
     * @param   b    the buffer into which the data is read.
     * @param   off  an int specifying the offset in the data array {@code b}.
     * @param   len  an int specifying the number of bytes to read.
     * @throws  NullPointerException if {@code b} is {@code null}.
     * @throws  IndexOutOfBoundsException if {@code off} is negative,
     *          {@code len} is negative, or {@code len} is greater than
     *          {@code b.length - off}.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b, int off, int len) throws IOException;

    /**
     * Makes an attempt to skip over {@code n} bytes of data from the input
     * stream, discarding the skipped bytes. However, it may skip over some
     * smaller number of bytes, possibly zero. This may result from any of a
     * number of conditions; reaching end of file before {@code n} bytes have
     * been skipped is only one possibility. This method never throws an
     * {@code EOFException}. The actual number of bytes skipped is returned.
     *
     * @param   n   the number of bytes to be skipped.
     * @return  the number of bytes actually skipped.
     * @throws  IOException   if an I/O error occurs.
     */
    int skipBytes(int n) throws IOException;

    /**
     * Reads one input byte and returns {@code true} if that byte is nonzero,
     * {@code false} if that byte is zero. This method is suitable for
     * reading the byte written by the {@code writeBoolean} method of
     * interface {@code DataOutput}.
     *
     * @return  the {@code boolean} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    boolean readBoolean() throws IOException;

    /**
     * Reads and returns one input byte. The byte is treated as a signed
     * value in the range {@code -128} through {@code 127}, inclusive. This
     * method is suitable for reading the byte written by the
     * {@code writeByte} method of interface {@code DataOutput}.
     *
     * @return  the 8-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    byte readByte() throws IOException;

    /**
     * Reads one input byte, zero-extends it to type {@code int}, and returns
     * the result, which is therefore in the range {@code 0} through
     * {@code 255}. This method is suitable for reading the byte written by
     * the {@code writeByte} method of interface {@code DataOutput} if the
     * argument to {@code writeByte} was intended to be a value in the range
     * {@code 0} through {@code 255}.
     *
     * @return  the unsigned 8-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readUnsignedByte() throws IOException;

    /**
     * Reads two input bytes and returns a {@code short} value. Let {@code a}
     * be the first byte read and {@code b} be the second byte. The value
     * returned is:
     * <pre>{@code (short)((a << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading the bytes written by the
     * {@code writeShort} method of interface {@code DataOutput}.
     *
     * @return  the 16-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    short readShort() throws IOException;

    /**
     * Reads two input bytes and returns an {@code int} value in the range
     * {@code 0} through {@code 65535}. Let {@code a} be the first byte read
     * and {@code b} be the second byte. The value returned is:
     * <pre>{@code (((a & 0xff) << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading the bytes written by the
     * {@code writeShort} method of interface {@code DataOutput} if the
     * argument to {@code writeShort} was intended to be a value in the range
     * {@code 0} through {@code 65535}.
     *
     * @return  the unsigned 16-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readUnsignedShort() throws IOException;

    /**
     * Reads two input bytes and returns a {@code char} value. Let {@code a}
     * be the first byte read and {@code b} be the second byte. The value
     * returned is:
     * <pre>{@code (char)((a << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading bytes written by the
     * {@code writeChar} method of interface {@code DataOutput}.
     *
     * @return  the {@code char} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    char readChar() throws IOException;

    /**
     * Reads four input bytes and returns an {@code int} value. Let
     * {@code a-d} be the first through fourth bytes read. The value returned
     * is:
     * <pre>{@code
     * (((a & 0xff) << 24) | ((b & 0xff) << 16) |
     *  ((c & 0xff) <<  8) | (d & 0xff))
     * }</pre>
     * This method is suitable for reading bytes written by the
     * {@code writeInt} method of interface {@code DataOutput}.
     *
     * @return  the {@code int} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readInt() throws IOException;

    /**
     * Reads eight input bytes and returns a {@code long} value. Let
     * {@code a-h} be the first through eighth bytes read. The value returned
     * is:
     * <pre>{@code
     * (((long)(a & 0xff) << 56) |
     *  ((long)(b & 0xff) << 48) |
     *  ((long)(c & 0xff) << 40) |
     *  ((long)(d & 0xff) << 32) |
     *  ((long)(e & 0xff) << 24) |
     *  ((long)(f & 0xff) << 16) |
     *  ((long)(g & 0xff) <<  8) |
     *  ((long)(h & 0xff)))
     * }</pre>
     * <p>
     * This method is suitable for reading bytes written by the
     * {@code writeLong} method of interface {@code DataOutput}.
     *
     * @return  the {@code long} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    long readLong() throws IOException;

    /**
     * Reads four input bytes and returns a {@code float} value. It does this
     * by first constructing an {@code int} value in exactly the manner of
     * the {@code readInt} method, then converting this {@code int} value to
     * a {@code float} in exactly the manner of the method
     * {@code Float.intBitsToFloat}. This method is suitable for reading
     * bytes written by the {@code writeFloat} method of interface
     * {@code DataOutput}.
     *
     * @return  the {@code float} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    float readFloat() throws IOException;

    /**
     * Reads eight input bytes and returns a {@code double} value. It does
     * this by first constructing a {@code long} value in exactly the manner
     * of the {@code readLong} method, then converting this {@code long}
     * value to a {@code double} in exactly the manner of the method
     * {@code Double.longBitsToDouble}. This method is suitable for reading
     * bytes written by the {@code writeDouble} method of interface
     * {@code DataOutput}.
     *
     * @return  the {@code double} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    double readDouble() throws IOException;

    /**
     * Reads the next line of text from the input stream. It reads successive
     * bytes, converting each byte separately into a character, until it
     * encounters a line terminator or end of file; the characters read are
     * then returned as a {@code String}. Note that because this method
     * processes bytes, it does not support input of the full Unicode
     * character set.
     * <p>
     * If end of file is encountered before even one byte can be read, then
     * {@code null} is returned. Otherwise, each byte that is read is
     * converted to type {@code char} by zero-extension. If the character
     * {@code '\n'} is encountered, it is discarded and reading ceases. If
     * the character {@code '\r'} is encountered, it is discarded and, if the
     * following byte converts to the character {@code '\n'}, then that is
     * discarded also; reading then ceases. If end of file is encountered
     * before either of the characters {@code '\n'} and {@code '\r'} is
     * encountered, reading ceases. Once reading has ceased, a {@code String}
     * is returned that contains all the characters read and not discarded,
     * taken in order. Note that every character in this string will have a
     * value less than {@code \u005Cu0100}, that is, {@code (char)256}.
     *
     * @return  the next line of text from the input stream,
     *          or {@code null} if the end of file is
     *          encountered before a byte can be read.
     * @throws  IOException  if an I/O error occurs.
     */
    String readLine() throws IOException;

    /**
     * Reads in a string that has been encoded using a
     * <a href="#modified-utf-8">modified UTF-8</a> format. The general
     * contract of {@code readUTF} is that it reads a representation of a
     * Unicode character string encoded in modified UTF-8 format; this string
     * of characters is then returned as a {@code String}.
     * <p>
     * First, two bytes are read and used to construct an unsigned 16-bit
     * integer in exactly the manner of the {@code readUnsignedShort} method.
     * This integer value is called the <i>UTF length</i> and specifies the
     * number of additional bytes to be read. These bytes are then converted
     * to characters by considering them in groups. The length of each group
     * is computed from the value of the first byte of the group. The byte
     * following a group, if any, is the first byte of the next group.
     * <p>
     * If the first byte of a group matches the bit pattern {@code 0xxxxxxx}
     * (where {@code x} means "may be {@code 0} or {@code 1}"), then the
     * group consists of just that byte. The byte is zero-extended to form a
     * character.
     * <p>
     * If the first byte of a group matches the bit pattern {@code 110xxxxx},
     * then the group consists of that byte {@code a} and a second byte
     * {@code b}. If there is no byte {@code b} (because byte {@code a} was
     * the last of the bytes to be read), or if byte {@code b} does not match
     * the bit pattern {@code 10xxxxxx}, then a
     * {@code UTFDataFormatException} is thrown. Otherwise, the group is
     * converted to the character:
     * <pre>{@code (char)(((a & 0x1F) << 6) | (b & 0x3F))
     * }</pre>
     * If the first byte of a group matches the bit pattern {@code 1110xxxx},
     * then the group consists of that byte {@code a} and two more bytes
     * {@code b} and {@code c}. If there is no byte {@code c} (because byte
     * {@code a} was one of the last two of the bytes to be read), or either
     * byte {@code b} or byte {@code c} does not match the bit pattern
     * {@code 10xxxxxx}, then a {@code UTFDataFormatException} is thrown.
     * Otherwise, the group is converted to the character:
     * <pre>{@code
     * (char)(((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F))
     * }</pre>
     * If the first byte of a group matches the pattern {@code 1111xxxx} or
     * the pattern {@code 10xxxxxx}, then a {@code UTFDataFormatException} is
     * thrown.
     * <p>
     * If end of file is encountered at any time during this entire process,
     * then an {@code EOFException} is thrown.
     * <p>
     * After every group has been converted to a character by this process,
     * the characters are gathered, in the same order in which their
     * corresponding groups were read from the input stream, to form a
     * {@code String}, which is returned.
     * <p>
     * The {@code writeUTF} method of interface {@code DataOutput} may be
     * used to write data that is suitable for reading by this method.
     *
     * @return  a Unicode string.
     * @throws  EOFException            if this stream reaches the end
     *          before reading all the bytes.
     * @throws  IOException             if an I/O error occurs.
     * @throws  UTFDataFormatException  if the bytes do not represent a
     *          valid modified UTF-8 encoding of a string.
     */
    String readUTF() throws IOException;
}
604