Scanner.java revision 13532:859397229dc4
1/* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package java.util; 27 28import java.io.*; 29import java.math.*; 30import java.nio.*; 31import java.nio.channels.*; 32import java.nio.charset.*; 33import java.nio.file.Path; 34import java.nio.file.Files; 35import java.text.*; 36import java.util.function.Consumer; 37import java.util.regex.*; 38import java.util.stream.Stream; 39import java.util.stream.StreamSupport; 40 41import sun.misc.LRUCache; 42 43/** 44 * A simple text scanner which can parse primitive types and strings using 45 * regular expressions. 46 * 47 * <p>A {@code Scanner} breaks its input into tokens using a 48 * delimiter pattern, which by default matches whitespace. 
The resulting 49 * tokens may then be converted into values of different types using the 50 * various {@code next} methods. 51 * 52 * <p>For example, this code allows a user to read a number from 53 * {@code System.in}: 54 * <blockquote><pre>{@code 55 * Scanner sc = new Scanner(System.in); 56 * int i = sc.nextInt(); 57 * }</pre></blockquote> 58 * 59 * <p>As another example, this code allows {@code long} types to be 60 * assigned from entries in a file {@code myNumbers}: 61 * <blockquote><pre>{@code 62 * Scanner sc = new Scanner(new File("myNumbers")); 63 * while (sc.hasNextLong()) { 64 * long aLong = sc.nextLong(); 65 * } 66 * }</pre></blockquote> 67 * 68 * <p>The scanner can also use delimiters other than whitespace. This 69 * example reads several items in from a string: 70 * <blockquote><pre>{@code 71 * String input = "1 fish 2 fish red fish blue fish"; 72 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 73 * System.out.println(s.nextInt()); 74 * System.out.println(s.nextInt()); 75 * System.out.println(s.next()); 76 * System.out.println(s.next()); 77 * s.close(); 78 * }</pre></blockquote> 79 * <p> 80 * prints the following output: 81 * <blockquote><pre>{@code 82 * 1 83 * 2 84 * red 85 * blue 86 * }</pre></blockquote> 87 * 88 * <p>The same output can be generated with this code, which uses a regular 89 * expression to parse all four tokens at once: 90 * <blockquote><pre>{@code 91 * String input = "1 fish 2 fish red fish blue fish"; 92 * Scanner s = new Scanner(input); 93 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 94 * MatchResult result = s.match(); 95 * for (int i=1; i<=result.groupCount(); i++) 96 * System.out.println(result.group(i)); 97 * s.close(); 98 * }</pre></blockquote> 99 * 100 * <p>The <a name="default-delimiter">default whitespace delimiter</a> used 101 * by a scanner is as recognized by {@link Character#isWhitespace(char) 102 * Character.isWhitespace()}. 
The {@link #reset reset()} 103 * method will reset the value of the scanner's delimiter to the default 104 * whitespace delimiter regardless of whether it was previously changed. 105 * 106 * <p>A scanning operation may block waiting for input. 107 * 108 * <p>The {@link #next} and {@link #hasNext} methods and their 109 * companion methods (such as {@link #nextInt} and 110 * {@link #hasNextInt}) first skip any input that matches the delimiter 111 * pattern, and then attempt to return the next token. Both {@code hasNext()} 112 * and {@code next()} methods may block waiting for further input. Whether a 113 * {@code hasNext()} method blocks has no connection to whether or not its 114 * associated {@code next()} method will block. The {@link #tokens} method 115 * may also block waiting for input. 116 * 117 * <p>The {@link #findInLine findInLine()}, 118 * {@link #findWithinHorizon findWithinHorizon()}, 119 * {@link #skip skip()}, and {@link #findAll findAll()} 120 * methods operate independently of the delimiter pattern. These methods will 121 * attempt to match the specified pattern with no regard to delimiters in the 122 * input and thus can be used in special circumstances where delimiters are 123 * not relevant. These methods may block waiting for more input. 124 * 125 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 126 * will not pass the token that caused the exception, so that it may be 127 * retrieved or skipped via some other method. 128 * 129 * <p>Depending upon the type of delimiting pattern, empty tokens may be 130 * returned. For example, the pattern {@code "\\s+"} will return no empty 131 * tokens since it matches multiple instances of the delimiter. The delimiting 132 * pattern {@code "\\s"} could return empty tokens since it only passes one 133 * space at a time. 134 * 135 * <p> A scanner can read text from any object which implements the {@link 136 * java.lang.Readable} interface. 
If an invocation of the underlying 137 * readable's {@link java.lang.Readable#read read()} method throws an {@link 138 * java.io.IOException} then the scanner assumes that the end of the input 139 * has been reached. The most recent {@code IOException} thrown by the 140 * underlying readable can be retrieved via the {@link #ioException} method. 141 * 142 * <p>When a {@code Scanner} is closed, it will close its input source 143 * if the source implements the {@link java.io.Closeable} interface. 144 * 145 * <p>A {@code Scanner} is not safe for multithreaded use without 146 * external synchronization. 147 * 148 * <p>Unless otherwise mentioned, passing a {@code null} parameter into 149 * any method of a {@code Scanner} will cause a 150 * {@code NullPointerException} to be thrown. 151 * 152 * <p>A scanner will default to interpreting numbers as decimal unless a 153 * different radix has been set by using the {@link #useRadix} method. The 154 * {@link #reset} method will reset the value of the scanner's radix to 155 * {@code 10} regardless of whether it was previously changed. 156 * 157 * <h3> <a name="localized-numbers">Localized numbers</a> </h3> 158 * 159 * <p> An instance of this class is capable of scanning numbers in the standard 160 * formats as well as in the formats of the scanner's locale. A scanner's 161 * <a name="initial-locale">initial locale </a>is the value returned by the {@link 162 * java.util.Locale#getDefault(Locale.Category) 163 * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link 164 * #useLocale useLocale()} method. The {@link #reset} method will reset the value of the 165 * scanner's locale to the initial locale regardless of whether it was 166 * previously changed. 
167 * 168 * <p>The localized formats are defined in terms of the following parameters, 169 * which for a particular locale are taken from that locale's {@link 170 * java.text.DecimalFormat DecimalFormat} object, {@code df}, and its 171 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object, 172 * {@code dfs}. 173 * 174 * <blockquote><dl> 175 * <dt><i>LocalGroupSeparator </i> 176 * <dd>The character used to separate thousands groups, 177 * <i>i.e.,</i> {@code dfs.}{@link 178 * java.text.DecimalFormatSymbols#getGroupingSeparator 179 * getGroupingSeparator()} 180 * <dt><i>LocalDecimalSeparator </i> 181 * <dd>The character used for the decimal point, 182 * <i>i.e.,</i> {@code dfs.}{@link 183 * java.text.DecimalFormatSymbols#getDecimalSeparator 184 * getDecimalSeparator()} 185 * <dt><i>LocalPositivePrefix </i> 186 * <dd>The string that appears before a positive number (may 187 * be empty), <i>i.e.,</i> {@code df.}{@link 188 * java.text.DecimalFormat#getPositivePrefix 189 * getPositivePrefix()} 190 * <dt><i>LocalPositiveSuffix </i> 191 * <dd>The string that appears after a positive number (may be 192 * empty), <i>i.e.,</i> {@code df.}{@link 193 * java.text.DecimalFormat#getPositiveSuffix 194 * getPositiveSuffix()} 195 * <dt><i>LocalNegativePrefix </i> 196 * <dd>The string that appears before a negative number (may 197 * be empty), <i>i.e.,</i> {@code df.}{@link 198 * java.text.DecimalFormat#getNegativePrefix 199 * getNegativePrefix()} 200 * <dt><i>LocalNegativeSuffix </i> 201 * <dd>The string that appears after a negative number (may be 202 * empty), <i>i.e.,</i> {@code df.}{@link 203 * java.text.DecimalFormat#getNegativeSuffix 204 * getNegativeSuffix()} 205 * <dt><i>LocalNaN </i> 206 * <dd>The string that represents not-a-number for 207 * floating-point values, 208 * <i>i.e.,</i> {@code dfs.}{@link 209 * java.text.DecimalFormatSymbols#getNaN 210 * getNaN()} 211 * <dt><i>LocalInfinity </i> 212 * <dd>The string that represents infinity for floating-point 
213 * values, <i>i.e.,</i> {@code dfs.}{@link 214 * java.text.DecimalFormatSymbols#getInfinity 215 * getInfinity()} 216 * </dl></blockquote> 217 * 218 * <h4> <a name="number-syntax">Number syntax</a> </h4> 219 * 220 * <p> The strings that can be parsed as numbers by an instance of this class 221 * are specified in terms of the following regular-expression grammar, where 222 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 in base 10). 223 * 224 * <dl> 225 * <dt><i>NonAsciiDigit</i>: 226 * <dd>A non-ASCII character c for which 227 * {@link java.lang.Character#isDigit Character.isDigit}{@code (c)} 228 * returns true 229 * 230 * <dt><i>Non0Digit</i>: 231 * <dd>{@code [1-}<i>Rmax</i>{@code ] | }<i>NonASCIIDigit</i> 232 * 233 * <dt><i>Digit</i>: 234 * <dd>{@code [0-}<i>Rmax</i>{@code ] | }<i>NonASCIIDigit</i> 235 * 236 * <dt><i>GroupedNumeral</i>: 237 * <dd><code>( </code><i>Non0Digit</i> 238 * <i>Digit</i>{@code ? 239 * }<i>Digit</i>{@code ?} 240 * <dd> <code>( </code><i>LocalGroupSeparator</i> 241 * <i>Digit</i> 242 * <i>Digit</i> 243 * <i>Digit</i>{@code )+ )} 244 * 245 * <dt><i>Numeral</i>: 246 * <dd>{@code ( ( }<i>Digit</i>{@code + ) 247 * | }<i>GroupedNumeral</i>{@code )} 248 * 249 * <dt><a name="Integer-regex"><i>Integer</i>:</a> 250 * <dd>{@code ( [-+]? ( }<i>Numeral</i>{@code 251 * ) )} 252 * <dd>{@code | }<i>LocalPositivePrefix</i> <i>Numeral</i> 253 * <i>LocalPositiveSuffix</i> 254 * <dd>{@code | }<i>LocalNegativePrefix</i> <i>Numeral</i> 255 * <i>LocalNegativeSuffix</i> 256 * 257 * <dt><i>DecimalNumeral</i>: 258 * <dd><i>Numeral</i> 259 * <dd>{@code | }<i>Numeral</i> 260 * <i>LocalDecimalSeparator</i> 261 * <i>Digit</i>{@code *} 262 * <dd>{@code | }<i>LocalDecimalSeparator</i> 263 * <i>Digit</i>{@code +} 264 * 265 * <dt><i>Exponent</i>: 266 * <dd>{@code ( [eE] [+-]? }<i>Digit</i>{@code + )} 267 * 268 * <dt><a name="Decimal-regex"><i>Decimal</i>:</a> 269 * <dd>{@code ( [-+]? }<i>DecimalNumeral</i> 270 * <i>Exponent</i>{@code ? 
)} 271 * <dd>{@code | }<i>LocalPositivePrefix</i> 272 * <i>DecimalNumeral</i> 273 * <i>LocalPositiveSuffix</i> 274 * <i>Exponent</i>{@code ?} 275 * <dd>{@code | }<i>LocalNegativePrefix</i> 276 * <i>DecimalNumeral</i> 277 * <i>LocalNegativeSuffix</i> 278 * <i>Exponent</i>{@code ?} 279 * 280 * <dt><i>HexFloat</i>: 281 * <dd>{@code [-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+ 282 * ([pP][-+]?[0-9]+)?} 283 * 284 * <dt><i>NonNumber</i>: 285 * <dd>{@code NaN 286 * | }<i>LocalNaN</i>{@code 287 * | Infinity 288 * | }<i>LocalInfinity</i> 289 * 290 * <dt><i>SignedNonNumber</i>: 291 * <dd>{@code ( [-+]? }<i>NonNumber</i>{@code )} 292 * <dd>{@code | }<i>LocalPositivePrefix</i> 293 * <i>NonNumber</i> 294 * <i>LocalPositiveSuffix</i> 295 * <dd>{@code | }<i>LocalNegativePrefix</i> 296 * <i>NonNumber</i> 297 * <i>LocalNegativeSuffix</i> 298 * 299 * <dt><a name="Float-regex"><i>Float</i></a>: 300 * <dd><i>Decimal</i> 301 * {@code | }<i>HexFloat</i> 302 * {@code | }<i>SignedNonNumber</i> 303 * 304 * </dl> 305 * <p>Whitespace is not significant in the above regular expressions. 
306 * 307 * @since 1.5 308 */ 309public final class Scanner implements Iterator<String>, Closeable { 310 311 // Internal buffer used to hold input 312 private CharBuffer buf; 313 314 // Size of internal character buffer 315 private static final int BUFFER_SIZE = 1024; // change to 1024; 316 317 // The index into the buffer currently held by the Scanner 318 private int position; 319 320 // Internal matcher used for finding delimiters 321 private Matcher matcher; 322 323 // Pattern used to delimit tokens 324 private Pattern delimPattern; 325 326 // Pattern found in last hasNext operation 327 private Pattern hasNextPattern; 328 329 // Position after last hasNext operation 330 private int hasNextPosition; 331 332 // Result after last hasNext operation 333 private String hasNextResult; 334 335 // The input source 336 private Readable source; 337 338 // Boolean is true if source is done 339 private boolean sourceClosed = false; 340 341 // Boolean indicating more input is required 342 private boolean needInput = false; 343 344 // Boolean indicating if a delim has been skipped this operation 345 private boolean skipped = false; 346 347 // A store of a position that the scanner may fall back to 348 private int savedScannerPosition = -1; 349 350 // A cache of the last primitive type scanned 351 private Object typeCache = null; 352 353 // Boolean indicating if a match result is available 354 private boolean matchValid = false; 355 356 // Boolean indicating if this scanner has been closed 357 private boolean closed = false; 358 359 // The current radix used by this scanner 360 private int radix = 10; 361 362 // The default radix for this scanner 363 private int defaultRadix = 10; 364 365 // The locale used by this scanner 366 private Locale locale = null; 367 368 // A cache of the last few recently used Patterns 369 private LRUCache<String,Pattern> patternCache = 370 new LRUCache<String,Pattern>(7) { 371 protected Pattern create(String s) { 372 return Pattern.compile(s); 373 } 
374 protected boolean hasName(Pattern p, String s) { 375 return p.pattern().equals(s); 376 } 377 }; 378 379 // A holder of the last IOException encountered 380 private IOException lastException; 381 382 // Number of times this scanner's state has been modified. 383 // Generally incremented on most public APIs and checked 384 // within spliterator implementations. 385 int modCount; 386 387 // A pattern for java whitespace 388 private static Pattern WHITESPACE_PATTERN = Pattern.compile( 389 "\\p{javaWhitespace}+"); 390 391 // A pattern for any token 392 private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 393 394 // A pattern for non-ASCII digits 395 private static Pattern NON_ASCII_DIGIT = Pattern.compile( 396 "[\\p{javaDigit}&&[^0-9]]"); 397 398 // Fields and methods to support scanning primitive types 399 400 /** 401 * Locale dependent values used to scan numbers 402 */ 403 private String groupSeparator = "\\,"; 404 private String decimalSeparator = "\\."; 405 private String nanString = "NaN"; 406 private String infinityString = "Infinity"; 407 private String positivePrefix = ""; 408 private String negativePrefix = "\\-"; 409 private String positiveSuffix = ""; 410 private String negativeSuffix = ""; 411 412 /** 413 * Fields and an accessor method to match booleans 414 */ 415 private static volatile Pattern boolPattern; 416 private static final String BOOLEAN_PATTERN = "true|false"; 417 private static Pattern boolPattern() { 418 Pattern bp = boolPattern; 419 if (bp == null) 420 boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN, 421 Pattern.CASE_INSENSITIVE); 422 return bp; 423 } 424 425 /** 426 * Fields and methods to match bytes, shorts, ints, and longs 427 */ 428 private Pattern integerPattern; 429 private String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 430 private String non0Digit = "[\\p{javaDigit}&&[^0]]"; 431 private int SIMPLE_GROUP_INDEX = 5; 432 private String buildIntegerPatternString() { 433 String radixDigits = 
digits.substring(0, radix); 434 // \\p{javaDigit} is not guaranteed to be appropriate 435 // here but what can we do? The final authority will be 436 // whatever parse method is invoked, so ultimately the 437 // Scanner will do the right thing 438 String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})"; 439 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 440 groupSeparator+digit+digit+digit+")+)"; 441 // digit++ is the possessive form which is necessary for reducing 442 // backtracking that would otherwise cause unacceptable performance 443 String numeral = "(("+ digit+"++)|"+groupedNumeral+")"; 444 String javaStyleInteger = "([-+]?(" + numeral + "))"; 445 String negativeInteger = negativePrefix + numeral + negativeSuffix; 446 String positiveInteger = positivePrefix + numeral + positiveSuffix; 447 return "("+ javaStyleInteger + ")|(" + 448 positiveInteger + ")|(" + 449 negativeInteger + ")"; 450 } 451 private Pattern integerPattern() { 452 if (integerPattern == null) { 453 integerPattern = patternCache.forName(buildIntegerPatternString()); 454 } 455 return integerPattern; 456 } 457 458 /** 459 * Fields and an accessor method to match line separators 460 */ 461 private static volatile Pattern separatorPattern; 462 private static volatile Pattern linePattern; 463 private static final String LINE_SEPARATOR_PATTERN = 464 "\r\n|[\n\r\u2028\u2029\u0085]"; 465 private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$"; 466 467 private static Pattern separatorPattern() { 468 Pattern sp = separatorPattern; 469 if (sp == null) 470 separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN); 471 return sp; 472 } 473 474 private static Pattern linePattern() { 475 Pattern lp = linePattern; 476 if (lp == null) 477 linePattern = lp = Pattern.compile(LINE_PATTERN); 478 return lp; 479 } 480 481 /** 482 * Fields and methods to match floats and doubles 483 */ 484 private Pattern floatPattern; 485 private Pattern decimalPattern; 486 private void 
buildFloatAndDecimalPattern() { 487 // \\p{javaDigit} may not be perfect, see above 488 String digit = "([0-9]|(\\p{javaDigit}))"; 489 String exponent = "([eE][+-]?"+digit+"+)?"; 490 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 491 groupSeparator+digit+digit+digit+")+)"; 492 // Once again digit++ is used for performance, as above 493 String numeral = "(("+digit+"++)|"+groupedNumeral+")"; 494 String decimalNumeral = "("+numeral+"|"+numeral + 495 decimalSeparator + digit + "*+|"+ decimalSeparator + 496 digit + "++)"; 497 String nonNumber = "(NaN|"+nanString+"|Infinity|"+ 498 infinityString+")"; 499 String positiveFloat = "(" + positivePrefix + decimalNumeral + 500 positiveSuffix + exponent + ")"; 501 String negativeFloat = "(" + negativePrefix + decimalNumeral + 502 negativeSuffix + exponent + ")"; 503 String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+ 504 positiveFloat + "|" + negativeFloat + ")"; 505 String hexFloat = 506 "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?"; 507 String positiveNonNumber = "(" + positivePrefix + nonNumber + 508 positiveSuffix + ")"; 509 String negativeNonNumber = "(" + negativePrefix + nonNumber + 510 negativeSuffix + ")"; 511 String signedNonNumber = "(([-+]?"+nonNumber+")|" + 512 positiveNonNumber + "|" + 513 negativeNonNumber + ")"; 514 floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + 515 signedNonNumber); 516 decimalPattern = Pattern.compile(decimal); 517 } 518 private Pattern floatPattern() { 519 if (floatPattern == null) { 520 buildFloatAndDecimalPattern(); 521 } 522 return floatPattern; 523 } 524 private Pattern decimalPattern() { 525 if (decimalPattern == null) { 526 buildFloatAndDecimalPattern(); 527 } 528 return decimalPattern; 529 } 530 531 // Constructors 532 533 /** 534 * Constructs a {@code Scanner} that returns values scanned 535 * from the specified source delimited by the specified pattern. 
536 * 537 * @param source A character source implementing the Readable interface 538 * @param pattern A delimiting pattern 539 */ 540 private Scanner(Readable source, Pattern pattern) { 541 assert source != null : "source should not be null"; 542 assert pattern != null : "pattern should not be null"; 543 this.source = source; 544 delimPattern = pattern; 545 buf = CharBuffer.allocate(BUFFER_SIZE); 546 buf.limit(0); 547 matcher = delimPattern.matcher(buf); 548 matcher.useTransparentBounds(true); 549 matcher.useAnchoringBounds(false); 550 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 551 } 552 553 /** 554 * Constructs a new {@code Scanner} that produces values scanned 555 * from the specified source. 556 * 557 * @param source A character source implementing the {@link Readable} 558 * interface 559 */ 560 public Scanner(Readable source) { 561 this(Objects.requireNonNull(source, "source"), WHITESPACE_PATTERN); 562 } 563 564 /** 565 * Constructs a new {@code Scanner} that produces values scanned 566 * from the specified input stream. Bytes from the stream are converted 567 * into characters using the underlying platform's 568 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 569 * 570 * @param source An input stream to be scanned 571 */ 572 public Scanner(InputStream source) { 573 this(new InputStreamReader(source), WHITESPACE_PATTERN); 574 } 575 576 /** 577 * Constructs a new {@code Scanner} that produces values scanned 578 * from the specified input stream. Bytes from the stream are converted 579 * into characters using the specified charset. 
580 * 581 * @param source An input stream to be scanned 582 * @param charsetName The encoding type used to convert bytes from the 583 * stream into characters to be scanned 584 * @throws IllegalArgumentException if the specified character set 585 * does not exist 586 */ 587 public Scanner(InputStream source, String charsetName) { 588 this(makeReadable(Objects.requireNonNull(source, "source"), toCharset(charsetName)), 589 WHITESPACE_PATTERN); 590 } 591 592 /** 593 * Returns a charset object for the given charset name. 594 * @throws NullPointerException is csn is null 595 * @throws IllegalArgumentException if the charset is not supported 596 */ 597 private static Charset toCharset(String csn) { 598 Objects.requireNonNull(csn, "charsetName"); 599 try { 600 return Charset.forName(csn); 601 } catch (IllegalCharsetNameException|UnsupportedCharsetException e) { 602 // IllegalArgumentException should be thrown 603 throw new IllegalArgumentException(e); 604 } 605 } 606 607 private static Readable makeReadable(InputStream source, Charset charset) { 608 return new InputStreamReader(source, charset); 609 } 610 611 /** 612 * Constructs a new {@code Scanner} that produces values scanned 613 * from the specified file. Bytes from the file are converted into 614 * characters using the underlying platform's 615 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 616 * 617 * @param source A file to be scanned 618 * @throws FileNotFoundException if source is not found 619 */ 620 public Scanner(File source) throws FileNotFoundException { 621 this((ReadableByteChannel)(new FileInputStream(source).getChannel())); 622 } 623 624 /** 625 * Constructs a new {@code Scanner} that produces values scanned 626 * from the specified file. Bytes from the file are converted into 627 * characters using the specified charset. 
628 * 629 * @param source A file to be scanned 630 * @param charsetName The encoding type used to convert bytes from the file 631 * into characters to be scanned 632 * @throws FileNotFoundException if source is not found 633 * @throws IllegalArgumentException if the specified encoding is 634 * not found 635 */ 636 public Scanner(File source, String charsetName) 637 throws FileNotFoundException 638 { 639 this(Objects.requireNonNull(source), toDecoder(charsetName)); 640 } 641 642 private Scanner(File source, CharsetDecoder dec) 643 throws FileNotFoundException 644 { 645 this(makeReadable((ReadableByteChannel)(new FileInputStream(source).getChannel()), dec)); 646 } 647 648 private static CharsetDecoder toDecoder(String charsetName) { 649 Objects.requireNonNull(charsetName, "charsetName"); 650 try { 651 return Charset.forName(charsetName).newDecoder(); 652 } catch (IllegalCharsetNameException|UnsupportedCharsetException unused) { 653 throw new IllegalArgumentException(charsetName); 654 } 655 } 656 657 private static Readable makeReadable(ReadableByteChannel source, 658 CharsetDecoder dec) { 659 return Channels.newReader(source, dec, -1); 660 } 661 662 /** 663 * Constructs a new {@code Scanner} that produces values scanned 664 * from the specified file. Bytes from the file are converted into 665 * characters using the underlying platform's 666 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 667 * 668 * @param source 669 * the path to the file to be scanned 670 * @throws IOException 671 * if an I/O error occurs opening source 672 * 673 * @since 1.7 674 */ 675 public Scanner(Path source) 676 throws IOException 677 { 678 this(Files.newInputStream(source)); 679 } 680 681 /** 682 * Constructs a new {@code Scanner} that produces values scanned 683 * from the specified file. Bytes from the file are converted into 684 * characters using the specified charset. 
685 * 686 * @param source 687 * the path to the file to be scanned 688 * @param charsetName 689 * The encoding type used to convert bytes from the file 690 * into characters to be scanned 691 * @throws IOException 692 * if an I/O error occurs opening source 693 * @throws IllegalArgumentException 694 * if the specified encoding is not found 695 * @since 1.7 696 */ 697 public Scanner(Path source, String charsetName) throws IOException { 698 this(Objects.requireNonNull(source), toCharset(charsetName)); 699 } 700 701 private Scanner(Path source, Charset charset) throws IOException { 702 this(makeReadable(Files.newInputStream(source), charset)); 703 } 704 705 /** 706 * Constructs a new {@code Scanner} that produces values scanned 707 * from the specified string. 708 * 709 * @param source A string to scan 710 */ 711 public Scanner(String source) { 712 this(new StringReader(source), WHITESPACE_PATTERN); 713 } 714 715 /** 716 * Constructs a new {@code Scanner} that produces values scanned 717 * from the specified channel. Bytes from the source are converted into 718 * characters using the underlying platform's 719 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 720 * 721 * @param source A channel to scan 722 */ 723 public Scanner(ReadableByteChannel source) { 724 this(makeReadable(Objects.requireNonNull(source, "source")), 725 WHITESPACE_PATTERN); 726 } 727 728 private static Readable makeReadable(ReadableByteChannel source) { 729 return makeReadable(source, Charset.defaultCharset().newDecoder()); 730 } 731 732 /** 733 * Constructs a new {@code Scanner} that produces values scanned 734 * from the specified channel. Bytes from the source are converted into 735 * characters using the specified charset. 
*
 * @param source A channel to scan
 * @param charsetName The encoding type used to convert bytes from the
 *        channel into characters to be scanned
 * @throws IllegalArgumentException if the specified character set
 *         does not exist
 */
public Scanner(ReadableByteChannel source, String charsetName) {
    this(
        makeReadable(
            Objects.requireNonNull(source, "source"),
            toDecoder(charsetName)),
        WHITESPACE_PATTERN);
}

// Private primitives used to support scanning

// Remembers the current position so that it can be restored later.
private void saveState() {
    savedScannerPosition = position;
}

// Restores the remembered position, forgets it, and clears the
// "delimiters already skipped" flag.
private void revertState() {
    position = savedScannerPosition;
    savedScannerPosition = -1;
    skipped = false;
}

// Same as revertState(), handing the argument straight back so that a
// caller can revert and return in a single statement.
private boolean revertState(boolean b) {
    revertState();
    return b;
}

// Records the matcher's current match as the outcome of a hasNext operation.
private void cacheResult() {
    cacheResult(matcher.group());
}

// Records the given string as the outcome of a hasNext operation, taking
// the end position and the pattern from the matcher.
private void cacheResult(String result) {
    hasNextResult = result;
    hasNextPosition = matcher.end();
    hasNextPattern = matcher.pattern();
}

// Drops the hasNext cache as well as the type cache
private void clearCaches() {
    hasNextPattern = null;
    typeCache = null;
}

// Consumes the cached hasNext result: moves to the cached position,
// drops both caches, and returns the cached string
private String getCachedResult() {
    position = hasNextPosition;
    clearCaches();
    return hasNextResult;
}

// Consumes the cached typed value: fails if the scanner is closed,
// otherwise moves to the cached position and drops both caches
private void useTypeCache() {
    if (closed) {
        throw new IllegalStateException("Scanner closed");
    }
    position = hasNextPosition;
    clearCaches();
}

// Tries to read more input. May block.
803 private void readInput() { 804 if (buf.limit() == buf.capacity()) 805 makeSpace(); 806 807 // Prepare to receive data 808 int p = buf.position(); 809 buf.position(buf.limit()); 810 buf.limit(buf.capacity()); 811 812 int n = 0; 813 try { 814 n = source.read(buf); 815 } catch (IOException ioe) { 816 lastException = ioe; 817 n = -1; 818 } 819 820 if (n == -1) { 821 sourceClosed = true; 822 needInput = false; 823 } 824 825 if (n > 0) 826 needInput = false; 827 828 // Restore current position and limit for reading 829 buf.limit(buf.position()); 830 buf.position(p); 831 } 832 833 // After this method is called there will either be an exception 834 // or else there will be space in the buffer 835 private boolean makeSpace() { 836 clearCaches(); 837 int offset = savedScannerPosition == -1 ? 838 position : savedScannerPosition; 839 buf.position(offset); 840 // Gain space by compacting buffer 841 if (offset > 0) { 842 buf.compact(); 843 translateSavedIndexes(offset); 844 position -= offset; 845 buf.flip(); 846 return true; 847 } 848 // Gain space by growing buffer 849 int newSize = buf.capacity() * 2; 850 CharBuffer newBuf = CharBuffer.allocate(newSize); 851 newBuf.put(buf); 852 newBuf.flip(); 853 translateSavedIndexes(offset); 854 position -= offset; 855 buf = newBuf; 856 matcher.reset(buf); 857 return true; 858 } 859 860 // When a buffer compaction/reallocation occurs the saved indexes must 861 // be modified appropriately 862 private void translateSavedIndexes(int offset) { 863 if (savedScannerPosition != -1) 864 savedScannerPosition -= offset; 865 } 866 867 // If we are at the end of input then NoSuchElement; 868 // If there is still input left then InputMismatch 869 private void throwFor() { 870 skipped = false; 871 if ((sourceClosed) && (position == buf.limit())) 872 throw new NoSuchElementException(); 873 else 874 throw new InputMismatchException(); 875 } 876 877 // Returns true if a complete token or partial token is in the buffer. 
878 // It is not necessary to find a complete token since a partial token 879 // means that there will be another token with or without more input. 880 private boolean hasTokenInBuffer() { 881 matchValid = false; 882 matcher.usePattern(delimPattern); 883 matcher.region(position, buf.limit()); 884 885 // Skip delims first 886 if (matcher.lookingAt()) 887 position = matcher.end(); 888 889 // If we are sitting at the end, no more tokens in buffer 890 if (position == buf.limit()) 891 return false; 892 893 return true; 894 } 895 896 /* 897 * Returns a "complete token" that matches the specified pattern 898 * 899 * A token is complete if surrounded by delims; a partial token 900 * is prefixed by delims but not postfixed by them 901 * 902 * The position is advanced to the end of that complete token 903 * 904 * Pattern == null means accept any token at all 905 * 906 * Triple return: 907 * 1. valid string means it was found 908 * 2. null with needInput=false means we won't ever find it 909 * 3. null with needInput=true means try again after readInput 910 */ 911 private String getCompleteTokenInBuffer(Pattern pattern) { 912 matchValid = false; 913 914 // Skip delims first 915 matcher.usePattern(delimPattern); 916 if (!skipped) { // Enforcing only one skip of leading delims 917 matcher.region(position, buf.limit()); 918 if (matcher.lookingAt()) { 919 // If more input could extend the delimiters then we must wait 920 // for more input 921 if (matcher.hitEnd() && !sourceClosed) { 922 needInput = true; 923 return null; 924 } 925 // The delims were whole and the matcher should skip them 926 skipped = true; 927 position = matcher.end(); 928 } 929 } 930 931 // If we are sitting at the end, no more tokens in buffer 932 if (position == buf.limit()) { 933 if (sourceClosed) 934 return null; 935 needInput = true; 936 return null; 937 } 938 939 // Must look for next delims. 
Simply attempting to match the 940 // pattern at this point may find a match but it might not be 941 // the first longest match because of missing input, or it might 942 // match a partial token instead of the whole thing. 943 944 // Then look for next delims 945 matcher.region(position, buf.limit()); 946 boolean foundNextDelim = matcher.find(); 947 if (foundNextDelim && (matcher.end() == position)) { 948 // Zero length delimiter match; we should find the next one 949 // using the automatic advance past a zero length match; 950 // Otherwise we have just found the same one we just skipped 951 foundNextDelim = matcher.find(); 952 } 953 if (foundNextDelim) { 954 // In the rare case that more input could cause the match 955 // to be lost and there is more input coming we must wait 956 // for more input. Note that hitting the end is okay as long 957 // as the match cannot go away. It is the beginning of the 958 // next delims we want to be sure about, we don't care if 959 // they potentially extend further. 960 if (matcher.requireEnd() && !sourceClosed) { 961 needInput = true; 962 return null; 963 } 964 int tokenEnd = matcher.start(); 965 // There is a complete token. 
966 if (pattern == null) { 967 // Must continue with match to provide valid MatchResult 968 pattern = FIND_ANY_PATTERN; 969 } 970 // Attempt to match against the desired pattern 971 matcher.usePattern(pattern); 972 matcher.region(position, tokenEnd); 973 if (matcher.matches()) { 974 String s = matcher.group(); 975 position = matcher.end(); 976 return s; 977 } else { // Complete token but it does not match 978 return null; 979 } 980 } 981 982 // If we can't find the next delims but no more input is coming, 983 // then we can treat the remainder as a whole token 984 if (sourceClosed) { 985 if (pattern == null) { 986 // Must continue with match to provide valid MatchResult 987 pattern = FIND_ANY_PATTERN; 988 } 989 // Last token; Match the pattern here or throw 990 matcher.usePattern(pattern); 991 matcher.region(position, buf.limit()); 992 if (matcher.matches()) { 993 String s = matcher.group(); 994 position = matcher.end(); 995 return s; 996 } 997 // Last piece does not match 998 return null; 999 } 1000 1001 // There is a partial token in the buffer; must read more 1002 // to complete it 1003 needInput = true; 1004 return null; 1005 } 1006 1007 // Finds the specified pattern in the buffer up to horizon. 1008 // Returns true if the specified input pattern was matched, 1009 // and leaves the matcher field with the current match state. 
1010 private boolean findPatternInBuffer(Pattern pattern, int horizon) { 1011 matchValid = false; 1012 matcher.usePattern(pattern); 1013 int bufferLimit = buf.limit(); 1014 int horizonLimit = -1; 1015 int searchLimit = bufferLimit; 1016 if (horizon > 0) { 1017 horizonLimit = position + horizon; 1018 if (horizonLimit < bufferLimit) 1019 searchLimit = horizonLimit; 1020 } 1021 matcher.region(position, searchLimit); 1022 if (matcher.find()) { 1023 if (matcher.hitEnd() && (!sourceClosed)) { 1024 // The match may be longer if didn't hit horizon or real end 1025 if (searchLimit != horizonLimit) { 1026 // Hit an artificial end; try to extend the match 1027 needInput = true; 1028 return false; 1029 } 1030 // The match could go away depending on what is next 1031 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1032 // Rare case: we hit the end of input and it happens 1033 // that it is at the horizon and the end of input is 1034 // required for the match. 1035 needInput = true; 1036 return false; 1037 } 1038 } 1039 // Did not hit end, or hit real end, or hit horizon 1040 position = matcher.end(); 1041 return true; 1042 } 1043 1044 if (sourceClosed) 1045 return false; 1046 1047 // If there is no specified horizon, or if we have not searched 1048 // to the specified horizon yet, get more input 1049 if ((horizon == 0) || (searchLimit != horizonLimit)) 1050 needInput = true; 1051 return false; 1052 } 1053 1054 // Attempts to match a pattern anchored at the current position. 1055 // Returns true if the specified input pattern was matched, 1056 // and leaves the matcher field with the current match state. 
1057 private boolean matchPatternInBuffer(Pattern pattern) { 1058 matchValid = false; 1059 matcher.usePattern(pattern); 1060 matcher.region(position, buf.limit()); 1061 if (matcher.lookingAt()) { 1062 if (matcher.hitEnd() && (!sourceClosed)) { 1063 // Get more input and try again 1064 needInput = true; 1065 return false; 1066 } 1067 position = matcher.end(); 1068 return true; 1069 } 1070 1071 if (sourceClosed) 1072 return false; 1073 1074 // Read more to find pattern 1075 needInput = true; 1076 return false; 1077 } 1078 1079 // Throws if the scanner is closed 1080 private void ensureOpen() { 1081 if (closed) 1082 throw new IllegalStateException("Scanner closed"); 1083 } 1084 1085 // Public methods 1086 1087 /** 1088 * Closes this scanner. 1089 * 1090 * <p> If this scanner has not yet been closed then if its underlying 1091 * {@linkplain java.lang.Readable readable} also implements the {@link 1092 * java.io.Closeable} interface then the readable's {@code close} method 1093 * will be invoked. If this scanner is already closed then invoking this 1094 * method will have no effect. 1095 * 1096 * <p>Attempting to perform search operations after a scanner has 1097 * been closed will result in an {@link IllegalStateException}. 1098 * 1099 */ 1100 public void close() { 1101 if (closed) 1102 return; 1103 if (source instanceof Closeable) { 1104 try { 1105 ((Closeable)source).close(); 1106 } catch (IOException ioe) { 1107 lastException = ioe; 1108 } 1109 } 1110 sourceClosed = true; 1111 source = null; 1112 closed = true; 1113 } 1114 1115 /** 1116 * Returns the {@code IOException} last thrown by this 1117 * {@code Scanner}'s underlying {@code Readable}. This method 1118 * returns {@code null} if no such exception exists. 
1119 * 1120 * @return the last exception thrown by this scanner's readable 1121 */ 1122 public IOException ioException() { 1123 return lastException; 1124 } 1125 1126 /** 1127 * Returns the {@code Pattern} this {@code Scanner} is currently 1128 * using to match delimiters. 1129 * 1130 * @return this scanner's delimiting pattern. 1131 */ 1132 public Pattern delimiter() { 1133 return delimPattern; 1134 } 1135 1136 /** 1137 * Sets this scanner's delimiting pattern to the specified pattern. 1138 * 1139 * @param pattern A delimiting pattern 1140 * @return this scanner 1141 */ 1142 public Scanner useDelimiter(Pattern pattern) { 1143 modCount++; 1144 delimPattern = pattern; 1145 return this; 1146 } 1147 1148 /** 1149 * Sets this scanner's delimiting pattern to a pattern constructed from 1150 * the specified {@code String}. 1151 * 1152 * <p> An invocation of this method of the form 1153 * {@code useDelimiter(pattern)} behaves in exactly the same way as the 1154 * invocation {@code useDelimiter(Pattern.compile(pattern))}. 1155 * 1156 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1157 * to the <a href= "#default-delimiter">default</a>. 1158 * 1159 * @param pattern A string specifying a delimiting pattern 1160 * @return this scanner 1161 */ 1162 public Scanner useDelimiter(String pattern) { 1163 modCount++; 1164 delimPattern = patternCache.forName(pattern); 1165 return this; 1166 } 1167 1168 /** 1169 * Returns this scanner's locale. 1170 * 1171 * <p>A scanner's locale affects many elements of its default 1172 * primitive matching regular expressions; see 1173 * <a href= "#localized-numbers">localized numbers</a> above. 1174 * 1175 * @return this scanner's locale 1176 */ 1177 public Locale locale() { 1178 return this.locale; 1179 } 1180 1181 /** 1182 * Sets this scanner's locale to the specified locale. 
1183 * 1184 * <p>A scanner's locale affects many elements of its default 1185 * primitive matching regular expressions; see 1186 * <a href= "#localized-numbers">localized numbers</a> above. 1187 * 1188 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1189 * the <a href= "#initial-locale">initial locale</a>. 1190 * 1191 * @param locale A string specifying the locale to use 1192 * @return this scanner 1193 */ 1194 public Scanner useLocale(Locale locale) { 1195 if (locale.equals(this.locale)) 1196 return this; 1197 1198 modCount++; 1199 this.locale = locale; 1200 DecimalFormat df = 1201 (DecimalFormat)NumberFormat.getNumberInstance(locale); 1202 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1203 1204 // These must be literalized to avoid collision with regex 1205 // metacharacters such as dot or parenthesis 1206 groupSeparator = "\\" + dfs.getGroupingSeparator(); 1207 decimalSeparator = "\\" + dfs.getDecimalSeparator(); 1208 1209 // Quoting the nonzero length locale-specific things 1210 // to avoid potential conflict with metacharacters 1211 nanString = "\\Q" + dfs.getNaN() + "\\E"; 1212 infinityString = "\\Q" + dfs.getInfinity() + "\\E"; 1213 positivePrefix = df.getPositivePrefix(); 1214 if (positivePrefix.length() > 0) 1215 positivePrefix = "\\Q" + positivePrefix + "\\E"; 1216 negativePrefix = df.getNegativePrefix(); 1217 if (negativePrefix.length() > 0) 1218 negativePrefix = "\\Q" + negativePrefix + "\\E"; 1219 positiveSuffix = df.getPositiveSuffix(); 1220 if (positiveSuffix.length() > 0) 1221 positiveSuffix = "\\Q" + positiveSuffix + "\\E"; 1222 negativeSuffix = df.getNegativeSuffix(); 1223 if (negativeSuffix.length() > 0) 1224 negativeSuffix = "\\Q" + negativeSuffix + "\\E"; 1225 1226 // Force rebuilding and recompilation of locale dependent 1227 // primitive patterns 1228 integerPattern = null; 1229 floatPattern = null; 1230 1231 return this; 1232 } 1233 1234 /** 1235 * Returns this scanner's default radix. 
1236 * 1237 * <p>A scanner's radix affects elements of its default 1238 * number matching regular expressions; see 1239 * <a href= "#localized-numbers">localized numbers</a> above. 1240 * 1241 * @return the default radix of this scanner 1242 */ 1243 public int radix() { 1244 return this.defaultRadix; 1245 } 1246 1247 /** 1248 * Sets this scanner's default radix to the specified radix. 1249 * 1250 * <p>A scanner's radix affects elements of its default 1251 * number matching regular expressions; see 1252 * <a href= "#localized-numbers">localized numbers</a> above. 1253 * 1254 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1255 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1256 * {@code IllegalArgumentException} is thrown. 1257 * 1258 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1259 * {@code 10}. 1260 * 1261 * @param radix The radix to use when scanning numbers 1262 * @return this scanner 1263 * @throws IllegalArgumentException if radix is out of range 1264 */ 1265 public Scanner useRadix(int radix) { 1266 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1267 throw new IllegalArgumentException("radix:"+radix); 1268 1269 if (this.defaultRadix == radix) 1270 return this; 1271 modCount++; 1272 this.defaultRadix = radix; 1273 // Force rebuilding and recompilation of radix dependent patterns 1274 integerPattern = null; 1275 return this; 1276 } 1277 1278 // The next operation should occur in the specified radix but 1279 // the default is left untouched. 1280 private void setRadix(int radix) { 1281 if (this.radix != radix) { 1282 // Force rebuilding and recompilation of radix dependent patterns 1283 integerPattern = null; 1284 this.radix = radix; 1285 } 1286 } 1287 1288 /** 1289 * Returns the match result of the last scanning operation performed 1290 * by this scanner. 
This method throws {@code IllegalStateException} 1291 * if no match has been performed, or if the last match was 1292 * not successful. 1293 * 1294 * <p>The various {@code next} methods of {@code Scanner} 1295 * make a match result available if they complete without throwing an 1296 * exception. For instance, after an invocation of the {@link #nextInt} 1297 * method that returned an int, this method returns a 1298 * {@code MatchResult} for the search of the 1299 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1300 * defined above. Similarly the {@link #findInLine findInLine()}, 1301 * {@link #findWithinHorizon findWithinHorizon()}, and {@link #skip skip()} 1302 * methods will make a match available if they succeed. 1303 * 1304 * @return a match result for the last match operation 1305 * @throws IllegalStateException If no match result is available 1306 */ 1307 public MatchResult match() { 1308 if (!matchValid) 1309 throw new IllegalStateException("No match result available"); 1310 return matcher.toMatchResult(); 1311 } 1312 1313 /** 1314 * <p>Returns the string representation of this {@code Scanner}. The 1315 * string representation of a {@code Scanner} contains information 1316 * that may be useful for debugging. The exact format is unspecified. 
1317 * 1318 * @return The string representation of this scanner 1319 */ 1320 public String toString() { 1321 StringBuilder sb = new StringBuilder(); 1322 sb.append("java.util.Scanner"); 1323 sb.append("[delimiters=" + delimPattern + "]"); 1324 sb.append("[position=" + position + "]"); 1325 sb.append("[match valid=" + matchValid + "]"); 1326 sb.append("[need input=" + needInput + "]"); 1327 sb.append("[source closed=" + sourceClosed + "]"); 1328 sb.append("[skipped=" + skipped + "]"); 1329 sb.append("[group separator=" + groupSeparator + "]"); 1330 sb.append("[decimal separator=" + decimalSeparator + "]"); 1331 sb.append("[positive prefix=" + positivePrefix + "]"); 1332 sb.append("[negative prefix=" + negativePrefix + "]"); 1333 sb.append("[positive suffix=" + positiveSuffix + "]"); 1334 sb.append("[negative suffix=" + negativeSuffix + "]"); 1335 sb.append("[NaN string=" + nanString + "]"); 1336 sb.append("[infinity string=" + infinityString + "]"); 1337 return sb.toString(); 1338 } 1339 1340 /** 1341 * Returns true if this scanner has another token in its input. 1342 * This method may block while waiting for input to scan. 1343 * The scanner does not advance past any input. 1344 * 1345 * @return true if and only if this scanner has another token 1346 * @throws IllegalStateException if this scanner is closed 1347 * @see java.util.Iterator 1348 */ 1349 public boolean hasNext() { 1350 ensureOpen(); 1351 saveState(); 1352 modCount++; 1353 while (!sourceClosed) { 1354 if (hasTokenInBuffer()) 1355 return revertState(true); 1356 readInput(); 1357 } 1358 boolean result = hasTokenInBuffer(); 1359 return revertState(result); 1360 } 1361 1362 /** 1363 * Finds and returns the next complete token from this scanner. 1364 * A complete token is preceded and followed by input that matches 1365 * the delimiter pattern. This method may block while waiting for input 1366 * to scan, even if a previous invocation of {@link #hasNext} returned 1367 * {@code true}. 
1368 * 1369 * @return the next token 1370 * @throws NoSuchElementException if no more tokens are available 1371 * @throws IllegalStateException if this scanner is closed 1372 * @see java.util.Iterator 1373 */ 1374 public String next() { 1375 ensureOpen(); 1376 clearCaches(); 1377 modCount++; 1378 1379 while (true) { 1380 String token = getCompleteTokenInBuffer(null); 1381 if (token != null) { 1382 matchValid = true; 1383 skipped = false; 1384 return token; 1385 } 1386 if (needInput) 1387 readInput(); 1388 else 1389 throwFor(); 1390 } 1391 } 1392 1393 /** 1394 * The remove operation is not supported by this implementation of 1395 * {@code Iterator}. 1396 * 1397 * @throws UnsupportedOperationException if this method is invoked. 1398 * @see java.util.Iterator 1399 */ 1400 public void remove() { 1401 throw new UnsupportedOperationException(); 1402 } 1403 1404 /** 1405 * Returns true if the next token matches the pattern constructed from the 1406 * specified string. The scanner does not advance past any input. 1407 * 1408 * <p> An invocation of this method of the form {@code hasNext(pattern)} 1409 * behaves in exactly the same way as the invocation 1410 * {@code hasNext(Pattern.compile(pattern))}. 1411 * 1412 * @param pattern a string specifying the pattern to scan 1413 * @return true if and only if this scanner has another token matching 1414 * the specified pattern 1415 * @throws IllegalStateException if this scanner is closed 1416 */ 1417 public boolean hasNext(String pattern) { 1418 return hasNext(patternCache.forName(pattern)); 1419 } 1420 1421 /** 1422 * Returns the next token if it matches the pattern constructed from the 1423 * specified string. If the match is successful, the scanner advances 1424 * past the input that matched the pattern. 1425 * 1426 * <p> An invocation of this method of the form {@code next(pattern)} 1427 * behaves in exactly the same way as the invocation 1428 * {@code next(Pattern.compile(pattern))}. 
1429 * 1430 * @param pattern a string specifying the pattern to scan 1431 * @return the next token 1432 * @throws NoSuchElementException if no such tokens are available 1433 * @throws IllegalStateException if this scanner is closed 1434 */ 1435 public String next(String pattern) { 1436 return next(patternCache.forName(pattern)); 1437 } 1438 1439 /** 1440 * Returns true if the next complete token matches the specified pattern. 1441 * A complete token is prefixed and postfixed by input that matches 1442 * the delimiter pattern. This method may block while waiting for input. 1443 * The scanner does not advance past any input. 1444 * 1445 * @param pattern the pattern to scan for 1446 * @return true if and only if this scanner has another token matching 1447 * the specified pattern 1448 * @throws IllegalStateException if this scanner is closed 1449 */ 1450 public boolean hasNext(Pattern pattern) { 1451 ensureOpen(); 1452 if (pattern == null) 1453 throw new NullPointerException(); 1454 hasNextPattern = null; 1455 saveState(); 1456 modCount++; 1457 1458 while (true) { 1459 if (getCompleteTokenInBuffer(pattern) != null) { 1460 matchValid = true; 1461 cacheResult(); 1462 return revertState(true); 1463 } 1464 if (needInput) 1465 readInput(); 1466 else 1467 return revertState(false); 1468 } 1469 } 1470 1471 /** 1472 * Returns the next token if it matches the specified pattern. This 1473 * method may block while waiting for input to scan, even if a previous 1474 * invocation of {@link #hasNext(Pattern)} returned {@code true}. 1475 * If the match is successful, the scanner advances past the input that 1476 * matched the pattern. 
1477 * 1478 * @param pattern the pattern to scan for 1479 * @return the next token 1480 * @throws NoSuchElementException if no more tokens are available 1481 * @throws IllegalStateException if this scanner is closed 1482 */ 1483 public String next(Pattern pattern) { 1484 ensureOpen(); 1485 if (pattern == null) 1486 throw new NullPointerException(); 1487 1488 modCount++; 1489 // Did we already find this pattern? 1490 if (hasNextPattern == pattern) 1491 return getCachedResult(); 1492 clearCaches(); 1493 1494 // Search for the pattern 1495 while (true) { 1496 String token = getCompleteTokenInBuffer(pattern); 1497 if (token != null) { 1498 matchValid = true; 1499 skipped = false; 1500 return token; 1501 } 1502 if (needInput) 1503 readInput(); 1504 else 1505 throwFor(); 1506 } 1507 } 1508 1509 /** 1510 * Returns true if there is another line in the input of this scanner. 1511 * This method may block while waiting for input. The scanner does not 1512 * advance past any input. 1513 * 1514 * @return true if and only if this scanner has another line of input 1515 * @throws IllegalStateException if this scanner is closed 1516 */ 1517 public boolean hasNextLine() { 1518 saveState(); 1519 1520 modCount++; 1521 String result = findWithinHorizon(linePattern(), 0); 1522 if (result != null) { 1523 MatchResult mr = this.match(); 1524 String lineSep = mr.group(1); 1525 if (lineSep != null) { 1526 result = result.substring(0, result.length() - 1527 lineSep.length()); 1528 cacheResult(result); 1529 1530 } else { 1531 cacheResult(); 1532 } 1533 } 1534 revertState(); 1535 return (result != null); 1536 } 1537 1538 /** 1539 * Advances this scanner past the current line and returns the input 1540 * that was skipped. 1541 * 1542 * This method returns the rest of the current line, excluding any line 1543 * separator at the end. The position is set to the beginning of the next 1544 * line. 
1545 * 1546 * <p>Since this method continues to search through the input looking 1547 * for a line separator, it may buffer all of the input searching for 1548 * the line to skip if no line separators are present. 1549 * 1550 * @return the line that was skipped 1551 * @throws NoSuchElementException if no line was found 1552 * @throws IllegalStateException if this scanner is closed 1553 */ 1554 public String nextLine() { 1555 modCount++; 1556 if (hasNextPattern == linePattern()) 1557 return getCachedResult(); 1558 clearCaches(); 1559 1560 String result = findWithinHorizon(linePattern, 0); 1561 if (result == null) 1562 throw new NoSuchElementException("No line found"); 1563 MatchResult mr = this.match(); 1564 String lineSep = mr.group(1); 1565 if (lineSep != null) 1566 result = result.substring(0, result.length() - lineSep.length()); 1567 if (result == null) 1568 throw new NoSuchElementException(); 1569 else 1570 return result; 1571 } 1572 1573 // Public methods that ignore delimiters 1574 1575 /** 1576 * Attempts to find the next occurrence of a pattern constructed from the 1577 * specified string, ignoring delimiters. 1578 * 1579 * <p>An invocation of this method of the form {@code findInLine(pattern)} 1580 * behaves in exactly the same way as the invocation 1581 * {@code findInLine(Pattern.compile(pattern))}. 1582 * 1583 * @param pattern a string specifying the pattern to search for 1584 * @return the text that matched the specified pattern 1585 * @throws IllegalStateException if this scanner is closed 1586 */ 1587 public String findInLine(String pattern) { 1588 return findInLine(patternCache.forName(pattern)); 1589 } 1590 1591 /** 1592 * Attempts to find the next occurrence of the specified pattern ignoring 1593 * delimiters. If the pattern is found before the next line separator, the 1594 * scanner advances past the input that matched and returns the string that 1595 * matched the pattern. 
1596 * If no such pattern is detected in the input up to the next line 1597 * separator, then {@code null} is returned and the scanner's 1598 * position is unchanged. This method may block waiting for input that 1599 * matches the pattern. 1600 * 1601 * <p>Since this method continues to search through the input looking 1602 * for the specified pattern, it may buffer all of the input searching for 1603 * the desired token if no line separators are present. 1604 * 1605 * @param pattern the pattern to scan for 1606 * @return the text that matched the specified pattern 1607 * @throws IllegalStateException if this scanner is closed 1608 */ 1609 public String findInLine(Pattern pattern) { 1610 ensureOpen(); 1611 if (pattern == null) 1612 throw new NullPointerException(); 1613 clearCaches(); 1614 modCount++; 1615 // Expand buffer to include the next newline or end of input 1616 int endPosition = 0; 1617 saveState(); 1618 while (true) { 1619 if (findPatternInBuffer(separatorPattern(), 0)) { 1620 endPosition = matcher.start(); 1621 break; // up to next newline 1622 } 1623 if (needInput) { 1624 readInput(); 1625 } else { 1626 endPosition = buf.limit(); 1627 break; // up to end of input 1628 } 1629 } 1630 revertState(); 1631 int horizonForLine = endPosition - position; 1632 // If there is nothing between the current pos and the next 1633 // newline simply return null, invoking findWithinHorizon 1634 // with "horizon=0" will scan beyond the line bound. 1635 if (horizonForLine == 0) 1636 return null; 1637 // Search for the pattern 1638 return findWithinHorizon(pattern, horizonForLine); 1639 } 1640 1641 /** 1642 * Attempts to find the next occurrence of a pattern constructed from the 1643 * specified string, ignoring delimiters. 1644 * 1645 * <p>An invocation of this method of the form 1646 * {@code findWithinHorizon(pattern)} behaves in exactly the same way as 1647 * the invocation 1648 * {@code findWithinHorizon(Pattern.compile(pattern), horizon)}. 
1649 * 1650 * @param pattern a string specifying the pattern to search for 1651 * @param horizon the search horizon 1652 * @return the text that matched the specified pattern 1653 * @throws IllegalStateException if this scanner is closed 1654 * @throws IllegalArgumentException if horizon is negative 1655 */ 1656 public String findWithinHorizon(String pattern, int horizon) { 1657 return findWithinHorizon(patternCache.forName(pattern), horizon); 1658 } 1659 1660 /** 1661 * Attempts to find the next occurrence of the specified pattern. 1662 * 1663 * <p>This method searches through the input up to the specified 1664 * search horizon, ignoring delimiters. If the pattern is found the 1665 * scanner advances past the input that matched and returns the string 1666 * that matched the pattern. If no such pattern is detected then the 1667 * null is returned and the scanner's position remains unchanged. This 1668 * method may block waiting for input that matches the pattern. 1669 * 1670 * <p>A scanner will never search more than {@code horizon} code 1671 * points beyond its current position. Note that a match may be clipped 1672 * by the horizon; that is, an arbitrary match result may have been 1673 * different if the horizon had been larger. The scanner treats the 1674 * horizon as a transparent, non-anchoring bound (see {@link 1675 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1676 * 1677 * <p>If horizon is {@code 0}, then the horizon is ignored and 1678 * this method continues to search through the input looking for the 1679 * specified pattern without bound. In this case it may buffer all of 1680 * the input searching for the pattern. 1681 * 1682 * <p>If horizon is negative, then an IllegalArgumentException is 1683 * thrown. 
1684 * 1685 * @param pattern the pattern to scan for 1686 * @param horizon the search horizon 1687 * @return the text that matched the specified pattern 1688 * @throws IllegalStateException if this scanner is closed 1689 * @throws IllegalArgumentException if horizon is negative 1690 */ 1691 public String findWithinHorizon(Pattern pattern, int horizon) { 1692 ensureOpen(); 1693 if (pattern == null) 1694 throw new NullPointerException(); 1695 if (horizon < 0) 1696 throw new IllegalArgumentException("horizon < 0"); 1697 clearCaches(); 1698 modCount++; 1699 1700 // Search for the pattern 1701 while (true) { 1702 if (findPatternInBuffer(pattern, horizon)) { 1703 matchValid = true; 1704 return matcher.group(); 1705 } 1706 if (needInput) 1707 readInput(); 1708 else 1709 break; // up to end of input 1710 } 1711 return null; 1712 } 1713 1714 /** 1715 * Skips input that matches the specified pattern, ignoring delimiters. 1716 * This method will skip input if an anchored match of the specified 1717 * pattern succeeds. 1718 * 1719 * <p>If a match to the specified pattern is not found at the 1720 * current position, then no input is skipped and a 1721 * {@code NoSuchElementException} is thrown. 1722 * 1723 * <p>Since this method seeks to match the specified pattern starting at 1724 * the scanner's current position, patterns that can match a lot of 1725 * input (".*", for example) may cause the scanner to buffer a large 1726 * amount of input. 1727 * 1728 * <p>Note that it is possible to skip something without risking a 1729 * {@code NoSuchElementException} by using a pattern that can 1730 * match nothing, e.g., {@code sc.skip("[ \t]*")}. 
1731 * 1732 * @param pattern a string specifying the pattern to skip over 1733 * @return this scanner 1734 * @throws NoSuchElementException if the specified pattern is not found 1735 * @throws IllegalStateException if this scanner is closed 1736 */ 1737 public Scanner skip(Pattern pattern) { 1738 ensureOpen(); 1739 if (pattern == null) 1740 throw new NullPointerException(); 1741 clearCaches(); 1742 modCount++; 1743 1744 // Search for the pattern 1745 while (true) { 1746 if (matchPatternInBuffer(pattern)) { 1747 matchValid = true; 1748 position = matcher.end(); 1749 return this; 1750 } 1751 if (needInput) 1752 readInput(); 1753 else 1754 throw new NoSuchElementException(); 1755 } 1756 } 1757 1758 /** 1759 * Skips input that matches a pattern constructed from the specified 1760 * string. 1761 * 1762 * <p> An invocation of this method of the form {@code skip(pattern)} 1763 * behaves in exactly the same way as the invocation 1764 * {@code skip(Pattern.compile(pattern))}. 1765 * 1766 * @param pattern a string specifying the pattern to skip over 1767 * @return this scanner 1768 * @throws IllegalStateException if this scanner is closed 1769 */ 1770 public Scanner skip(String pattern) { 1771 return skip(patternCache.forName(pattern)); 1772 } 1773 1774 // Convenience methods for scanning primitives 1775 1776 /** 1777 * Returns true if the next token in this scanner's input can be 1778 * interpreted as a boolean value using a case insensitive pattern 1779 * created from the string "true|false". The scanner does not 1780 * advance past the input that matched. 1781 * 1782 * @return true if and only if this scanner's next token is a valid 1783 * boolean value 1784 * @throws IllegalStateException if this scanner is closed 1785 */ 1786 public boolean hasNextBoolean() { 1787 return hasNext(boolPattern()); 1788 } 1789 1790 /** 1791 * Scans the next token of the input into a boolean value and returns 1792 * that value. 
This method will throw {@code InputMismatchException} 1793 * if the next token cannot be translated into a valid boolean value. 1794 * If the match is successful, the scanner advances past the input that 1795 * matched. 1796 * 1797 * @return the boolean scanned from the input 1798 * @throws InputMismatchException if the next token is not a valid boolean 1799 * @throws NoSuchElementException if input is exhausted 1800 * @throws IllegalStateException if this scanner is closed 1801 */ 1802 public boolean nextBoolean() { 1803 clearCaches(); 1804 return Boolean.parseBoolean(next(boolPattern())); 1805 } 1806 1807 /** 1808 * Returns true if the next token in this scanner's input can be 1809 * interpreted as a byte value in the default radix using the 1810 * {@link #nextByte} method. The scanner does not advance past any input. 1811 * 1812 * @return true if and only if this scanner's next token is a valid 1813 * byte value 1814 * @throws IllegalStateException if this scanner is closed 1815 */ 1816 public boolean hasNextByte() { 1817 return hasNextByte(defaultRadix); 1818 } 1819 1820 /** 1821 * Returns true if the next token in this scanner's input can be 1822 * interpreted as a byte value in the specified radix using the 1823 * {@link #nextByte} method. The scanner does not advance past any input. 1824 * 1825 * @param radix the radix used to interpret the token as a byte value 1826 * @return true if and only if this scanner's next token is a valid 1827 * byte value 1828 * @throws IllegalStateException if this scanner is closed 1829 */ 1830 public boolean hasNextByte(int radix) { 1831 setRadix(radix); 1832 boolean result = hasNext(integerPattern()); 1833 if (result) { // Cache it 1834 try { 1835 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 
1836 processIntegerToken(hasNextResult) : 1837 hasNextResult; 1838 typeCache = Byte.parseByte(s, radix); 1839 } catch (NumberFormatException nfe) { 1840 result = false; 1841 } 1842 } 1843 return result; 1844 } 1845 1846 /** 1847 * Scans the next token of the input as a {@code byte}. 1848 * 1849 * <p> An invocation of this method of the form 1850 * {@code nextByte()} behaves in exactly the same way as the 1851 * invocation {@code nextByte(radix)}, where {@code radix} 1852 * is the default radix of this scanner. 1853 * 1854 * @return the {@code byte} scanned from the input 1855 * @throws InputMismatchException 1856 * if the next token does not match the <i>Integer</i> 1857 * regular expression, or is out of range 1858 * @throws NoSuchElementException if input is exhausted 1859 * @throws IllegalStateException if this scanner is closed 1860 */ 1861 public byte nextByte() { 1862 return nextByte(defaultRadix); 1863 } 1864 1865 /** 1866 * Scans the next token of the input as a {@code byte}. 1867 * This method will throw {@code InputMismatchException} 1868 * if the next token cannot be translated into a valid byte value as 1869 * described below. If the translation is successful, the scanner advances 1870 * past the input that matched. 1871 * 1872 * <p> If the next token matches the <a 1873 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1874 * above then the token is converted into a {@code byte} value as if by 1875 * removing all locale specific prefixes, group separators, and locale 1876 * specific suffixes, then mapping non-ASCII digits into ASCII 1877 * digits via {@link Character#digit Character.digit}, prepending a 1878 * negative sign (-) if the locale specific negative prefixes and suffixes 1879 * were present, and passing the resulting string to 1880 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1881 * specified radix. 
1882 * 1883 * @param radix the radix used to interpret the token as a byte value 1884 * @return the {@code byte} scanned from the input 1885 * @throws InputMismatchException 1886 * if the next token does not match the <i>Integer</i> 1887 * regular expression, or is out of range 1888 * @throws NoSuchElementException if input is exhausted 1889 * @throws IllegalStateException if this scanner is closed 1890 */ 1891 public byte nextByte(int radix) { 1892 // Check cached result 1893 if ((typeCache != null) && (typeCache instanceof Byte) 1894 && this.radix == radix) { 1895 byte val = ((Byte)typeCache).byteValue(); 1896 useTypeCache(); 1897 return val; 1898 } 1899 setRadix(radix); 1900 clearCaches(); 1901 // Search for next byte 1902 try { 1903 String s = next(integerPattern()); 1904 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1905 s = processIntegerToken(s); 1906 return Byte.parseByte(s, radix); 1907 } catch (NumberFormatException nfe) { 1908 position = matcher.start(); // don't skip bad token 1909 throw new InputMismatchException(nfe.getMessage()); 1910 } 1911 } 1912 1913 /** 1914 * Returns true if the next token in this scanner's input can be 1915 * interpreted as a short value in the default radix using the 1916 * {@link #nextShort} method. The scanner does not advance past any input. 1917 * 1918 * @return true if and only if this scanner's next token is a valid 1919 * short value in the default radix 1920 * @throws IllegalStateException if this scanner is closed 1921 */ 1922 public boolean hasNextShort() { 1923 return hasNextShort(defaultRadix); 1924 } 1925 1926 /** 1927 * Returns true if the next token in this scanner's input can be 1928 * interpreted as a short value in the specified radix using the 1929 * {@link #nextShort} method. The scanner does not advance past any input. 
1930 * 1931 * @param radix the radix used to interpret the token as a short value 1932 * @return true if and only if this scanner's next token is a valid 1933 * short value in the specified radix 1934 * @throws IllegalStateException if this scanner is closed 1935 */ 1936 public boolean hasNextShort(int radix) { 1937 setRadix(radix); 1938 boolean result = hasNext(integerPattern()); 1939 if (result) { // Cache it 1940 try { 1941 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1942 processIntegerToken(hasNextResult) : 1943 hasNextResult; 1944 typeCache = Short.parseShort(s, radix); 1945 } catch (NumberFormatException nfe) { 1946 result = false; 1947 } 1948 } 1949 return result; 1950 } 1951 1952 /** 1953 * Scans the next token of the input as a {@code short}. 1954 * 1955 * <p> An invocation of this method of the form 1956 * {@code nextShort()} behaves in exactly the same way as the 1957 * invocation {@link #nextShort(int) nextShort(radix)}, where {@code radix} 1958 * is the default radix of this scanner. 1959 * 1960 * @return the {@code short} scanned from the input 1961 * @throws InputMismatchException 1962 * if the next token does not match the <i>Integer</i> 1963 * regular expression, or is out of range 1964 * @throws NoSuchElementException if input is exhausted 1965 * @throws IllegalStateException if this scanner is closed 1966 */ 1967 public short nextShort() { 1968 return nextShort(defaultRadix); 1969 } 1970 1971 /** 1972 * Scans the next token of the input as a {@code short}. 1973 * This method will throw {@code InputMismatchException} 1974 * if the next token cannot be translated into a valid short value as 1975 * described below. If the translation is successful, the scanner advances 1976 * past the input that matched. 
1977 * 1978 * <p> If the next token matches the <a 1979 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1980 * above then the token is converted into a {@code short} value as if by 1981 * removing all locale specific prefixes, group separators, and locale 1982 * specific suffixes, then mapping non-ASCII digits into ASCII 1983 * digits via {@link Character#digit Character.digit}, prepending a 1984 * negative sign (-) if the locale specific negative prefixes and suffixes 1985 * were present, and passing the resulting string to 1986 * {@link Short#parseShort(String, int) Short.parseShort} with the 1987 * specified radix. 1988 * 1989 * @param radix the radix used to interpret the token as a short value 1990 * @return the {@code short} scanned from the input 1991 * @throws InputMismatchException 1992 * if the next token does not match the <i>Integer</i> 1993 * regular expression, or is out of range 1994 * @throws NoSuchElementException if input is exhausted 1995 * @throws IllegalStateException if this scanner is closed 1996 */ 1997 public short nextShort(int radix) { 1998 // Check cached result 1999 if ((typeCache != null) && (typeCache instanceof Short) 2000 && this.radix == radix) { 2001 short val = ((Short)typeCache).shortValue(); 2002 useTypeCache(); 2003 return val; 2004 } 2005 setRadix(radix); 2006 clearCaches(); 2007 // Search for next short 2008 try { 2009 String s = next(integerPattern()); 2010 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2011 s = processIntegerToken(s); 2012 return Short.parseShort(s, radix); 2013 } catch (NumberFormatException nfe) { 2014 position = matcher.start(); // don't skip bad token 2015 throw new InputMismatchException(nfe.getMessage()); 2016 } 2017 } 2018 2019 /** 2020 * Returns true if the next token in this scanner's input can be 2021 * interpreted as an int value in the default radix using the 2022 * {@link #nextInt} method. The scanner does not advance past any input. 
2023 * 2024 * @return true if and only if this scanner's next token is a valid 2025 * int value 2026 * @throws IllegalStateException if this scanner is closed 2027 */ 2028 public boolean hasNextInt() { 2029 return hasNextInt(defaultRadix); 2030 } 2031 2032 /** 2033 * Returns true if the next token in this scanner's input can be 2034 * interpreted as an int value in the specified radix using the 2035 * {@link #nextInt} method. The scanner does not advance past any input. 2036 * 2037 * @param radix the radix used to interpret the token as an int value 2038 * @return true if and only if this scanner's next token is a valid 2039 * int value 2040 * @throws IllegalStateException if this scanner is closed 2041 */ 2042 public boolean hasNextInt(int radix) { 2043 setRadix(radix); 2044 boolean result = hasNext(integerPattern()); 2045 if (result) { // Cache it 2046 try { 2047 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2048 processIntegerToken(hasNextResult) : 2049 hasNextResult; 2050 typeCache = Integer.parseInt(s, radix); 2051 } catch (NumberFormatException nfe) { 2052 result = false; 2053 } 2054 } 2055 return result; 2056 } 2057 2058 /** 2059 * The integer token must be stripped of prefixes, group separators, 2060 * and suffixes, non ascii digits must be converted into ascii digits 2061 * before parse will accept it. 
2062 */ 2063 private String processIntegerToken(String token) { 2064 String result = token.replaceAll(""+groupSeparator, ""); 2065 boolean isNegative = false; 2066 int preLen = negativePrefix.length(); 2067 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2068 isNegative = true; 2069 result = result.substring(preLen); 2070 } 2071 int sufLen = negativeSuffix.length(); 2072 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2073 isNegative = true; 2074 result = result.substring(result.length() - sufLen, 2075 result.length()); 2076 } 2077 if (isNegative) 2078 result = "-" + result; 2079 return result; 2080 } 2081 2082 /** 2083 * Scans the next token of the input as an {@code int}. 2084 * 2085 * <p> An invocation of this method of the form 2086 * {@code nextInt()} behaves in exactly the same way as the 2087 * invocation {@code nextInt(radix)}, where {@code radix} 2088 * is the default radix of this scanner. 2089 * 2090 * @return the {@code int} scanned from the input 2091 * @throws InputMismatchException 2092 * if the next token does not match the <i>Integer</i> 2093 * regular expression, or is out of range 2094 * @throws NoSuchElementException if input is exhausted 2095 * @throws IllegalStateException if this scanner is closed 2096 */ 2097 public int nextInt() { 2098 return nextInt(defaultRadix); 2099 } 2100 2101 /** 2102 * Scans the next token of the input as an {@code int}. 2103 * This method will throw {@code InputMismatchException} 2104 * if the next token cannot be translated into a valid int value as 2105 * described below. If the translation is successful, the scanner advances 2106 * past the input that matched. 
2107 * 2108 * <p> If the next token matches the <a 2109 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2110 * above then the token is converted into an {@code int} value as if by 2111 * removing all locale specific prefixes, group separators, and locale 2112 * specific suffixes, then mapping non-ASCII digits into ASCII 2113 * digits via {@link Character#digit Character.digit}, prepending a 2114 * negative sign (-) if the locale specific negative prefixes and suffixes 2115 * were present, and passing the resulting string to 2116 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2117 * specified radix. 2118 * 2119 * @param radix the radix used to interpret the token as an int value 2120 * @return the {@code int} scanned from the input 2121 * @throws InputMismatchException 2122 * if the next token does not match the <i>Integer</i> 2123 * regular expression, or is out of range 2124 * @throws NoSuchElementException if input is exhausted 2125 * @throws IllegalStateException if this scanner is closed 2126 */ 2127 public int nextInt(int radix) { 2128 // Check cached result 2129 if ((typeCache != null) && (typeCache instanceof Integer) 2130 && this.radix == radix) { 2131 int val = ((Integer)typeCache).intValue(); 2132 useTypeCache(); 2133 return val; 2134 } 2135 setRadix(radix); 2136 clearCaches(); 2137 // Search for next int 2138 try { 2139 String s = next(integerPattern()); 2140 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2141 s = processIntegerToken(s); 2142 return Integer.parseInt(s, radix); 2143 } catch (NumberFormatException nfe) { 2144 position = matcher.start(); // don't skip bad token 2145 throw new InputMismatchException(nfe.getMessage()); 2146 } 2147 } 2148 2149 /** 2150 * Returns true if the next token in this scanner's input can be 2151 * interpreted as a long value in the default radix using the 2152 * {@link #nextLong} method. The scanner does not advance past any input. 
2153 * 2154 * @return true if and only if this scanner's next token is a valid 2155 * long value 2156 * @throws IllegalStateException if this scanner is closed 2157 */ 2158 public boolean hasNextLong() { 2159 return hasNextLong(defaultRadix); 2160 } 2161 2162 /** 2163 * Returns true if the next token in this scanner's input can be 2164 * interpreted as a long value in the specified radix using the 2165 * {@link #nextLong} method. The scanner does not advance past any input. 2166 * 2167 * @param radix the radix used to interpret the token as a long value 2168 * @return true if and only if this scanner's next token is a valid 2169 * long value 2170 * @throws IllegalStateException if this scanner is closed 2171 */ 2172 public boolean hasNextLong(int radix) { 2173 setRadix(radix); 2174 boolean result = hasNext(integerPattern()); 2175 if (result) { // Cache it 2176 try { 2177 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2178 processIntegerToken(hasNextResult) : 2179 hasNextResult; 2180 typeCache = Long.parseLong(s, radix); 2181 } catch (NumberFormatException nfe) { 2182 result = false; 2183 } 2184 } 2185 return result; 2186 } 2187 2188 /** 2189 * Scans the next token of the input as a {@code long}. 2190 * 2191 * <p> An invocation of this method of the form 2192 * {@code nextLong()} behaves in exactly the same way as the 2193 * invocation {@code nextLong(radix)}, where {@code radix} 2194 * is the default radix of this scanner. 2195 * 2196 * @return the {@code long} scanned from the input 2197 * @throws InputMismatchException 2198 * if the next token does not match the <i>Integer</i> 2199 * regular expression, or is out of range 2200 * @throws NoSuchElementException if input is exhausted 2201 * @throws IllegalStateException if this scanner is closed 2202 */ 2203 public long nextLong() { 2204 return nextLong(defaultRadix); 2205 } 2206 2207 /** 2208 * Scans the next token of the input as a {@code long}. 
2209 * This method will throw {@code InputMismatchException} 2210 * if the next token cannot be translated into a valid long value as 2211 * described below. If the translation is successful, the scanner advances 2212 * past the input that matched. 2213 * 2214 * <p> If the next token matches the <a 2215 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2216 * above then the token is converted into a {@code long} value as if by 2217 * removing all locale specific prefixes, group separators, and locale 2218 * specific suffixes, then mapping non-ASCII digits into ASCII 2219 * digits via {@link Character#digit Character.digit}, prepending a 2220 * negative sign (-) if the locale specific negative prefixes and suffixes 2221 * were present, and passing the resulting string to 2222 * {@link Long#parseLong(String, int) Long.parseLong} with the 2223 * specified radix. 2224 * 2225 * @param radix the radix used to interpret the token as an int value 2226 * @return the {@code long} scanned from the input 2227 * @throws InputMismatchException 2228 * if the next token does not match the <i>Integer</i> 2229 * regular expression, or is out of range 2230 * @throws NoSuchElementException if input is exhausted 2231 * @throws IllegalStateException if this scanner is closed 2232 */ 2233 public long nextLong(int radix) { 2234 // Check cached result 2235 if ((typeCache != null) && (typeCache instanceof Long) 2236 && this.radix == radix) { 2237 long val = ((Long)typeCache).longValue(); 2238 useTypeCache(); 2239 return val; 2240 } 2241 setRadix(radix); 2242 clearCaches(); 2243 try { 2244 String s = next(integerPattern()); 2245 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2246 s = processIntegerToken(s); 2247 return Long.parseLong(s, radix); 2248 } catch (NumberFormatException nfe) { 2249 position = matcher.start(); // don't skip bad token 2250 throw new InputMismatchException(nfe.getMessage()); 2251 } 2252 } 2253 2254 /** 2255 * The float token must be stripped of 
prefixes, group separators, 2256 * and suffixes, non ascii digits must be converted into ascii digits 2257 * before parseFloat will accept it. 2258 * 2259 * If there are non-ascii digits in the token these digits must 2260 * be processed before the token is passed to parseFloat. 2261 */ 2262 private String processFloatToken(String token) { 2263 String result = token.replaceAll(groupSeparator, ""); 2264 if (!decimalSeparator.equals("\\.")) 2265 result = result.replaceAll(decimalSeparator, "."); 2266 boolean isNegative = false; 2267 int preLen = negativePrefix.length(); 2268 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2269 isNegative = true; 2270 result = result.substring(preLen); 2271 } 2272 int sufLen = negativeSuffix.length(); 2273 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2274 isNegative = true; 2275 result = result.substring(result.length() - sufLen, 2276 result.length()); 2277 } 2278 if (result.equals(nanString)) 2279 result = "NaN"; 2280 if (result.equals(infinityString)) 2281 result = "Infinity"; 2282 if (isNegative) 2283 result = "-" + result; 2284 2285 // Translate non-ASCII digits 2286 Matcher m = NON_ASCII_DIGIT.matcher(result); 2287 if (m.find()) { 2288 StringBuilder inASCII = new StringBuilder(); 2289 for (int i=0; i<result.length(); i++) { 2290 char nextChar = result.charAt(i); 2291 if (Character.isDigit(nextChar)) { 2292 int d = Character.digit(nextChar, 10); 2293 if (d != -1) 2294 inASCII.append(d); 2295 else 2296 inASCII.append(nextChar); 2297 } else { 2298 inASCII.append(nextChar); 2299 } 2300 } 2301 result = inASCII.toString(); 2302 } 2303 2304 return result; 2305 } 2306 2307 /** 2308 * Returns true if the next token in this scanner's input can be 2309 * interpreted as a float value using the {@link #nextFloat} 2310 * method. The scanner does not advance past any input. 
2311 * 2312 * @return true if and only if this scanner's next token is a valid 2313 * float value 2314 * @throws IllegalStateException if this scanner is closed 2315 */ 2316 public boolean hasNextFloat() { 2317 setRadix(10); 2318 boolean result = hasNext(floatPattern()); 2319 if (result) { // Cache it 2320 try { 2321 String s = processFloatToken(hasNextResult); 2322 typeCache = Float.valueOf(Float.parseFloat(s)); 2323 } catch (NumberFormatException nfe) { 2324 result = false; 2325 } 2326 } 2327 return result; 2328 } 2329 2330 /** 2331 * Scans the next token of the input as a {@code float}. 2332 * This method will throw {@code InputMismatchException} 2333 * if the next token cannot be translated into a valid float value as 2334 * described below. If the translation is successful, the scanner advances 2335 * past the input that matched. 2336 * 2337 * <p> If the next token matches the <a 2338 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2339 * then the token is converted into a {@code float} value as if by 2340 * removing all locale specific prefixes, group separators, and locale 2341 * specific suffixes, then mapping non-ASCII digits into ASCII 2342 * digits via {@link Character#digit Character.digit}, prepending a 2343 * negative sign (-) if the locale specific negative prefixes and suffixes 2344 * were present, and passing the resulting string to 2345 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2346 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2347 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2348 * appropriate. 
2349 * 2350 * @return the {@code float} scanned from the input 2351 * @throws InputMismatchException 2352 * if the next token does not match the <i>Float</i> 2353 * regular expression, or is out of range 2354 * @throws NoSuchElementException if input is exhausted 2355 * @throws IllegalStateException if this scanner is closed 2356 */ 2357 public float nextFloat() { 2358 // Check cached result 2359 if ((typeCache != null) && (typeCache instanceof Float)) { 2360 float val = ((Float)typeCache).floatValue(); 2361 useTypeCache(); 2362 return val; 2363 } 2364 setRadix(10); 2365 clearCaches(); 2366 try { 2367 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2368 } catch (NumberFormatException nfe) { 2369 position = matcher.start(); // don't skip bad token 2370 throw new InputMismatchException(nfe.getMessage()); 2371 } 2372 } 2373 2374 /** 2375 * Returns true if the next token in this scanner's input can be 2376 * interpreted as a double value using the {@link #nextDouble} 2377 * method. The scanner does not advance past any input. 2378 * 2379 * @return true if and only if this scanner's next token is a valid 2380 * double value 2381 * @throws IllegalStateException if this scanner is closed 2382 */ 2383 public boolean hasNextDouble() { 2384 setRadix(10); 2385 boolean result = hasNext(floatPattern()); 2386 if (result) { // Cache it 2387 try { 2388 String s = processFloatToken(hasNextResult); 2389 typeCache = Double.valueOf(Double.parseDouble(s)); 2390 } catch (NumberFormatException nfe) { 2391 result = false; 2392 } 2393 } 2394 return result; 2395 } 2396 2397 /** 2398 * Scans the next token of the input as a {@code double}. 2399 * This method will throw {@code InputMismatchException} 2400 * if the next token cannot be translated into a valid double value. 2401 * If the translation is successful, the scanner advances past the input 2402 * that matched. 
2403 * 2404 * <p> If the next token matches the <a 2405 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2406 * then the token is converted into a {@code double} value as if by 2407 * removing all locale specific prefixes, group separators, and locale 2408 * specific suffixes, then mapping non-ASCII digits into ASCII 2409 * digits via {@link Character#digit Character.digit}, prepending a 2410 * negative sign (-) if the locale specific negative prefixes and suffixes 2411 * were present, and passing the resulting string to 2412 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2413 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2414 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2415 * appropriate. 2416 * 2417 * @return the {@code double} scanned from the input 2418 * @throws InputMismatchException 2419 * if the next token does not match the <i>Float</i> 2420 * regular expression, or is out of range 2421 * @throws NoSuchElementException if the input is exhausted 2422 * @throws IllegalStateException if this scanner is closed 2423 */ 2424 public double nextDouble() { 2425 // Check cached result 2426 if ((typeCache != null) && (typeCache instanceof Double)) { 2427 double val = ((Double)typeCache).doubleValue(); 2428 useTypeCache(); 2429 return val; 2430 } 2431 setRadix(10); 2432 clearCaches(); 2433 // Search for next float 2434 try { 2435 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2436 } catch (NumberFormatException nfe) { 2437 position = matcher.start(); // don't skip bad token 2438 throw new InputMismatchException(nfe.getMessage()); 2439 } 2440 } 2441 2442 // Convenience methods for scanning multi precision numbers 2443 2444 /** 2445 * Returns true if the next token in this scanner's input can be 2446 * interpreted as a {@code BigInteger} in the default radix using the 2447 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2448 * input. 2449 * 2450 * @return true if and only if this scanner's next token is a valid 2451 * {@code BigInteger} 2452 * @throws IllegalStateException if this scanner is closed 2453 */ 2454 public boolean hasNextBigInteger() { 2455 return hasNextBigInteger(defaultRadix); 2456 } 2457 2458 /** 2459 * Returns true if the next token in this scanner's input can be 2460 * interpreted as a {@code BigInteger} in the specified radix using 2461 * the {@link #nextBigInteger} method. The scanner does not advance past 2462 * any input. 2463 * 2464 * @param radix the radix used to interpret the token as an integer 2465 * @return true if and only if this scanner's next token is a valid 2466 * {@code BigInteger} 2467 * @throws IllegalStateException if this scanner is closed 2468 */ 2469 public boolean hasNextBigInteger(int radix) { 2470 setRadix(radix); 2471 boolean result = hasNext(integerPattern()); 2472 if (result) { // Cache it 2473 try { 2474 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2475 processIntegerToken(hasNextResult) : 2476 hasNextResult; 2477 typeCache = new BigInteger(s, radix); 2478 } catch (NumberFormatException nfe) { 2479 result = false; 2480 } 2481 } 2482 return result; 2483 } 2484 2485 /** 2486 * Scans the next token of the input as a {@link java.math.BigInteger 2487 * BigInteger}. 2488 * 2489 * <p> An invocation of this method of the form 2490 * {@code nextBigInteger()} behaves in exactly the same way as the 2491 * invocation {@code nextBigInteger(radix)}, where {@code radix} 2492 * is the default radix of this scanner. 
2493 * 2494 * @return the {@code BigInteger} scanned from the input 2495 * @throws InputMismatchException 2496 * if the next token does not match the <i>Integer</i> 2497 * regular expression, or is out of range 2498 * @throws NoSuchElementException if the input is exhausted 2499 * @throws IllegalStateException if this scanner is closed 2500 */ 2501 public BigInteger nextBigInteger() { 2502 return nextBigInteger(defaultRadix); 2503 } 2504 2505 /** 2506 * Scans the next token of the input as a {@link java.math.BigInteger 2507 * BigInteger}. 2508 * 2509 * <p> If the next token matches the <a 2510 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2511 * above then the token is converted into a {@code BigInteger} value as if 2512 * by removing all group separators, mapping non-ASCII digits into ASCII 2513 * digits via the {@link Character#digit Character.digit}, and passing the 2514 * resulting string to the {@link 2515 * java.math.BigInteger#BigInteger(java.lang.String) 2516 * BigInteger(String, int)} constructor with the specified radix. 
2517 * 2518 * @param radix the radix used to interpret the token 2519 * @return the {@code BigInteger} scanned from the input 2520 * @throws InputMismatchException 2521 * if the next token does not match the <i>Integer</i> 2522 * regular expression, or is out of range 2523 * @throws NoSuchElementException if the input is exhausted 2524 * @throws IllegalStateException if this scanner is closed 2525 */ 2526 public BigInteger nextBigInteger(int radix) { 2527 // Check cached result 2528 if ((typeCache != null) && (typeCache instanceof BigInteger) 2529 && this.radix == radix) { 2530 BigInteger val = (BigInteger)typeCache; 2531 useTypeCache(); 2532 return val; 2533 } 2534 setRadix(radix); 2535 clearCaches(); 2536 // Search for next int 2537 try { 2538 String s = next(integerPattern()); 2539 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2540 s = processIntegerToken(s); 2541 return new BigInteger(s, radix); 2542 } catch (NumberFormatException nfe) { 2543 position = matcher.start(); // don't skip bad token 2544 throw new InputMismatchException(nfe.getMessage()); 2545 } 2546 } 2547 2548 /** 2549 * Returns true if the next token in this scanner's input can be 2550 * interpreted as a {@code BigDecimal} using the 2551 * {@link #nextBigDecimal} method. The scanner does not advance past any 2552 * input. 2553 * 2554 * @return true if and only if this scanner's next token is a valid 2555 * {@code BigDecimal} 2556 * @throws IllegalStateException if this scanner is closed 2557 */ 2558 public boolean hasNextBigDecimal() { 2559 setRadix(10); 2560 boolean result = hasNext(decimalPattern()); 2561 if (result) { // Cache it 2562 try { 2563 String s = processFloatToken(hasNextResult); 2564 typeCache = new BigDecimal(s); 2565 } catch (NumberFormatException nfe) { 2566 result = false; 2567 } 2568 } 2569 return result; 2570 } 2571 2572 /** 2573 * Scans the next token of the input as a {@link java.math.BigDecimal 2574 * BigDecimal}. 
2575 * 2576 * <p> If the next token matches the <a 2577 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2578 * above then the token is converted into a {@code BigDecimal} value as if 2579 * by removing all group separators, mapping non-ASCII digits into ASCII 2580 * digits via the {@link Character#digit Character.digit}, and passing the 2581 * resulting string to the {@link 2582 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2583 * constructor. 2584 * 2585 * @return the {@code BigDecimal} scanned from the input 2586 * @throws InputMismatchException 2587 * if the next token does not match the <i>Decimal</i> 2588 * regular expression, or is out of range 2589 * @throws NoSuchElementException if the input is exhausted 2590 * @throws IllegalStateException if this scanner is closed 2591 */ 2592 public BigDecimal nextBigDecimal() { 2593 // Check cached result 2594 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2595 BigDecimal val = (BigDecimal)typeCache; 2596 useTypeCache(); 2597 return val; 2598 } 2599 setRadix(10); 2600 clearCaches(); 2601 // Search for next float 2602 try { 2603 String s = processFloatToken(next(decimalPattern())); 2604 return new BigDecimal(s); 2605 } catch (NumberFormatException nfe) { 2606 position = matcher.start(); // don't skip bad token 2607 throw new InputMismatchException(nfe.getMessage()); 2608 } 2609 } 2610 2611 /** 2612 * Resets this scanner. 2613 * 2614 * <p> Resetting a scanner discards all of its explicit state 2615 * information which may have been changed by invocations of 2616 * {@link #useDelimiter useDelimiter()}, 2617 * {@link #useLocale useLocale()}, or 2618 * {@link #useRadix useRadix()}. 
2619 * 2620 * <p> An invocation of this method of the form 2621 * {@code scanner.reset()} behaves in exactly the same way as the 2622 * invocation 2623 * 2624 * <blockquote><pre>{@code 2625 * scanner.useDelimiter("\\p{javaWhitespace}+") 2626 * .useLocale(Locale.getDefault(Locale.Category.FORMAT)) 2627 * .useRadix(10); 2628 * }</pre></blockquote> 2629 * 2630 * @return this scanner 2631 * 2632 * @since 1.6 2633 */ 2634 public Scanner reset() { 2635 delimPattern = WHITESPACE_PATTERN; 2636 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 2637 useRadix(10); 2638 clearCaches(); 2639 modCount++; 2640 return this; 2641 } 2642 2643 /** 2644 * Returns a stream of delimiter-separated tokens from this scanner. The 2645 * stream contains the same tokens that would be returned, starting from 2646 * this scanner's current state, by calling the {@link #next} method 2647 * repeatedly until the {@link #hasNext} method returns false. 2648 * 2649 * <p>The resulting stream is sequential and ordered. All stream elements are 2650 * non-null. 2651 * 2652 * <p>Scanning starts upon initiation of the terminal stream operation, using the 2653 * current state of this scanner. Subsequent calls to any methods on this scanner 2654 * other than {@link #close} and {@link #ioException} may return undefined results 2655 * or may cause undefined effects on the returned stream. The returned stream's source 2656 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a 2657 * {@link java.util.ConcurrentModificationException} if any such calls are detected 2658 * during stream pipeline execution. 2659 * 2660 * <p>After stream pipeline execution completes, this scanner is left in an indeterminate 2661 * state and cannot be reused. 2662 * 2663 * <p>If this scanner contains a resource that must be released, this scanner 2664 * should be closed, either by calling its {@link #close} method, or by 2665 * closing the returned stream. 
Closing the stream will close the underlying scanner. 2666 * {@code IllegalStateException} is thrown if the scanner has been closed when this 2667 * method is called, or if this scanner is closed during stream pipeline execution. 2668 * 2669 * <p>This method might block waiting for more input. 2670 * 2671 * @apiNote 2672 * For example, the following code will create a list of 2673 * comma-delimited tokens from a string: 2674 * 2675 * <pre>{@code 2676 * List<String> result = new Scanner("abc,def,,ghi") 2677 * .useDelimiter(",") 2678 * .tokens() 2679 * .collect(Collectors.toList()); 2680 * }</pre> 2681 * 2682 * <p>The resulting list would contain {@code "abc"}, {@code "def"}, 2683 * the empty string, and {@code "ghi"}. 2684 * 2685 * @return a sequential stream of token strings 2686 * @throws IllegalStateException if this scanner is closed 2687 * @since 9 2688 */ 2689 public Stream<String> tokens() { 2690 ensureOpen(); 2691 Stream<String> stream = StreamSupport.stream(new TokenSpliterator(), false); 2692 return stream.onClose(this::close); 2693 } 2694 2695 class TokenSpliterator extends Spliterators.AbstractSpliterator<String> { 2696 int expectedCount = -1; 2697 2698 TokenSpliterator() { 2699 super(Long.MAX_VALUE, 2700 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2701 } 2702 2703 @Override 2704 public boolean tryAdvance(Consumer<? super String> cons) { 2705 if (expectedCount >= 0 && expectedCount != modCount) { 2706 throw new ConcurrentModificationException(); 2707 } 2708 2709 if (hasNext()) { 2710 String token = next(); 2711 expectedCount = modCount; 2712 cons.accept(token); 2713 if (expectedCount != modCount) { 2714 throw new ConcurrentModificationException(); 2715 } 2716 return true; 2717 } else { 2718 expectedCount = modCount; 2719 return false; 2720 } 2721 } 2722 } 2723 2724 /** 2725 * Returns a stream of match results from this scanner. 
The stream 2726 * contains the same results in the same order that would be returned by 2727 * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match} 2728 * successively as long as {@link #findWithinHorizon findWithinHorizon()} 2729 * finds matches. 2730 * 2731 * <p>The resulting stream is sequential and ordered. All stream elements are 2732 * non-null. 2733 * 2734 * <p>Scanning starts upon initiation of the terminal stream operation, using the 2735 * current state of this scanner. Subsequent calls to any methods on this scanner 2736 * other than {@link #close} and {@link #ioException} may return undefined results 2737 * or may cause undefined effects on the returned stream. The returned stream's source 2738 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a 2739 * {@link java.util.ConcurrentModificationException} if any such calls are detected 2740 * during stream pipeline execution. 2741 * 2742 * <p>After stream pipeline execution completes, this scanner is left in an indeterminate 2743 * state and cannot be reused. 2744 * 2745 * <p>If this scanner contains a resource that must be released, this scanner 2746 * should be closed, either by calling its {@link #close} method, or by 2747 * closing the returned stream. Closing the stream will close the underlying scanner. 2748 * {@code IllegalStateException} is thrown if the scanner has been closed when this 2749 * method is called, or if this scanner is closed during stream pipeline execution. 2750 * 2751 * <p>As with the {@link #findWithinHorizon findWithinHorizon()} methods, this method 2752 * might block waiting for additional input, and it might buffer an unbounded amount of 2753 * input searching for a match. 
2754 * 2755 * @apiNote 2756 * For example, the following code will read a file and return a list 2757 * of all sequences of characters consisting of seven or more Latin capital 2758 * letters: 2759 * 2760 * <pre>{@code 2761 * try (Scanner sc = new Scanner(Paths.get("input.txt"))) { 2762 * Pattern pat = Pattern.compile("[A-Z]{7,}"); 2763 * List<String> capWords = sc.findAll(pat) 2764 * .map(MatchResult::group) 2765 * .collect(Collectors.toList()); 2766 * } 2767 * }</pre> 2768 * 2769 * @param pattern the pattern to be matched 2770 * @return a sequential stream of match results 2771 * @throws NullPointerException if pattern is null 2772 * @throws IllegalStateException if this scanner is closed 2773 * @since 9 2774 */ 2775 public Stream<MatchResult> findAll(Pattern pattern) { 2776 Objects.requireNonNull(pattern); 2777 ensureOpen(); 2778 Stream<MatchResult> stream = StreamSupport.stream(new FindSpliterator(pattern), false); 2779 return stream.onClose(this::close); 2780 } 2781 2782 /** 2783 * Returns a stream of match results that match the provided pattern string. 
2784 * The effect is equivalent to the following code: 2785 * 2786 * <pre>{@code 2787 * scanner.findAll(Pattern.compile(patString)) 2788 * }</pre> 2789 * 2790 * @param patString the pattern string 2791 * @return a sequential stream of match results 2792 * @throws NullPointerException if patString is null 2793 * @throws IllegalStateException if this scanner is closed 2794 * @throws PatternSyntaxException if the regular expression's syntax is invalid 2795 * @since 9 2796 * @see java.util.regex.Pattern 2797 */ 2798 public Stream<MatchResult> findAll(String patString) { 2799 Objects.requireNonNull(patString); 2800 ensureOpen(); 2801 return findAll(patternCache.forName(patString)); 2802 } 2803 2804 class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> { 2805 final Pattern pattern; 2806 int expectedCount = -1; 2807 2808 FindSpliterator(Pattern pattern) { 2809 super(Long.MAX_VALUE, 2810 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2811 this.pattern = pattern; 2812 } 2813 2814 @Override 2815 public boolean tryAdvance(Consumer<? super MatchResult> cons) { 2816 ensureOpen(); 2817 if (expectedCount >= 0) { 2818 if (expectedCount != modCount) { 2819 throw new ConcurrentModificationException(); 2820 } 2821 } else { 2822 expectedCount = modCount; 2823 } 2824 2825 while (true) { 2826 // assert expectedCount == modCount 2827 if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount 2828 cons.accept(matcher.toMatchResult()); 2829 if (expectedCount != modCount) { 2830 throw new ConcurrentModificationException(); 2831 } 2832 return true; 2833 } 2834 if (needInput) 2835 readInput(); // doesn't increment modCount 2836 else 2837 return false; // reached end of input 2838 } 2839 } 2840 } 2841} 2842