ArgTokenizer.java revision 3717:2a3e23ee1b65
1/* 2 * Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package jdk.internal.jshell.tool; 27 28import java.util.ArrayList; 29import java.util.Arrays; 30import java.util.HashMap; 31import java.util.List; 32import java.util.Map; 33import static java.util.stream.Collectors.toList; 34 35/** 36 * Parse command arguments, derived from StreamTokenizer by 37 * @author James Gosling 38 */ 39class ArgTokenizer { 40 41 private final String str; 42 private final String prefix; 43 private final int length; 44 private int next = 0; 45 private char buf[] = new char[20]; 46 private int mark; 47 48 private final byte ctype[] = new byte[256]; 49 private static final byte CT_ALPHA = 0; 50 private static final byte CT_WHITESPACE = 1; 51 private static final byte CT_QUOTE = 8; 52 53 private String sval; 54 private boolean isQuoted = false; 55 56 private final Map<String, Boolean> options = new HashMap<>(); 57 private final List<String> badOptions = new ArrayList<>(); 58 59 ArgTokenizer(String prefix, String arg) { 60 this.str = arg; 61 this.prefix = prefix + " "; 62 this.length = arg.length(); 63 quoteChar('"'); 64 quoteChar('\''); 65 whitespaceChars(0x09, 0x0D); 66 whitespaceChars(0x1C, 0x20); 67 whitespaceChars(0x85, 0x85); 68 whitespaceChars(0xA0, 0xA0); 69 } 70 71 /** 72 * Return the next non-option argument. Encountered options are stored. 73 * 74 * @return the token string, or null if there are no more tokens 75 */ 76 String next() { 77 while (true) { 78 nextToken(); 79 if (sval != null && !isQuoted() && sval.startsWith("-")) { 80 // allow POSIX getopt() option format, 81 // to be consistent with command-line 82 String opt = sval.startsWith("--") 83 ? sval.substring(1) 84 : sval; 85 foundOption(opt); 86 } else { 87 break; 88 } 89 } 90 return sval; 91 } 92 93 private void foundOption(String opt) { 94 if (options.containsKey(opt)) { 95 options.put(opt, true); 96 return; 97 } 98 99 List<Map.Entry<String,Boolean>> matches = 100 options.entrySet() 101 .stream() 102 .filter(e -> e.getKey().startsWith(opt)) 103 .collect(toList()); 104 if (matches.size() == 1) { 105 matches.get(0).setValue(true); 106 } else { 107 badOptions.add(opt); 108 } 109 } 110 111 /** 112 * Set the allowed options. Must be called before any options would be read 113 * and before calling any of the option functionality below. 114 */ 115 void allowedOptions(String... opts) { 116 for (String opt : opts) { 117 options.putIfAbsent(opt, false); 118 } 119 } 120 121 /** 122 * Has the specified option been encountered. 123 * 124 * @param opt the option to check 125 * @return true if the option has been encountered 126 */ 127 boolean hasOption(String opt) { 128 Boolean has = options.get(opt); 129 if (has == null) { 130 throw new InternalError("hasOption called before allowedOptions or on bad option"); 131 } 132 return has; 133 } 134 135 /** 136 * Return the number of encountered options 137 * 138 * @return the option count 139 */ 140 int optionCount() { 141 return (int) options.entrySet().stream() 142 .filter(e -> e.getValue()) 143 .count(); 144 } 145 146 /** 147 * Return the bad options encountered. Bad options are those that were not 148 * listed in the call to allowedOptions(). 149 * 150 * @return as space-separated list the bad options encountered, or the empty 151 * string if none. 152 */ 153 String badOptions() { 154 return String.join(" ", badOptions); 155 } 156 157 /** 158 * Consume the remainder of the input. This is useful to sure all options 159 * have been encountered and to check to unexpected additional non-option 160 * input. 161 * 162 * @return the string-separated concatenation of all remaining non-option 163 * arguments. 164 */ 165 String remainder() { 166 List<String> rem = new ArrayList<>(); 167 while (next() != null) { 168 rem.add(sval); 169 } 170 return String.join(" ", rem); 171 } 172 173 String val() { 174 return sval; 175 } 176 177 boolean isQuoted() { 178 return isQuoted; 179 } 180 181 String whole() { 182 return prefix + str; 183 } 184 185 void mark() { 186 mark = next; 187 } 188 189 void rewind() { 190 next = mark; 191 } 192 193 /** 194 * Reads a single character. 195 * 196 * @return The character read, or -1 if the end of the stream has been 197 * reached 198 */ 199 private int read() { 200 if (next >= length) { 201 return -1; 202 } 203 return str.charAt(next++); 204 } 205 206 /** 207 * Specifies that all characters <i>c</i> in the range 208 * <code>low <= <i>c</i> <= high</code> 209 * are white space characters. White space characters serve only to 210 * separate tokens in the input stream. 211 * 212 * <p>Any other attribute settings for the characters in the specified 213 * range are cleared. 214 * 215 * @param low the low end of the range. 216 * @param hi the high end of the range. 217 */ 218 private void whitespaceChars(int low, int hi) { 219 if (low < 0) 220 low = 0; 221 if (hi >= ctype.length) 222 hi = ctype.length - 1; 223 while (low <= hi) 224 ctype[low++] = CT_WHITESPACE; 225 } 226 227 /** 228 * Specifies that matching pairs of this character delimit string 229 * constants in this tokenizer. 230 * <p> 231 * If a string quote character is encountered, then a string is 232 * recognized, consisting of all characters after (but not including) 233 * the string quote character, up to (but not including) the next 234 * occurrence of that same string quote character, or a line 235 * terminator, or end of file. The usual escape sequences such as 236 * {@code "\u005Cn"} and {@code "\u005Ct"} are recognized and 237 * converted to single characters as the string is parsed. 238 * 239 * <p>Any other attribute settings for the specified character are cleared. 240 * 241 * @param ch the character. 242 */ 243 private void quoteChar(int ch) { 244 if (ch >= 0 && ch < ctype.length) 245 ctype[ch] = CT_QUOTE; 246 } 247 248 private int unicode2ctype(int c) { 249 switch (c) { 250 case 0x1680: 251 case 0x180E: 252 case 0x200A: 253 case 0x202F: 254 case 0x205F: 255 case 0x3000: 256 return CT_WHITESPACE; 257 default: 258 return CT_ALPHA; 259 } 260 } 261 262 /** 263 * Parses the next token of this tokenizer. 264 */ 265 public void nextToken() { 266 byte ct[] = ctype; 267 int c; 268 int lctype; 269 sval = null; 270 isQuoted = false; 271 272 do { 273 c = read(); 274 if (c < 0) { 275 return; 276 } 277 lctype = (c < 256) ? ct[c] : unicode2ctype(c); 278 } while (lctype == CT_WHITESPACE); 279 280 if (lctype == CT_ALPHA) { 281 int i = 0; 282 do { 283 if (i >= buf.length) { 284 buf = Arrays.copyOf(buf, buf.length * 2); 285 } 286 buf[i++] = (char) c; 287 c = read(); 288 lctype = c < 0 ? CT_WHITESPACE : (c < 256)? ct[c] : unicode2ctype(c); 289 } while (lctype == CT_ALPHA); 290 if (c >= 0) --next; // push last back 291 sval = String.copyValueOf(buf, 0, i); 292 return; 293 } 294 295 if (lctype == CT_QUOTE) { 296 int quote = c; 297 int i = 0; 298 /* Invariants (because \Octal needs a lookahead): 299 * (i) c contains char value 300 * (ii) d contains the lookahead 301 */ 302 int d = read(); 303 while (d >= 0 && d != quote) { 304 if (d == '\\') { 305 c = read(); 306 int first = c; /* To allow \377, but not \477 */ 307 if (c >= '0' && c <= '7') { 308 c = c - '0'; 309 int c2 = read(); 310 if ('0' <= c2 && c2 <= '7') { 311 c = (c << 3) + (c2 - '0'); 312 c2 = read(); 313 if ('0' <= c2 && c2 <= '7' && first <= '3') { 314 c = (c << 3) + (c2 - '0'); 315 d = read(); 316 } else 317 d = c2; 318 } else 319 d = c2; 320 } else { 321 switch (c) { 322 case 'a': 323 c = 0x7; 324 break; 325 case 'b': 326 c = '\b'; 327 break; 328 case 'f': 329 c = 0xC; 330 break; 331 case 'n': 332 c = '\n'; 333 break; 334 case 'r': 335 c = '\r'; 336 break; 337 case 't': 338 c = '\t'; 339 break; 340 case 'v': 341 c = 0xB; 342 break; 343 } 344 d = read(); 345 } 346 } else { 347 c = d; 348 d = read(); 349 } 350 if (i >= buf.length) { 351 buf = Arrays.copyOf(buf, buf.length * 2); 352 } 353 buf[i++] = (char)c; 354 } 355 356 if (d == quote) { 357 isQuoted = true; 358 } 359 sval = String.copyValueOf(buf, 0, i); 360 } 361 } 362} 363