1/* 2 * Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
/**
 * Parse command arguments, derived from StreamTokenizer by
 * @author James Gosling
 */
class ArgTokenizer {

    // Character classes stored in the ctype table.
    private static final byte CT_ALPHA = 0;
    private static final byte CT_WHITESPACE = 1;
    private static final byte CT_QUOTE = 8;

    /** The argument string being tokenized. */
    private final String str;
    /** The command prefix followed by a single space; used only by whole(). */
    private final String prefix;
    /** Cached length of str. */
    private final int length;
    /** Index in str of the next character to read. */
    private int next = 0;
    /** Read position saved by mark() and restored by rewind(). */
    private int mark;

    /** Character class for each char value below 256. */
    private final byte[] ctype = new byte[256];

    /** Text of the most recently parsed token, or null if there was none. */
    private String sval;
    /** Whether the most recent token was a properly terminated quoted string. */
    private boolean isQuoted = false;

    /** Allowed options mapped to whether each has been encountered. */
    private final Map<String, Boolean> options = new HashMap<>();
    /** Option-like tokens that did not resolve to exactly one allowed option. */
    private final List<String> badOptions = new ArrayList<>();

    /**
     * Create a tokenizer over the arguments of a command.
     *
     * @param prefix the text preceding the arguments; only used by whole()
     * @param arg the argument string to tokenize
     */
    ArgTokenizer(String prefix, String arg) {
        this.str = arg;
        this.prefix = prefix + " ";
        this.length = arg.length();
        quoteChar('"');
        quoteChar('\'');
        whitespaceChars(0x09, 0x0D);
        whitespaceChars(0x1C, 0x20);
        whitespaceChars(0x85, 0x85);
        whitespaceChars(0xA0, 0xA0);
    }

    /**
     * Return the next non-option argument. Encountered options are stored.
     *
     * @return the token string, or null if there are no more tokens
     */
    String next() {
        while (true) {
            nextToken();
            if (sval == null || isQuoted() || !sval.startsWith("-")) {
                return sval;
            }
            // allow POSIX getopt() option format,
            // to be consistent with command-line
            String opt = sval.startsWith("--")
                    ? sval.substring(1)
                    : sval;
            foundOption(opt);
        }
    }

    /**
     * Record an encountered option. An exact match of an allowed option is
     * marked as encountered; otherwise, if the text is a prefix of exactly one
     * allowed option, that option is marked. Anything else is remembered as a
     * bad option.
     *
     * @param opt the option text, starting with a single dash
     */
    private void foundOption(String opt) {
        if (options.containsKey(opt)) {
            options.put(opt, true);
            return;
        }

        List<Map.Entry<String, Boolean>> matches =
                options.entrySet()
                       .stream()
                       .filter(e -> e.getKey().startsWith(opt))
                       .collect(toList());
        if (matches.size() == 1) {
            matches.get(0).setValue(true);
        } else {
            badOptions.add(opt);
        }
    }

    /**
     * Set the allowed options. Must be called before any options would be read
     * and before calling any of the option functionality below.
     *
     * @param opts the allowed options; options already known keep their state
     */
    void allowedOptions(String... opts) {
        for (String opt : opts) {
            options.putIfAbsent(opt, false);
        }
    }

    /**
     * Has the specified option been encountered.
     *
     * @param opt the option to check
     * @return true if the option has been encountered
     * @throws InternalError if the option was never passed to allowedOptions()
     */
    boolean hasOption(String opt) {
        Boolean has = options.get(opt);
        if (has == null) {
            throw new InternalError("hasOption called before allowedOptions or on bad option");
        }
        return has;
    }

    /**
     * Return the number of encountered options
     *
     * @return the option count
     */
    int optionCount() {
        return (int) options.values().stream()
                .filter(Boolean::booleanValue)
                .count();
    }

    /**
     * Return the bad options encountered. Bad options are those that were not
     * listed in the call to allowedOptions().
     *
     * @return as space-separated list the bad options encountered, or the empty
     * string if none.
     */
    String badOptions() {
        return String.join(" ", badOptions);
    }

    /**
     * Consume the remainder of the input. This is useful to ensure all options
     * have been encountered and to check for unexpected additional non-option
     * input.
     *
     * @return the space-separated concatenation of all remaining non-option
     * arguments.
     */
    String remainder() {
        List<String> rem = new ArrayList<>();
        while (next() != null) {
            rem.add(sval);
        }
        return String.join(" ", rem);
    }

    /**
     * Return the most recently parsed token.
     *
     * @return the token text, or null if the last parse found no token
     */
    String val() {
        return sval;
    }

    /**
     * Was the most recently parsed token a quoted string with a closing quote.
     *
     * @return true if the token was quoted and properly terminated
     */
    boolean isQuoted() {
        return isQuoted;
    }

    /**
     * Return the prefix and the complete argument string, separated by a space.
     *
     * @return the whole command text
     */
    String whole() {
        return prefix + str;
    }

    /**
     * Remember the current read position so that rewind() can return to it.
     * Only the position is saved, not the current token or option state.
     */
    void mark() {
        mark = next;
    }

    /**
     * Move the read position back to the one saved by the last mark().
     */
    void rewind() {
        next = mark;
    }

    /**
     * Reads a single character.
     *
     * @return The character read, or -1 if the end of the stream has been
     *         reached
     */
    private int read() {
        if (next >= length) {
            return -1;
        }
        return str.charAt(next++);
    }

    /**
     * Specifies that all characters <i>c</i> in the range
     * <code>low &lt;= <i>c</i> &lt;= high</code>
     * are white space characters. White space characters serve only to
     * separate tokens in the input stream. The range is clipped to the
     * bounds of the character class table.
     *
     * <p>Any other attribute settings for the characters in the specified
     * range are cleared.
     *
     * @param low the low end of the range.
     * @param hi the high end of the range.
     */
    private void whitespaceChars(int low, int hi) {
        int from = Math.max(low, 0);
        int to = Math.min(hi, ctype.length - 1);
        for (int ch = from; ch <= to; ch++) {
            ctype[ch] = CT_WHITESPACE;
        }
    }

    /**
     * Specifies that matching pairs of this character delimit string
     * constants in this tokenizer. Characters outside the character class
     * table are ignored.
     *
     * <p>Any other attribute settings for the specified character are cleared.
     *
     * @param ch the character.
     */
    private void quoteChar(int ch) {
        if (ch >= 0 && ch < ctype.length) {
            ctype[ch] = CT_QUOTE;
        }
    }

    /**
     * Classify a character that is outside the range of the ctype table.
     *
     * @param c the character value
     * @return CT_WHITESPACE for the listed space characters, else CT_ALPHA
     */
    private int unicode2ctype(int c) {
        switch (c) {
            case 0x1680:
            case 0x180E:
            case 0x200A:
            case 0x202F:
            case 0x205F:
            case 0x3000:
                return CT_WHITESPACE;
            default:
                return CT_ALPHA;
        }
    }

    /**
     * Return the character class of a character, using the table for values
     * it covers and unicode2ctype() for everything above it.
     *
     * @param c a non-negative character value
     * @return one of the CT_ constants
     */
    private int charType(int c) {
        return (c < ctype.length) ? ctype[c] : unicode2ctype(c);
    }

    /**
     * Parses the next token of this tokenizer. Afterwards val() holds the
     * token text (null if only white space remained) and isQuoted() tells
     * whether it was a terminated quoted string.
     */
    public void nextToken() {
        sval = null;
        isQuoted = false;

        // skip leading white space; give up if the input runs out
        int c;
        int type;
        do {
            c = read();
            if (c < 0) {
                return;
            }
            type = charType(c);
        } while (type == CT_WHITESPACE);

        if (type == CT_ALPHA) {
            readWord(c);
        } else if (type == CT_QUOTE) {
            readQuoted(c);
        }
    }

    /**
     * Read an unquoted word: a run of CT_ALPHA characters. The character that
     * ends the word (white space or a quote) is left unread.
     *
     * @param first the already consumed first character of the word
     */
    private void readWord(int first) {
        StringBuilder word = new StringBuilder();
        int c = first;
        do {
            word.append((char) c);
            c = read();
        } while (c >= 0 && charType(c) == CT_ALPHA);
        if (c >= 0) {
            --next; // push last back
        }
        sval = word.toString();
    }

    /**
     * Read a quoted string up to the matching quote or the end of input.
     * Backslash escapes are translated: up to three octal digits (the third
     * only when the first is at most 3, so the value fits in eight bits), the
     * letters handled by simpleEscape(), and any other character standing for
     * itself. isQuoted is set only when the closing quote was found.
     *
     * @param quote the already consumed opening quote character
     */
    private void readQuoted(int quote) {
        StringBuilder text = new StringBuilder();
        // Invariant (because an octal escape needs a lookahead):
        // d always holds the next character that has not been processed yet.
        int d = read();
        while (d >= 0 && d != quote) {
            int c;
            if (d == '\\') {
                c = read();
                int first = c; /* To allow \377, but not \477 */
                if (c < 0) {
                    // Input ended right after the backslash: keep the
                    // backslash itself rather than appending the
                    // end-of-input marker cast to a char.
                    c = '\\';
                    d = -1;
                } else if (c >= '0' && c <= '7') {
                    c = c - '0';
                    int c2 = read();
                    if ('0' <= c2 && c2 <= '7') {
                        c = (c << 3) + (c2 - '0');
                        c2 = read();
                        if ('0' <= c2 && c2 <= '7' && first <= '3') {
                            c = (c << 3) + (c2 - '0');
                            d = read();
                        } else {
                            d = c2;
                        }
                    } else {
                        d = c2;
                    }
                } else {
                    c = simpleEscape(c);
                    d = read();
                }
            } else {
                c = d;
                d = read();
            }
            text.append((char) c);
        }

        isQuoted = (d == quote);
        sval = text.toString();
    }

    /**
     * Translate the character following a backslash for the single-letter
     * escapes a, b, f, n, r, t and v.
     *
     * @param c the character after the backslash
     * @return the control character it denotes, or c itself if it is not one
     *         of the recognized letters
     */
    private static int simpleEscape(int c) {
        switch (c) {
            case 'a':
                return 0x7;
            case 'b':
                return '\b';
            case 'f':
                return 0xC;
            case 'n':
                return '\n';
            case 'r':
                return '\r';
            case 't':
                return '\t';
            case 'v':
                return 0xB;
            default:
                return c;
        }
    }
}