ArgTokenizer.java revision 3717:2a3e23ee1b65
1/*
2 * Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.internal.jshell.tool;
27
28import java.util.ArrayList;
29import java.util.Arrays;
30import java.util.HashMap;
31import java.util.List;
32import java.util.Map;
33import static java.util.stream.Collectors.toList;
34
/**
 * Parses the argument string of a command into whitespace-separated tokens,
 * collecting option tokens (those starting with {@code -}) on the side.
 * Derived from {@code java.io.StreamTokenizer}.
 *
 * <p>Tokens are either bare words (runs of non-whitespace, non-quote
 * characters) or quoted strings delimited by matching {@code "} or {@code '}
 * characters. Instances are mutable and carry no synchronization.
 *
 * @author  James Gosling
 */
class ArgTokenizer {

    // Character classes stored in ctype; CT_ALPHA is the default (zero) value.
    private static final byte CT_ALPHA = 0;
    private static final byte CT_WHITESPACE = 1;
    private static final byte CT_QUOTE = 8;

    private final String str;
    private final String prefix;
    private final int length;

    // Character class for each char below 256; see unicode2ctype for the rest.
    private final byte[] ctype = new byte[256];

    // Allowed option -> whether it has been encountered so far.
    private final Map<String, Boolean> options = new HashMap<>();
    private final List<String> badOptions = new ArrayList<>();

    private int next = 0;   // index in str of the next character to read
    private int mark;       // read position saved by mark()

    private String sval;                // most recently parsed token, or null
    private boolean isQuoted = false;   // whether sval was a terminated quoted string

    /**
     * Creates a tokenizer over the given argument string.
     *
     * @param prefix text prepended (followed by one space) by {@link #whole()}
     * @param arg the argument string to tokenize
     */
    ArgTokenizer(String prefix, String arg) {
        this.str = arg;
        this.prefix = prefix + " ";
        this.length = arg.length();
        quoteChar('"');
        quoteChar('\'');
        whitespaceChars(0x09, 0x0D);
        whitespaceChars(0x1C, 0x20);
        whitespaceChars(0x85, 0x85);
        whitespaceChars(0xA0, 0xA0);
    }

    /**
     * Return the next non-option argument. Encountered options are stored.
     * An unquoted token starting with {@code -} is an option; a leading
     * {@code --} is accepted and treated as a single {@code -}.
     *
     * @return the token string, or null if there are no more tokens
     */
    String next() {
        while (true) {
            nextToken();
            if (sval == null || isQuoted() || !sval.startsWith("-")) {
                return sval;
            }
            // allow POSIX getopt() option format,
            // to be consistent with command-line
            foundOption(sval.startsWith("--") ? sval.substring(1) : sval);
        }
    }

    /**
     * Records an encountered option. An exact match of an allowed option, or
     * a prefix that matches exactly one allowed option, marks that option as
     * seen; anything else (unknown or ambiguous) is recorded as a bad option.
     *
     * @param opt the option text, starting with a single {@code -}
     */
    private void foundOption(String opt) {
        if (options.containsKey(opt)) {
            options.put(opt, true);
            return;
        }

        List<String> candidates = options.keySet()
                .stream()
                .filter(key -> key.startsWith(opt))
                .collect(toList());
        if (candidates.size() == 1) {
            options.put(candidates.get(0), true);
        } else {
            badOptions.add(opt);
        }
    }

    /**
     * Set the allowed options. Must be called before any options would be read
     * and before calling any of the option functionality below.
     *
     * @param opts the allowed options, each including its leading {@code -}
     */
    void allowedOptions(String... opts) {
        for (String opt : opts) {
            options.putIfAbsent(opt, false);
        }
    }

    /**
     * Has the specified option been encountered.
     *
     * @param opt the option to check
     * @return true if the option has been encountered
     * @throws InternalError if {@code opt} was not declared with
     * {@link #allowedOptions(String...)}
     */
    boolean hasOption(String opt) {
        Boolean has = options.get(opt);
        if (has == null) {
            throw new InternalError("hasOption called before allowedOptions or on bad option");
        }
        return has;
    }

    /**
     * Return the number of distinct allowed options encountered so far.
     *
     * @return the option count
     */
    int optionCount() {
        return (int) options.values().stream()
                .filter(seen -> seen)
                .count();
    }

    /**
     * Return the bad options encountered. Bad options are those that were not
     * listed in the call to allowedOptions(), or that were a prefix of more
     * than one allowed option.
     *
     * @return a space-separated list of the bad options encountered, or the
     * empty string if none.
     */
    String badOptions() {
        return String.join(" ", badOptions);
    }

    /**
     * Consume the remainder of the input. This is useful to ensure all options
     * have been encountered and to check for unexpected additional non-option
     * input.
     *
     * @return the space-separated concatenation of all remaining non-option
     * arguments.
     */
    String remainder() {
        List<String> rem = new ArrayList<>();
        while (next() != null) {
            rem.add(sval);
        }
        return String.join(" ", rem);
    }

    /**
     * Return the most recently parsed token.
     *
     * @return the current token, or null if none was found
     */
    String val() {
        return sval;
    }

    /**
     * Was the most recently parsed token a quoted string with a closing quote.
     *
     * @return true if the current token was quoted and properly terminated
     */
    boolean isQuoted() {
        return isQuoted;
    }

    /**
     * Return the prefix, a space, and the complete argument string.
     *
     * @return the whole input, including the prefix
     */
    String whole() {
        return prefix + str;
    }

    /**
     * Remember the current read position for a later {@link #rewind()}.
     * Only the position is saved; the current token and the recorded options
     * are not.
     */
    void mark() {
        mark = next;
    }

    /**
     * Move the read position back to the one saved by {@link #mark()}.
     */
    void rewind() {
        next = mark;
    }

    /**
     * Reads a single character.
     *
     * @return The character read, or -1 if the end of the stream has been
     * reached
     */
    private int read() {
        if (next >= length) {
            return -1;
        }
        return str.charAt(next++);
    }

    /**
     * Specifies that all characters <i>c</i> in the range
     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
     * are white space characters. White space characters serve only to
     * separate tokens in the input. The range is clamped to the table size.
     *
     * <p>Any other attribute settings for the characters in the specified
     * range are cleared.
     *
     * @param   low   the low end of the range.
     * @param   hi    the high end of the range.
     */
    private void whitespaceChars(int low, int hi) {
        int from = Math.max(low, 0);
        int to = Math.min(hi, ctype.length - 1);
        for (int ch = from; ch <= to; ch++) {
            ctype[ch] = CT_WHITESPACE;
        }
    }

    /**
     * Specifies that matching pairs of this character delimit string
     * constants in this tokenizer.
     * <p>
     * If a string quote character is encountered, then a string is
     * recognized, consisting of all characters after (but not including)
     * the string quote character, up to (but not including) the next
     * occurrence of that same string quote character, or the end of the
     * input. Line terminators do not end the string. Backslash escape
     * sequences such as {@code \n} and {@code \t} are recognized and
     * converted to single characters as the string is parsed.
     *
     * <p>Any other attribute settings for the specified character are cleared.
     *
     * @param   ch   the character.
     */
    private void quoteChar(int ch) {
        if (ch >= 0 && ch < ctype.length) {
            ctype[ch] = CT_QUOTE;
        }
    }

    /**
     * Classifies a character at or above 256, which the lookup table does
     * not cover.
     *
     * @param c the character
     * @return {@code CT_WHITESPACE} for the listed space characters,
     * otherwise {@code CT_ALPHA}
     */
    private int unicode2ctype(int c) {
        // NOTE(review): of the U+2000..U+200A space characters only U+200A is
        // listed here; confirm whether the whole range was intended.
        switch (c) {
            case 0x1680:
            case 0x180E:
            case 0x200A:
            case 0x202F:
            case 0x205F:
            case 0x3000:
                return CT_WHITESPACE;
            default:
                return CT_ALPHA;
        }
    }

    /**
     * Returns the character class of a character that was actually read.
     *
     * @param c a non-negative character value
     * @return one of the {@code CT_} constants
     */
    private int charType(int c) {
        return (c < ctype.length) ? ctype[c] : unicode2ctype(c);
    }

    /**
     * Parses the next token of this tokenizer. Afterwards {@link #val()} is
     * the token text (null at end of input) and {@link #isQuoted()} tells
     * whether it was a terminated quoted string.
     */
    public void nextToken() {
        sval = null;
        isQuoted = false;

        // Skip leading whitespace; give up if the input runs out.
        int c;
        int type;
        do {
            c = read();
            if (c < 0) {
                return;
            }
            type = charType(c);
        } while (type == CT_WHITESPACE);

        if (type == CT_ALPHA) {
            sval = readWord(c);
        } else if (type == CT_QUOTE) {
            sval = readQuoted(c);
        }
    }

    /**
     * Reads a bare word: the given first character plus all following
     * characters up to, but not including, the next whitespace or quote.
     *
     * @param first the already-read first character of the word
     * @return the word
     */
    private String readWord(int first) {
        StringBuilder word = new StringBuilder();
        int c = first;
        do {
            word.append((char) c);
            c = read();
        } while (c >= 0 && charType(c) == CT_ALPHA);
        if (c >= 0) {
            --next; // push the delimiter back for the next token
        }
        return word.toString();
    }

    /**
     * Reads the body of a quoted string whose opening quote has already been
     * consumed, translating backslash escapes. Sets {@code isQuoted} when the
     * closing quote is found; an unterminated string runs to end of input.
     *
     * @param quote the opening quote character
     * @return the string contents, without the quotes
     */
    private String readQuoted(int quote) {
        StringBuilder text = new StringBuilder();
        /* Invariants (because \Octal needs a lookahead):
         *   (i)  c contains char value
         *   (ii) d contains the lookahead
         */
        int c;
        int d = read();
        while (d >= 0 && d != quote) {
            if (d == '\\') {
                c = read();
                if (c < 0) {
                    // Input ends right after the backslash: keep it literally
                    // rather than appending the end-of-input marker as a char.
                    c = '\\';
                    d = -1;
                } else if (c >= '0' && c <= '7') {
                    int first = c;   /* To allow \377, but not \477 */
                    c = c - '0';
                    int c2 = read();
                    if ('0' <= c2 && c2 <= '7') {
                        c = (c << 3) + (c2 - '0');
                        c2 = read();
                        if ('0' <= c2 && c2 <= '7' && first <= '3') {
                            c = (c << 3) + (c2 - '0');
                            d = read();
                        } else {
                            d = c2;
                        }
                    } else {
                        d = c2;
                    }
                } else {
                    c = unescape(c);
                    d = read();
                }
            } else {
                c = d;
                d = read();
            }
            text.append((char) c);
        }

        isQuoted = (d == quote);
        return text.toString();
    }

    /**
     * Maps the letter of a single-character backslash escape to the character
     * it denotes. Any other character stands for itself.
     *
     * @param c the character following the backslash
     * @return the escaped character value
     */
    private static int unescape(int c) {
        switch (c) {
            case 'a':
                return 0x7;
            case 'b':
                return '\b';
            case 'f':
                return 0xC;
            case 'n':
                return '\n';
            case 'r':
                return '\r';
            case 't':
                return '\t';
            case 'v':
                return 0xB;
            default:
                return c;
        }
    }
}
363