1/*
2 * Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.internal.jshell.tool;
27
28import java.util.ArrayList;
29import java.util.Arrays;
30import java.util.HashMap;
31import java.util.List;
32import java.util.Map;
33import java.util.Map.Entry;
34
35import static java.util.stream.Collectors.toList;
36
/**
 * Parses command arguments. Derived from {@code StreamTokenizer}, which was
 * written by James Gosling.
 *
 * @author  James Gosling
 */
class ArgTokenizer {

    /** The argument text being tokenized. */
    private final String str;
    /** The command prefix followed by a single space; only used by {@link #whole()}. */
    private final String prefix;
    /** Length of {@link #str}. */
    private final int length;
    /** Index in {@link #str} of the next character to be read. */
    private int next = 0;
    /** Scratch buffer holding the token under construction; doubled when full. */
    private char[] buf = new char[20];
    /** Read position saved by {@link #mark()} and restored by {@link #rewind()}. */
    private int mark;

    /** Character class of every char value below 256 (defaults to CT_ALPHA). */
    private final byte[] ctype = new byte[256];
    private static final byte CT_ALPHA = 0;
    private static final byte CT_WHITESPACE = 1;
    private static final byte CT_QUOTE = 8;

    /** Text of the most recently parsed token, or null once the input is exhausted. */
    private String sval;
    /** True when the most recent token was a quoted string that had its closing quote. */
    private boolean isQuoted = false;

    /** Allowed options, each mapped to whether it has been encountered. */
    private final Map<String, Boolean> options = new HashMap<>();
    /** Encountered options that matched no allowed option, or more than one. */
    private final List<String> badOptions = new ArrayList<>();

    /**
     * Creates a tokenizer over the arguments of a command.
     * Double and single quotes delimit quoted strings; the characters
     * 0x09-0x0D, 0x1C-0x20, 0x85 and 0xA0 separate tokens.
     *
     * @param prefix the command text that preceded the arguments
     * @param arg the argument text to tokenize; must not be null
     */
    ArgTokenizer(String prefix, String arg) {
        this.str = arg;
        this.prefix = prefix + " ";
        this.length = arg.length();
        quoteChar('"');
        quoteChar('\'');
        whitespaceChars(0x09, 0x0D);
        whitespaceChars(0x1C, 0x20);
        whitespaceChars(0x85, 0x85);
        whitespaceChars(0xA0, 0xA0);
    }

    /**
     * Return the next non-option argument. Encountered options are stored.
     * A token is treated as an option when it starts with "-" and is not a
     * properly closed quoted string.
     *
     * @return the token string, or null if there are no more tokens
     */
    String next() {
        while (true) {
            nextToken();
            if (sval == null || isQuoted() || !sval.startsWith("-")) {
                return sval;
            }
            // allow POSIX getopt() option format,
            // to be consistent with command-line:
            // "--opt" is recorded as "-opt"
            foundOption(sval.startsWith("--") ? sval.substring(1) : sval);
        }
    }

    /**
     * Records an encountered option. An exact match with an allowed option
     * wins; otherwise the option must be a prefix of exactly one allowed
     * option. Anything else (no match, or an ambiguous prefix) is added to
     * the bad options.
     *
     * @param opt the option text, starting with a single "-"
     */
    private void foundOption(String opt) {
        if (options.containsKey(opt)) {
            options.put(opt, true);
            return;
        }

        List<Map.Entry<String, Boolean>> matches =
                options.entrySet()
                       .stream()
                       .filter(e -> e.getKey().startsWith(opt))
                       .collect(toList());
        if (matches.size() == 1) {
            // the entry is backed by the map, so this marks the option as seen
            matches.get(0).setValue(true);
        } else {
            badOptions.add(opt);
        }
    }

    /**
     * Set the allowed options. Must be called before any options would be read
     * and before calling any of the option functionality below. Options that
     * are already registered keep their current state.
     *
     * @param opts the allowed options, each including its leading "-"
     */
    void allowedOptions(String... opts) {
        for (String opt : opts) {
            options.putIfAbsent(opt, false);
        }
    }

    /**
     * Has the specified option been encountered.
     *
     * @param opt the option to check
     * @return true if the option has been encountered
     * @throws InternalError if the option was never passed to allowedOptions()
     */
    boolean hasOption(String opt) {
        Boolean has = options.get(opt);
        if (has == null) {
            throw new InternalError("hasOption called before allowedOptions or on bad option");
        }
        return has;
    }

    /**
     * Return the number of distinct allowed options that have been
     * encountered. Bad options are not counted.
     *
     * @return the option count
     */
    int optionCount() {
        return (int) options.entrySet().stream()
                .filter(Entry::getValue)
                .count();
    }

    /**
     * Return the bad options encountered. Bad options are those that match
     * none of the options given to allowedOptions(), or that are a prefix of
     * more than one of them.
     *
     * @return as space-separated list the bad options encountered, or the empty
     * string if none.
     */
    String badOptions() {
        return String.join(" ", badOptions);
    }

    /**
     * Consume the remainder of the input. This is useful to ensure all options
     * have been encountered and to check for unexpected additional non-option
     * input.
     *
     * @return the space-separated concatenation of all remaining non-option
     * arguments.
     */
    String remainder() {
        List<String> rem = new ArrayList<>();
        while (next() != null) {
            rem.add(sval);
        }
        return String.join(" ", rem);
    }

    /**
     * Return the most recently parsed token.
     *
     * @return the token text, or null if the last parse hit the end of input
     */
    String val() {
        return sval;
    }

    /**
     * Was the most recently parsed token a quoted string with a closing quote.
     *
     * @return true if the last token was quoted and properly terminated
     */
    boolean isQuoted() {
        return isQuoted;
    }

    /**
     * Return the whole command: the prefix, a space, and the argument text.
     *
     * @return the reconstructed command text
     */
    String whole() {
        return prefix + str;
    }

    /**
     * Remember the current read position. Only the position is saved, not the
     * current token or the option state.
     */
    void mark() {
        mark = next;
    }

    /**
     * Move the read position back to the one saved by the last mark().
     */
    void rewind() {
        next = mark;
    }

    /**
     * Reads a single character.
     *
     * @return The character read, or -1 if the end of the stream has been
     * reached
     */
    private int read() {
        if (next >= length) {
            return -1;
        }
        return str.charAt(next++);
    }

    /**
     * Specifies that all characters <i>c</i> in the range
     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
     * are white space characters. White space characters serve only to
     * separate tokens in the input stream.
     *
     * <p>Any other attribute settings for the characters in the specified
     * range are cleared. The range is clipped to the size of the table.
     *
     * @param   low   the low end of the range.
     * @param   hi    the high end of the range.
     */
    private void whitespaceChars(int low, int hi) {
        if (low < 0) {
            low = 0;
        }
        if (hi >= ctype.length) {
            hi = ctype.length - 1;
        }
        while (low <= hi) {
            ctype[low++] = CT_WHITESPACE;
        }
    }

    /**
     * Specifies that matching pairs of this character delimit string
     * constants in this tokenizer.
     * <p>
     * If a string quote character is encountered, then a string is
     * recognized, consisting of all characters after (but not including)
     * the string quote character, up to (but not including) the next
     * occurrence of that same string quote character, or the end of the
     * input. Backslash escapes are converted to single characters as the
     * string is parsed.
     *
     * <p>Any other attribute settings for the specified character are cleared.
     * Characters outside the table are ignored.
     *
     * @param   ch   the character.
     */
    private void quoteChar(int ch) {
        if (ch >= 0 && ch < ctype.length) {
            ctype[ch] = CT_QUOTE;
        }
    }

    /**
     * Classifies a character that is outside the lookup table (256 and up).
     *
     * @param c the character value
     * @return CT_WHITESPACE for the listed Unicode space characters,
     * otherwise CT_ALPHA
     */
    private int unicode2ctype(int c) {
        // NOTE(review): U+2000..U+2009 are not listed although U+200A is;
        // confirm whether that is intentional before changing it.
        switch (c) {
            case 0x1680:
            case 0x180E:
            case 0x200A:
            case 0x202F:
            case 0x205F:
            case 0x3000:
                return CT_WHITESPACE;
            default:
                return CT_ALPHA;
        }
    }

    /**
     * Classifies a character using the table for values below 256 and
     * unicode2ctype() for everything else.
     *
     * @param c a character value; must not be negative
     * @return one of the CT_ constants
     */
    private int ctypeOf(int c) {
        return (c < 256) ? ctype[c] : unicode2ctype(c);
    }

    /**
     * Stores a character in the token buffer, doubling the buffer if needed.
     *
     * @param index the position to store at
     * @param c the character value to store
     * @return the position following the stored character
     */
    private int store(int index, int c) {
        if (index >= buf.length) {
            buf = Arrays.copyOf(buf, buf.length * 2);
        }
        buf[index] = (char) c;
        return index + 1;
    }

    /**
     * Parses the next token of this tokenizer. Afterwards val() holds the
     * token text (null if only white space or nothing was left) and
     * isQuoted() tells whether it was a properly closed quoted string.
     */
    public void nextToken() {
        sval = null;
        isQuoted = false;

        // skip leading white space; give up at the end of the input
        int c;
        int kind;
        do {
            c = read();
            if (c < 0) {
                return;
            }
            kind = ctypeOf(c);
        } while (kind == CT_WHITESPACE);

        if (kind == CT_ALPHA) {
            readWord(c);
        } else if (kind == CT_QUOTE) {
            readQuoted(c);
        }
    }

    /**
     * Reads an unquoted token: the run of CT_ALPHA characters starting with
     * the given one. The terminating character, if any, is left unread.
     *
     * @param c the first character of the token, already consumed
     */
    private void readWord(int c) {
        int len = 0;
        int kind;
        do {
            len = store(len, c);
            c = read();
            // the end of the input terminates the word just like white space
            kind = (c < 0) ? CT_WHITESPACE : ctypeOf(c);
        } while (kind == CT_ALPHA);
        if (c >= 0) {
            --next; // push last back
        }
        sval = String.copyValueOf(buf, 0, len);
    }

    /**
     * Reads a quoted token up to the matching quote or the end of the input.
     * Recognized escapes are \a \b \f \n \r \t \v and octal \0 to \377; any
     * other escaped character stands for itself. A backslash that is the very
     * last character of the input is kept as a literal backslash.
     *
     * @param quote the opening quote character, already consumed
     */
    private void readQuoted(int quote) {
        int len = 0;
        /* Invariants (because \Octal needs a lookahead):
         *   (i)  c contains char value
         *   (ii) d contains the lookahead
         */
        int c;
        int d = read();
        while (d >= 0 && d != quote) {
            if (d == '\\') {
                c = read();
                if (c < 0) {
                    // Input ended right after the backslash. Keep the
                    // backslash itself rather than storing the -1 end
                    // marker, which would become the character U+FFFF.
                    c = '\\';
                    d = -1;
                } else if (c >= '0' && c <= '7') {
                    int first = c;   /* To allow \377, but not \477 */
                    c = c - '0';
                    int c2 = read();
                    if ('0' <= c2 && c2 <= '7') {
                        c = (c << 3) + (c2 - '0');
                        c2 = read();
                        if ('0' <= c2 && c2 <= '7' && first <= '3') {
                            c = (c << 3) + (c2 - '0');
                            d = read();
                        } else {
                            d = c2;
                        }
                    } else {
                        d = c2;
                    }
                } else {
                    switch (c) {
                        case 'a':
                            c = 0x7;
                            break;
                        case 'b':
                            c = '\b';
                            break;
                        case 'f':
                            c = 0xC;
                            break;
                        case 'n':
                            c = '\n';
                            break;
                        case 'r':
                            c = '\r';
                            break;
                        case 't':
                            c = '\t';
                            break;
                        case 'v':
                            c = 0xB;
                            break;
                        default:
                            // any other escaped character stands for itself
                            break;
                    }
                    d = read();
                }
            } else {
                c = d;
                d = read();
            }
            len = store(len, c);
        }

        // only a string that reached its closing quote counts as quoted
        isQuoted = (d == quote);
        sval = String.copyValueOf(buf, 0, len);
    }
}
365