1/*
2 * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#include <stdlib.h>
27#include <stdio.h>
28#include <assert.h>
29#include <sys/stat.h>
30#include <ctype.h>
31
32#ifdef DEBUG_ARGFILE
33  #ifndef NO_JNI
34    #define NO_JNI
35  #endif
36  #define JLI_ReportMessage(...) printf(__VA_ARGS__)
37  #define JDK_JAVA_OPTIONS "JDK_JAVA_OPTIONS"
38  int IsWhiteSpaceOption(const char* name) { return 1; }
39#else
40  #include "java.h"
41#endif
42
43#include "jli_util.h"
44#include "emessages.h"
45
46#define MAX_ARGF_SIZE 0x7fffffffL
47
/*
 * Returns a NUL-terminated copy of the len bytes starting at begin.
 * The storage for the copy is obtained from JLI_MemAlloc.
 */
static char* clone_substring(const char *begin, size_t len) {
    char *copy = (char *) JLI_MemAlloc(len + 1);

    copy[len] = '\0';
    memcpy(copy, begin, len);
    return copy;
}
54
/* States of the argument-file tokenizer implemented by nextToken(). */
enum STATE {
    FIND_NEXT    = 0,   /* skipping white space, looking for the start of a token */
    IN_COMMENT   = 1,   /* discarding characters up to the end of the line */
    IN_QUOTE     = 2,   /* inside a quoted section of a token */
    IN_ESCAPE    = 3,   /* previous character was a backslash inside a quote */
    SKIP_LEAD_WS = 4,   /* after backslash-newline, skipping leading white space */
    IN_TOKEN     = 5    /* inside an unquoted section of a token */
};
63
64typedef struct {
65    enum STATE state;
66    const char* cptr;
67    const char* eob;
68    char quote_char;
69    JLI_List parts;
70} __ctx_args;
71
72#define NOT_FOUND -1
73static int firstAppArgIndex = NOT_FOUND;
74
75static jboolean expectingNoDashArg = JNI_FALSE;
76// Initialize to 1, as the first argument is the app name and not preprocessed
77static size_t argsCount = 1;
78static jboolean stopExpansion = JNI_FALSE;
79static jboolean relaunch = JNI_FALSE;
80
81void JLI_InitArgProcessing(jboolean isJava, jboolean disableArgFile) {
82    // No expansion for relaunch
83    if (argsCount != 1) {
84        relaunch = JNI_TRUE;
85        stopExpansion = JNI_TRUE;
86        argsCount = 1;
87    } else {
88        stopExpansion = disableArgFile;
89    }
90
91    expectingNoDashArg = JNI_FALSE;
92
93    // for tools, this value remains 0 all the time.
94    firstAppArgIndex = isJava ? NOT_FOUND : 0;
95}
96
97int JLI_GetAppArgIndex() {
98    // Will be 0 for tools
99    return firstAppArgIndex;
100}
101
102static void checkArg(const char *arg) {
103    size_t idx = 0;
104    argsCount++;
105
106    // All arguments arrive here must be a launcher argument,
107    // ie. by now, all argfile expansions must have been performed.
108    if (*arg == '-') {
109        expectingNoDashArg = JNI_FALSE;
110        if (IsWhiteSpaceOption(arg)) {
111            // expect an argument
112            expectingNoDashArg = JNI_TRUE;
113
114            if (JLI_StrCmp(arg, "-jar") == 0 ||
115                JLI_StrCmp(arg, "--module") == 0 ||
116                JLI_StrCmp(arg, "-m") == 0) {
117                // This is tricky, we do expect NoDashArg
118                // But that is considered main class to stop expansion
119                expectingNoDashArg = JNI_FALSE;
120                // We can not just update the idx here because if -jar @file
121                // still need expansion of @file to get the argument for -jar
122            }
123        } else if (JLI_StrCmp(arg, "--disable-@files") == 0) {
124            stopExpansion = JNI_TRUE;
125        }
126    } else {
127        if (!expectingNoDashArg) {
128            // this is main class, argsCount is index to next arg
129            idx = argsCount;
130        }
131        expectingNoDashArg = JNI_FALSE;
132    }
133    // only update on java mode and not yet found main class
134    if (firstAppArgIndex == NOT_FOUND && idx != 0) {
135        firstAppArgIndex = (int) idx;
136    }
137}
138
139/*
140       [\n\r]   +------------+                        +------------+ [\n\r]
141      +---------+ IN_COMMENT +<------+                | IN_ESCAPE  +---------+
142      |         +------------+       |                +------------+         |
143      |    [#]       ^               |[#]                 ^     |            |
144      |   +----------+               |                [\\]|     |[^\n\r]     |
145      v   |                          |                    |     v            |
146+------------+ [^ \t\n\r\f]  +------------+['"]>      +------------+         |
147| FIND_NEXT  +-------------->+ IN_TOKEN   +-----------+ IN_QUOTE   +         |
148+------------+               +------------+   <[quote]+------------+         |
149  |   ^                          |                       |  ^   ^            |
150  |   |               [ \t\n\r\f]|                 [\n\r]|  |   |[^ \t\n\r\f]v
151  |   +--------------------------+-----------------------+  |  +--------------+
152  |                       ['"]                              |  | SKIP_LEAD_WS |
153  +---------------------------------------------------------+  +--------------+
154*/
155static char* nextToken(__ctx_args *pctx) {
156    const char* nextc = pctx->cptr;
157    const char* const eob = pctx->eob;
158    const char* anchor = nextc;
159    char *token;
160
161    for (; nextc < eob; nextc++) {
162        register char ch = *nextc;
163
164        // Skip white space characters
165        if (pctx->state == FIND_NEXT || pctx->state == SKIP_LEAD_WS) {
166            while (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' || ch == '\f') {
167                nextc++;
168                if (nextc >= eob) {
169                    return NULL;
170                }
171                ch = *nextc;
172            }
173            pctx->state = (pctx->state == FIND_NEXT) ? IN_TOKEN : IN_QUOTE;
174            anchor = nextc;
175        // Deal with escape sequences
176        } else if (pctx->state == IN_ESCAPE) {
177            // concatenation directive
178            if (ch == '\n' || ch == '\r') {
179                pctx->state = SKIP_LEAD_WS;
180            } else {
181            // escaped character
182                char* escaped = (char*) JLI_MemAlloc(2 * sizeof(char));
183                escaped[1] = '\0';
184                switch (ch) {
185                    case 'n':
186                        escaped[0] = '\n';
187                        break;
188                    case 'r':
189                        escaped[0] = '\r';
190                        break;
191                    case 't':
192                        escaped[0] = '\t';
193                        break;
194                    case 'f':
195                        escaped[0] = '\f';
196                        break;
197                    default:
198                        escaped[0] = ch;
199                        break;
200                }
201                JLI_List_add(pctx->parts, escaped);
202                pctx->state = IN_QUOTE;
203            }
204            // anchor to next character
205            anchor = nextc + 1;
206            continue;
207        // ignore comment to EOL
208        } else if (pctx->state == IN_COMMENT) {
209            while (ch != '\n' && ch != '\r') {
210                nextc++;
211                if (nextc > eob) {
212                    return NULL;
213                }
214                ch = *nextc;
215            }
216            pctx->state = FIND_NEXT;
217            continue;
218        }
219
220        assert(pctx->state != IN_ESCAPE);
221        assert(pctx->state != FIND_NEXT);
222        assert(pctx->state != SKIP_LEAD_WS);
223        assert(pctx->state != IN_COMMENT);
224
225        switch(ch) {
226            case ' ':
227            case '\t':
228            case '\f':
229                if (pctx->state == IN_QUOTE) {
230                    continue;
231                }
232                // fall through
233            case '\n':
234            case '\r':
235                if (pctx->parts->size == 0) {
236                    token = clone_substring(anchor, nextc - anchor);
237                } else {
238                    JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
239                    token = JLI_List_combine(pctx->parts);
240                    JLI_List_free(pctx->parts);
241                    pctx->parts = JLI_List_new(4);
242                }
243                pctx->cptr = nextc + 1;
244                pctx->state = FIND_NEXT;
245                return token;
246            case '#':
247                if (pctx->state == IN_QUOTE) {
248                    continue;
249                }
250                pctx->state = IN_COMMENT;
251                break;
252            case '\\':
253                if (pctx->state != IN_QUOTE) {
254                    continue;
255                }
256                JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
257                pctx->state = IN_ESCAPE;
258                break;
259            case '\'':
260            case '"':
261                if (pctx->state == IN_QUOTE && pctx->quote_char != ch) {
262                    // not matching quote
263                    continue;
264                }
265                // partial before quote
266                if (anchor != nextc) {
267                    JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
268                }
269                // anchor after quote character
270                anchor = nextc + 1;
271                if (pctx->state == IN_TOKEN) {
272                    pctx->quote_char = ch;
273                    pctx->state = IN_QUOTE;
274                } else {
275                    pctx->state = IN_TOKEN;
276                }
277                break;
278            default:
279                break;
280        }
281    }
282
283    assert(nextc == eob);
284    if (anchor != nextc) {
285        // not yet return until end of stream, we have part of a token.
286        JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
287    }
288    return NULL;
289}
290
291static JLI_List readArgFile(FILE *file) {
292    char buf[4096];
293    JLI_List rv;
294    __ctx_args ctx;
295    size_t size;
296    char *token;
297
298    ctx.state = FIND_NEXT;
299    ctx.parts = JLI_List_new(4);
300
301    /* arbitrarily pick 8, seems to be a reasonable number of arguments */
302    rv = JLI_List_new(8);
303
304    while (!feof(file)) {
305        size = fread(buf, sizeof(char), sizeof(buf), file);
306        if (ferror(file)) {
307            JLI_List_free(rv);
308            return NULL;
309        }
310
311        /* nextc is next character to read from the buffer
312         * eob is the end of input
313         * token is the copied token value, NULL if no a complete token
314         */
315        ctx.cptr = buf;
316        ctx.eob = buf + size;
317        token = nextToken(&ctx);
318        while (token != NULL) {
319            checkArg(token);
320            JLI_List_add(rv, token);
321            token = nextToken(&ctx);
322        }
323    }
324
325    // remaining partial token
326    if (ctx.state == IN_TOKEN || ctx.state == IN_QUOTE) {
327        if (ctx.parts->size != 0) {
328            JLI_List_add(rv, JLI_List_combine(ctx.parts));
329        }
330    }
331    JLI_List_free(ctx.parts);
332
333    return rv;
334}
335
336/*
337 * if the arg represent a file, that is, prefix with a single '@',
338 * return a list of arguments from the file.
339 * otherwise, return NULL.
340 */
341static JLI_List expandArgFile(const char *arg) {
342    FILE *fptr;
343    struct stat st;
344    JLI_List rv;
345
346    /* failed to access the file */
347    if (stat(arg, &st) != 0) {
348        JLI_ReportMessage(CFG_ERROR6, arg);
349        exit(1);
350    }
351
352    if (st.st_size > MAX_ARGF_SIZE) {
353        JLI_ReportMessage(CFG_ERROR10, MAX_ARGF_SIZE);
354        exit(1);
355    }
356
357    fptr = fopen(arg, "r");
358    /* arg file cannot be openned */
359    if (fptr == NULL) {
360        JLI_ReportMessage(CFG_ERROR6, arg);
361        exit(1);
362    }
363
364    rv = readArgFile(fptr);
365    fclose(fptr);
366
367    /* error occurred reading the file */
368    if (rv == NULL) {
369        JLI_ReportMessage(DLL_ERROR4, arg);
370        exit(1);
371    }
372
373    return rv;
374}
375
376JLI_List JLI_PreprocessArg(const char *arg)
377{
378    JLI_List rv;
379
380    if (firstAppArgIndex > 0) {
381        // In user application arg, no more work.
382        return NULL;
383    }
384
385    if (stopExpansion) {
386        // still looking for user application arg
387        checkArg(arg);
388        return NULL;
389    }
390
391    if (arg[0] != '@') {
392        checkArg(arg);
393        return NULL;
394    }
395
396    if (arg[1] == '\0') {
397        // @ by itself is an argument
398        checkArg(arg);
399        return NULL;
400    }
401
402    arg++;
403    if (arg[0] == '@') {
404        // escaped @argument
405        rv = JLI_List_new(1);
406        checkArg(arg);
407        JLI_List_add(rv, JLI_StringDup(arg));
408    } else {
409        rv = expandArgFile(arg);
410    }
411    return rv;
412}
413
/*
 * Returns 1 if arg is exactly one of the options listed below, 0 otherwise.
 * JLI_AddArgsFromEnvVar() rejects these options when they come from the
 * environment variable.
 */
int isTerminalOpt(char *arg) {
    static const char *const terminalOpts[] = {
        "-jar",
        "-m",
        "--module",
        "--dry-run",
        "-h",
        "-?",
        "-help",
        "--help",
        "-X",
        "--help-extra",
        "-version",
        "--version",
        "-fullversion",
        "--full-version"
    };
    size_t i;

    for (i = 0; i < sizeof(terminalOpts) / sizeof(terminalOpts[0]); i++) {
        if (JLI_StrCmp(arg, terminalOpts[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
430
431jboolean JLI_AddArgsFromEnvVar(JLI_List args, const char *var_name) {
432    char *env = getenv(var_name);
433    char *p, *arg;
434    char quote;
435    JLI_List argsInFile;
436
437    if (firstAppArgIndex == 0) {
438        // Not 'java', return
439        return JNI_FALSE;
440    }
441
442    if (relaunch) {
443        return JNI_FALSE;
444    }
445
446    if (NULL == env) {
447        return JNI_FALSE;
448    }
449
450    JLI_ReportMessage(ARG_INFO_ENVVAR, var_name, env);
451
452    // This is retained until the process terminates as it is saved as the args
453    p = JLI_MemAlloc(JLI_StrLen(env) + 1);
454    while (*env != '\0') {
455        while (*env != '\0' && isspace(*env)) {
456            env++;
457        }
458
459        // Trailing space
460        if (*env == '\0') {
461            break;
462        }
463
464        arg = p;
465        while (*env != '\0' && !isspace(*env)) {
466            if (*env == '"' || *env == '\'') {
467                quote = *env++;
468                while (*env != quote && *env != '\0') {
469                    *p++ = *env++;
470                }
471
472                if (*env == '\0') {
473                    JLI_ReportMessage(ARG_ERROR8, var_name);
474                    exit(1);
475                }
476                env++;
477            } else {
478                *p++ = *env++;
479            }
480        }
481
482        *p++ = '\0';
483
484        argsInFile = JLI_PreprocessArg(arg);
485
486        if (NULL == argsInFile) {
487            if (isTerminalOpt(arg)) {
488                JLI_ReportMessage(ARG_ERROR9, arg, var_name);
489                exit(1);
490            }
491            JLI_List_add(args, arg);
492        } else {
493            size_t cnt, idx;
494            char *argFile = arg;
495            cnt = argsInFile->size;
496            for (idx = 0; idx < cnt; idx++) {
497                arg = argsInFile->elements[idx];
498                if (isTerminalOpt(arg)) {
499                    JLI_ReportMessage(ARG_ERROR10, arg, argFile, var_name);
500                    exit(1);
501                }
502                JLI_List_add(args, arg);
503            }
504            // Shallow free, we reuse the string to avoid copy
505            JLI_MemFree(argsInFile->elements);
506            JLI_MemFree(argsInFile);
507        }
508        /*
509         * Check if main-class is specified after argument being checked. It
510         * must always appear after expansion, as a main-class could be specified
511         * indirectly into environment variable via an @argfile, and it must be
512         * caught now.
513         */
514        if (firstAppArgIndex != NOT_FOUND) {
515            JLI_ReportMessage(ARG_ERROR11, var_name);
516            exit(1);
517        }
518
519        assert (*env == '\0' || isspace(*env));
520    }
521
522    return JNI_TRUE;
523}
524
525#ifdef DEBUG_ARGFILE
526/*
527 * Stand-alone sanity test, build with following command line
528 * $ CC -DDEBUG_ARGFILE -DNO_JNI -g args.c jli_util.c
529 */
530
/*
 * Reports a token mismatch at position idx and terminates the test
 * program with exit status 1.
 */
void fail(char *expected, char *actual, size_t idx) {
    // size_t need not be unsigned long; cast so the argument matches %lu
    printf("FAILED: Token[%lu] expected to be <%s>, got <%s>\n",
        (unsigned long) idx, expected, actual);
    exit(1);
}
535
536void test_case(char *case_data, char **tokens, size_t cnt_tokens) {
537    size_t actual_cnt;
538    char *token;
539    __ctx_args ctx;
540
541    actual_cnt = 0;
542
543    ctx.state = FIND_NEXT;
544    ctx.parts = JLI_List_new(4);
545    ctx.cptr = case_data;
546    ctx.eob = case_data + strlen(case_data);
547
548    printf("Test case: <%s>, expected %lu tokens.\n", case_data, cnt_tokens);
549
550    for (token = nextToken(&ctx); token != NULL; token = nextToken(&ctx)) {
551        // should not have more tokens than expected
552        if (actual_cnt >= cnt_tokens) {
553            printf("FAILED: Extra token detected: <%s>\n", token);
554            exit(2);
555        }
556        if (JLI_StrCmp(token, tokens[actual_cnt]) != 0) {
557            fail(tokens[actual_cnt], token, actual_cnt);
558        }
559        actual_cnt++;
560    }
561
562    char* last = NULL;
563    if (ctx.parts->size != 0) {
564        last = JLI_List_combine(ctx.parts);
565    }
566    JLI_List_free(ctx.parts);
567
568    if (actual_cnt >= cnt_tokens) {
569        // same number of tokens, should have nothing left to parse
570        if (last != NULL) {
571            if (*last != '#') {
572                printf("Leftover detected: %s", last);
573                exit(2);
574            }
575        }
576    } else {
577        if (JLI_StrCmp(last, tokens[actual_cnt]) != 0) {
578            fail(tokens[actual_cnt], last, actual_cnt);
579        }
580        actual_cnt++;
581    }
582    if (actual_cnt != cnt_tokens) {
583        printf("FAILED: Number of tokens not match, expected %lu, got %lu\n",
584            cnt_tokens, actual_cnt);
585        exit(3);
586    }
587
588    printf("PASS\n");
589}
590
591#define DO_CASE(name) \
592    test_case(name[0], name + 1, sizeof(name)/sizeof(char*) - 1)
593
594int main(int argc, char** argv) {
595    size_t i, j;
596
597    char* case1[] = { "-version -cp \"c:\\\\java libs\\\\one.jar\" \n",
598        "-version", "-cp", "c:\\java libs\\one.jar" };
599    DO_CASE(case1);
600
601    // note the open quote at the end
602    char* case2[] = { "com.foo.Panda \"Furious 5\"\fand\t'Shi Fu' \"escape\tprison",
603        "com.foo.Panda", "Furious 5", "and", "Shi Fu", "escape\tprison"};
604    DO_CASE(case2);
605
606    char* escaped_chars[] = { "escaped chars testing \"\\a\\b\\c\\f\\n\\r\\t\\v\\9\\6\\23\\82\\28\\377\\477\\278\\287\"",
607        "escaped", "chars", "testing", "abc\f\n\r\tv96238228377477278287"};
608    DO_CASE(escaped_chars);
609
610    char* mixed_quote[]  = { "\"mix 'single quote' in double\" 'mix \"double quote\" in single' partial\"quote me\"this",
611        "mix 'single quote' in double", "mix \"double quote\" in single", "partialquote methis"};
612    DO_CASE(mixed_quote);
613
614    char* comments[]  = { "line one #comment\n'line #2' #rest are comment\r\n#comment on line 3\nline 4 #comment to eof",
615        "line", "one", "line #2", "line", "4"};
616    DO_CASE(comments);
617
618    char* open_quote[] = { "This is an \"open quote \n    across line\n\t, note for WS.",
619        "This", "is", "an", "open quote ", "across", "line", ",", "note", "for", "WS." };
620    DO_CASE(open_quote);
621
622    char* escape_in_open_quote[] = { "Try \"this \\\\\\\\ escape\\n double quote \\\" in open quote",
623        "Try", "this \\\\ escape\n double quote \" in open quote" };
624    DO_CASE(escape_in_open_quote);
625
626    char* quote[] = { "'-Dmy.quote.single'='Property in single quote. Here a double quote\" Add some slashes \\\\/'",
627        "-Dmy.quote.single=Property in single quote. Here a double quote\" Add some slashes \\/" };
628    DO_CASE(quote);
629
630    char* multi[] = { "\"Open quote to \n  new \"line \\\n\r   third\\\n\r\\\tand\ffourth\"",
631        "Open quote to ", "new", "line third\tand\ffourth" };
632    DO_CASE(multi);
633
634    char* escape_quote[] = { "c:\\\"partial quote\"\\lib",
635        "c:\\partial quote\\lib" };
636    DO_CASE(escape_quote);
637
638    if (argc > 1) {
639        for (i = 0; i < argc; i++) {
640            JLI_List tokens = JLI_PreprocessArg(argv[i]);
641            if (NULL != tokens) {
642                for (j = 0; j < tokens->size; j++) {
643                    printf("Token[%lu]: <%s>\n", (unsigned long) j, tokens->elements[j]);
644                }
645            }
646        }
647    }
648}
649
650#endif // DEBUG_ARGFILE
651