1/*
2 * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10#include <windows.h>
11#include <stdlib.h>
12#include <string.h>
13#include <malloc.h>
14
15#if defined(CP_UTF8)
16
/* Console output code page that was active before the switch to UTF-8. */
static UINT saved_cp;
/* Number of arguments stored in |newargv|, terminating NULL not counted. */
static int newargc = 0;
/* NULL-terminated vector of malloc()ed UTF-8 argument strings. */
static char **newargv = NULL;
20
/*
 * Exit handler: put the console output code page back to |saved_cp| and
 * release every string in |newargv| followed by the vector itself.
 */
static void cleanup(void)
{
    int idx = 0;

    SetConsoleOutputCP(saved_cp);

    while (idx < newargc) {
        free(newargv[idx]);
        idx++;
    }

    free(newargv);
}
32
33/*
34 * Incrementally [re]allocate newargv and keep it NULL-terminated.
35 */
36static int validate_argv(int argc)
37{
38    static int size = 0;
39
40    if (argc >= size) {
41        char **ptr;
42
43        while (argc >= size)
44            size += 64;
45
46        ptr = realloc(newargv, size * sizeof(newargv[0]));
47        if (ptr == NULL)
48            return 0;
49
50        (newargv = ptr)[argc] = NULL;
51    } else {
52        newargv[argc] = NULL;
53    }
54
55    return 1;
56}
57
/*
 * Expand a wildcard argument into the names it matches.
 *
 * |wstr| points at the argument and |wlen| is its length in WCHARs.  The
 * argument does not have to be NUL-terminated, but wstr[wlen] must be
 * writable: it is overwritten with L'\0' for the duration of the
 * FindFirstFileW() call and then restored.
 *
 * Returns 0 if the file name part contains neither '*' nor '?', or if
 * FindFirstFileW() fails; the caller is then expected to pass the argument
 * on verbatim.  Returns 1 once a search has been started: every match other
 * than "." and ".." is appended to |newargv| as a UTF-8 string made of the
 * directory part of |wstr| followed by the matched name.  Running out of
 * memory ends the expansion early, keeping what was collected so far, and
 * still returns 1.
 */
static int process_glob(WCHAR *wstr, int wlen)
{
    int i, slash, udlen;
    WCHAR saved_char;
    WIN32_FIND_DATAW data;
    HANDLE h;

    /*
     * Note that we support wildcard characters only in filename part
     * of the path, and not in directories. Windows users are used to
     * this, that's why recursive glob processing is not implemented.
     */
    /*
     * Start by looking for last slash or backslash, ...
     */
    for (slash = 0, i = 0; i < wlen; i++)
        if (wstr[i] == L'/' || wstr[i] == L'\\')
            slash = i + 1;
    /*
     * ... then look for asterisk or question mark in the file name.
     */
    for (i = slash; i < wlen; i++)
        if (wstr[i] == L'*' || wstr[i] == L'?')
            break;

    if (i == wlen)
        return 0;   /* definitely not a glob */

    /* FindFirstFileW() wants a NUL-terminated pattern, so fake one */
    saved_char = wstr[wlen];
    wstr[wlen] = L'\0';
    h = FindFirstFileW(wstr, &data);
    wstr[wlen] = saved_char;
    if (h == INVALID_HANDLE_VALUE)
        return 0;   /* not a valid glob, just pass... */

    /*
     * |udlen| is the UTF-8 length of the directory prefix (everything up to
     * and including the last slash), which is put in front of every match.
     */
    if (slash)
        udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                    NULL, 0, NULL, NULL);
    else
        udlen = 0;

    do {
        int uflen;
        char *arg;

        /*
         * skip over . and ..
         */
        if (data.cFileName[0] == L'.') {
            if ((data.cFileName[1] == L'\0') ||
                (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
                continue;
        }

        if (!validate_argv(newargc + 1))
            break;

        /*
         * -1 below means "scan for trailing '\0' *and* count it",
         * so that |uflen| covers even trailing '\0'.
         */
        uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                                    NULL, 0, NULL, NULL);
        /*
         * A failed length query would leave |arg| without any terminating
         * '\0'; skip such an entry instead of storing a broken string.
         */
        if (uflen <= 0)
            continue;

        arg = malloc(udlen + uflen);
        if (arg == NULL)
            break;

        if (udlen)
            WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                arg, udlen, NULL, NULL);

        WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                            arg + udlen, uflen, NULL, NULL);

        newargv[newargc++] = arg;
    } while (FindNextFileW(h, &data));

    /* search handles are released with FindClose(), not CloseHandle() */
    FindClose(h);

    return 1;
}
140
/*
 * Rebuild the argument vector from the wide-character command line and hand
 * it back to the caller encoded as UTF-8.
 *
 * Nothing is done when GetEnvironmentVariableW() does not find the variable
 * OPENSSL_WIN32_UTF8.  Otherwise the command line is split into arguments
 * following the quoting rules described below, arguments with wildcards in
 * their file name part are expanded by process_glob(), and everything is
 * converted to UTF-8.
 *
 * On success |*argc| and |*argv| are overwritten with the new vector, the
 * console output code page is switched to CP_UTF8 and cleanup() is
 * registered with atexit().  If memory runs out while the vector is being
 * built, whatever was allocated is freed and |*argc| / |*argv| are left
 * untouched.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    const WCHAR *wcmdline;
    WCHAR *warg, *wend, *p;
    int wlen, ulen, valid = 1;
    char *arg;

    if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0)
        return;

    /*
     * Get hold of the command line before allocating anything, so that
     * bailing out here leaves nothing behind that would have to be freed.
     */
    wcmdline = GetCommandLineW();
    if (wcmdline == NULL)
        return;

    newargc = 0;
    newargv = NULL;
    if (!validate_argv(newargc))
        return;

    /*
     * make a copy of the command line, since we might have to modify it...
     */
    wlen = (int)wcslen(wcmdline);
    p = _alloca((wlen + 1) * sizeof(WCHAR));
    wcscpy(p, wcmdline);

    while (*p != L'\0') {
        int in_quote = 0;

        if (*p == L' ' || *p == L'\t') {
            p++; /* skip over whitespace */
            continue;
        }

        /*
         * Note: because we may need to fiddle with the number of backslashes,
         * the argument string is copied into itself.  This is safe because
         * the number of characters will never expand.
         */
        warg = wend = p;
        while (*p != L'\0'
               && (in_quote || (*p != L' ' && *p != L'\t'))) {
            switch (*p) {
            case L'\\':
                /*
                 * Microsoft documentation on how backslashes are treated
                 * is:
                 *
                 * + Backslashes are interpreted literally, unless they
                 *   immediately precede a double quotation mark.
                 * + If an even number of backslashes is followed by a double
                 *   quotation mark, one backslash is placed in the argv array
                 *   for every pair of backslashes, and the double quotation
                 *   mark is interpreted as a string delimiter.
                 * + If an odd number of backslashes is followed by a double
                 *   quotation mark, one backslash is placed in the argv array
                 *   for every pair of backslashes, and the double quotation
                 *   mark is "escaped" by the remaining backslash, causing a
                 *   literal double quotation mark (") to be placed in argv.
                 *
                 * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
                 *
                 * Though referred page doesn't mention it, multiple double
                 * quotes are also special. Pair of double quotes in quoted
                 * string is counted as single double quote.
                 */
                {
                    const WCHAR *q = p;
                    int i, nbackslash;

                    while (*p == L'\\')
                        p++;
                    nbackslash = (int)(p - q);

                    if (*p == L'"') {
                        for (i = nbackslash / 2; i > 0; i--)
                            *wend++ = L'\\';

                        /*
                         * if odd amount of backslashes before the quote,
                         * said quote is part of the argument, not a delimiter
                         */
                        if (nbackslash % 2 == 1)
                            *wend++ = *p++;
                    } else {
                        for (i = nbackslash; i > 0; i--)
                            *wend++ = L'\\';
                    }
                }
                break;
            case L'"':
                /*
                 * Without the preceding backslash (or when preceded with an
                 * even number of backslashes), the double quote is a simple
                 * string delimiter and just slightly change the parsing state
                 */
                if (in_quote && p[1] == L'"')
                    *wend++ = *p++;
                else
                    in_quote = !in_quote;
                p++;
                break;
            default:
                /*
                 * Any other non-delimiter character is just taken verbatim
                 */
                *wend++ = *p++;
            }
        }

        wlen = (int)(wend - warg);

        /*
         * Empty arguments and arguments that process_glob() declines are
         * converted and stored as they are.
         */
        if (wlen == 0 || !process_glob(warg, wlen)) {
            if (!validate_argv(newargc + 1)) {
                valid = 0;
                break;
            }

            ulen = 0;
            if (wlen > 0) {
                ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
                                           NULL, 0, NULL, NULL);
                /* an argument that can't be converted is dropped */
                if (ulen <= 0)
                    continue;
            }

            arg = malloc(ulen + 1);
            if (arg == NULL) {
                valid = 0;
                break;
            }

            if (wlen > 0)
                WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
                                    arg, ulen, NULL, NULL);
            arg[ulen] = '\0';

            newargv[newargc++] = arg;
        }
    }

    if (valid) {
        saved_cp = GetConsoleOutputCP();
        SetConsoleOutputCP(CP_UTF8);

        *argc = newargc;
        *argv = newargv;

        atexit(cleanup);
    } else if (newargv != NULL) {
        int i;

        /* give up: release everything and leave the caller's argv alone */
        for (i = 0; i < newargc; i++)
            free(newargv[i]);

        free(newargv);

        newargc = 0;
        newargv = NULL;
    }
}
304#else
/*
 * CP_UTF8 is not defined, so there is no UTF-8 conversion to perform:
 * |*argc| and |*argv| are left exactly as the caller passed them.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    (void)argc;
    (void)argv;
}
307#endif
308