1/*
2 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27 * Pathname canonicalization for Win32 file systems
28 */
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <ctype.h>
34#include <assert.h>
35#include <sys/stat.h>
36
37#include <windows.h>
38#include <winbase.h>
39#include <errno.h>
40#include "io_util_md.h"
41
42#undef DEBUG_PATH        /* Define this to debug path code */
43
44#define isfilesep(c) ((c) == '/' || (c) == '\\')
45#define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
46#define islb(c)      (IsDBCSLeadByte((BYTE)(c)))
47
48
49/* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
50   or NULL if dend would have been exceeded.  If first != '\0', copy that byte
51   before copying bytes from src to send - 1. */
52
53static char *
54cp(char *dst, char *dend, char first, char *src, char *send)
55{
56    char *p = src, *q = dst;
57    if (first != '\0') {
58        if (q < dend) {
59            *q++ = first;
60        } else {
61            errno = ENAMETOOLONG;
62            return NULL;
63        }
64    }
65    if (send - p > dend - q) {
66        errno = ENAMETOOLONG;
67        return NULL;
68    }
69    while (p < send) {
70        *q++ = *p++;
71    }
72    return q;
73}
74
75/* Wide character version of cp */
76
77static WCHAR*
78wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
79{
80    WCHAR *p = src, *q = dst;
81    if (first != L'\0') {
82        if (q < dend) {
83            *q++ = first;
84        } else {
85            errno = ENAMETOOLONG;
86            return NULL;
87        }
88    }
89    if (send - p > dend - q) {
90        errno = ENAMETOOLONG;
91        return NULL;
92    }
93    while (p < send)
94        *q++ = *p++;
95    return q;
96}
97
98
/* Scan forward from start for the first '\\'.  Returns the address of that
   byte, or the address of the null terminator if there is none.  A byte that
   IsDBCSLeadByte() reports as a lead byte is skipped together with the byte
   after it (when that byte is not the terminator), so the second byte of a
   double-byte character is never examined as a separator. */

static char *
nextsep(char *start)
{
    char *cur = start;

    while (*cur != '\0' && *cur != '\\') {
        if (islb(*cur) && cur[1] != '\0') {
            cur += 2;
        } else {
            cur += 1;
        }
    }
    return cur;
}
112
113/* Wide character version of nextsep */
114
115static WCHAR *
116wnextsep(WCHAR *start)
117{
118    WCHAR *p = start;
119    int c;
120    while ((c = *p) && (c != L'\\'))
121        p++;
122    return p;
123}
124
/* Returns 1 if the string contains a wildcard character ('*' or '?'), 0
   otherwise.  A DBCS lead byte is skipped together with the byte after it, so
   the second byte of a double-byte character is never tested. */

static int
wild(char *start)
{
    char *cur = start;

    while (*cur != '\0') {
        switch (*cur) {
        case '*':
        case '?':
            return 1;
        default:
            break;
        }
        cur += (islb(*cur) && cur[1] != '\0') ? 2 : 1;
    }
    return 0;
}
138
139/* Wide character version of wild */
140
141static int
142wwild(WCHAR *start)
143{
144    WCHAR *p = start;
145    int c;
146    while (c = *p) {
147        if ((c == L'*') || (c == L'?'))
148            return 1;
149        p++;
150    }
151    return 0;
152}
153
/* Tell whether the given string contains prohibited combinations of dots.
   In canonical form no path element may end with a dot, i.e. a run of one or
   more dots must not be followed directly by '\\' or by the end of the string.
   Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
   Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
   Returns 1 if a prohibited combination is found, 0 otherwise.
*/
static int
dots(char *start)
{
    char *scan = start;
    char *run;

    while ((run = strchr(scan, '.')) != NULL) {
        run += strspn(run, ".");    /* step over the whole run of dots */
        if (*run == '\0' || *run == '\\') {
            return 1;               /* path element ends with a dot */
        }
        scan = run + 1;             /* resume after the character that
                                       followed the dots */
    }
    return 0;
}
176
177/* Wide character version of dots */
178static int
179wdots(WCHAR *start)
180{
181    WCHAR *p = start;
182    // Skip "\\.\" prefix
183    if (wcslen(p) > 4 && !wcsncmp(p, L"\\\\.\\", 4))
184        p = p + 4;
185
186    while (*p) {
187        if ((p = wcschr(p, L'.')) == NULL) // find next occurrence of '.'
188            return 0; // no more dots
189        p++; // next char
190        while ((*p) == L'.') // go to the end of dots
191            p++;
192        if (*p && (*p != L'\\')) // path element does not end with a dot
193            p++; // go to the next char
194        else
195            return 1; // path element does end with a dot - prohibited
196    }
197    return 0; // no prohibited combinations of dots found
198}
199
200/* If the lookup of a particular prefix fails because the file does not exist,
201   because it is of the wrong type, because access is denied, or because the
202   network is unreachable then canonicalization does not fail, it terminates
203   successfully after copying the rest of the original path to the result path.
204   Other I/O errors cause an error return.
205*/
206
207int
208lastErrorReportable()
209{
210    DWORD errval = GetLastError();
211    if ((errval == ERROR_FILE_NOT_FOUND)
212        || (errval == ERROR_DIRECTORY)
213        || (errval == ERROR_PATH_NOT_FOUND)
214        || (errval == ERROR_BAD_NETPATH)
215        || (errval == ERROR_BAD_NET_NAME)
216        || (errval == ERROR_ACCESS_DENIED)
217        || (errval == ERROR_NETWORK_UNREACHABLE)
218        || (errval == ERROR_NETWORK_ACCESS_DENIED)) {
219        return 0;
220    }
221
222#ifdef DEBUG_PATH
223    jio_fprintf(stderr, "canonicalize: errval %d\n", errval);
224#endif
225    return 1;
226}
227
228/* Convert a pathname to canonical form.  The input orig_path is assumed to
229   have been converted to native form already, via JVM_NativePath().  This is
230   necessary because _fullpath() rejects duplicate separator characters on
231   Win95, though it accepts them on NT. */
232
233int
234canonicalize(char *orig_path, char *result, int size)
235{
236    WIN32_FIND_DATA fd;
237    HANDLE h;
238    char path[1024];    /* Working copy of path */
239    char *src, *dst, *dend;
240
241    /* Reject paths that contain wildcards */
242    if (wild(orig_path)) {
243        errno = EINVAL;
244        return -1;
245    }
246
247    /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
248       contrary to the documentation, the _fullpath procedure does not require
249       the drive to be available.  It also does not reliably change all
250       occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
251    if(!_fullpath(path, orig_path, sizeof(path))) {
252        return -1;
253    }
254
255    /* Correction for Win95: _fullpath may leave a trailing "\\"
256       on a UNC pathname */
257    if ((path[0] == '\\') && (path[1] == '\\')) {
258        char *p = path + strlen(path);
259        if ((p[-1] == '\\') && !islb(p[-2])) {
260            p[-1] = '\0';
261        }
262    }
263
264    if (dots(path)) /* Check for prohibited combinations of dots */
265        return -1;
266
267    src = path;            /* Start scanning here */
268    dst = result;        /* Place results here */
269    dend = dst + size;        /* Don't go to or past here */
270
271    /* Copy prefix, assuming path is absolute */
272    if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
273        /* Drive specifier */
274        *src = toupper(*src);    /* Canonicalize drive letter */
275        if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
276            return -1;
277        }
278        src += 2;
279    } else if ((src[0] == '\\') && (src[1] == '\\')) {
280        /* UNC pathname */
281        char *p;
282        p = nextsep(src + 2);    /* Skip past host name */
283        if (!*p) {
284        /* A UNC pathname must begin with "\\\\host\\share",
285           so reject this path as invalid if there is no share name */
286            errno = EINVAL;
287            return -1;
288    }
289    p = nextsep(p + 1);    /* Skip past share name */
290    if (!(dst = cp(dst, dend, '\0', src, p))) {
291        return -1;
292    }
293    src = p;
294    } else {
295        /* Invalid path */
296        errno = EINVAL;
297        return -1;
298    }
299
300    /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
301    /* for root pathes like "E:\" . If the path has this form, we should  */
302    /* simply return it, it is already canonicalized. */
303    if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
304        /* At this point we have already copied the drive specifier ("z:")*/
305        /* so we need to copy "\" and the null character. */
306        result[2] = '\\';
307        result[3] = '\0';
308        return 0;
309    }
310
311    /* At this point we have copied either a drive specifier ("z:") or a UNC
312       prefix ("\\\\host\\share") to the result buffer, and src points to the
313       first byte of the remainder of the path.  We now scan through the rest
314       of the path, looking up each prefix in order to find the true name of
315       the last element of each prefix, thereby computing the full true name of
316       the original path. */
317    while (*src) {
318        char *p = nextsep(src + 1);    /* Find next separator */
319        char c = *p;
320        assert(*src == '\\');        /* Invariant */
321        *p = '\0';            /* Temporarily clear separator */
322        h = FindFirstFile(path, &fd);    /* Look up prefix */
323        *p = c;                /* Restore separator */
324        if (h != INVALID_HANDLE_VALUE) {
325            /* Lookup succeeded; append true name to result and continue */
326            FindClose(h);
327            if (!(dst = cp(dst, dend, '\\',
328                           fd.cFileName,
329                           fd.cFileName + strlen(fd.cFileName)))) {
330                return -1;
331            }
332            src = p;
333            continue;
334        } else {
335            if (!lastErrorReportable()) {
336                if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
337                    return -1;
338                }
339                break;
340            } else {
341                return -1;
342            }
343        }
344    }
345
346    if (dst >= dend) {
347    errno = ENAMETOOLONG;
348    return -1;
349    }
350    *dst = '\0';
351    return 0;
352
353}
354
355
356/* Convert a pathname to canonical form.  The input prefix is assumed
357   to be in canonical form already, and the trailing filename must not
358   contain any wildcard, dot/double dot, or other "tricky" characters
359   that are rejected by the canonicalize() routine above.  This
360   routine is present to allow the canonicalization prefix cache to be
361   used while still returning canonical names with the correct
362   capitalization. */
363
364int
365canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
366{
367    WIN32_FIND_DATA fd;
368    HANDLE h;
369    char *src, *dst, *dend;
370
371    src = pathWithCanonicalPrefix;
372    dst = result;        /* Place results here */
373    dend = dst + size;   /* Don't go to or past here */
374
375    h = FindFirstFile(pathWithCanonicalPrefix, &fd);    /* Look up file */
376    if (h != INVALID_HANDLE_VALUE) {
377        /* Lookup succeeded; concatenate true name to prefix */
378        FindClose(h);
379        if (!(dst = cp(dst, dend, '\0',
380                       canonicalPrefix,
381                       canonicalPrefix + strlen(canonicalPrefix)))) {
382            return -1;
383        }
384        if (!(dst = cp(dst, dend, '\\',
385                       fd.cFileName,
386                       fd.cFileName + strlen(fd.cFileName)))) {
387            return -1;
388        }
389    } else {
390        if (!lastErrorReportable()) {
391            if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
392                return -1;
393            }
394        } else {
395            return -1;
396        }
397    }
398
399    if (dst >= dend) {
400        errno = ENAMETOOLONG;
401        return -1;
402    }
403    *dst = '\0';
404    return 0;
405}
406
407
408/* Wide character version of canonicalize. Size is a wide-character size. */
409
410int
411wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
412{
413    WIN32_FIND_DATAW fd;
414    HANDLE h;
415    WCHAR *path;    /* Working copy of path */
416    WCHAR *src, *dst, *dend, c;
417
418    /* Reject paths that contain wildcards */
419    if (wwild(orig_path)) {
420        errno = EINVAL;
421        return -1;
422    }
423
424    if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
425        return -1;
426
427    /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
428       contrary to the documentation, the _fullpath procedure does not require
429       the drive to be available.  */
430    if(!_wfullpath(path, orig_path, size)) {
431        goto err;
432    }
433
434    if (wdots(path)) /* Check for prohibited combinations of dots */
435        goto err;
436
437    src = path;            /* Start scanning here */
438    dst = result;        /* Place results here */
439    dend = dst + size;        /* Don't go to or past here */
440
441    /* Copy prefix, assuming path is absolute */
442    c = src[0];
443    if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A'))
444       && (src[1] == L':') && (src[2] == L'\\')) {
445        /* Drive specifier */
446        *src = towupper(*src);    /* Canonicalize drive letter */
447        if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
448            goto err;
449        }
450
451        src += 2;
452    } else if ((src[0] == L'\\') && (src[1] == L'\\')) {
453        /* UNC pathname */
454        WCHAR *p;
455        p = wnextsep(src + 2);    /* Skip past host name */
456        if (!*p) {
457            /* A UNC pathname must begin with "\\\\host\\share",
458               so reject this path as invalid if there is no share name */
459            errno = EINVAL;
460            goto err;
461        }
462        p = wnextsep(p + 1);    /* Skip past share name */
463        if (!(dst = wcp(dst, dend, L'\0', src, p)))
464            goto err;
465        src = p;
466    } else {
467        /* Invalid path */
468        errno = EINVAL;
469        goto err;
470    }
471    /* At this point we have copied either a drive specifier ("z:") or a UNC
472       prefix ("\\\\host\\share") to the result buffer, and src points to the
473       first byte of the remainder of the path.  We now scan through the rest
474       of the path, looking up each prefix in order to find the true name of
475       the last element of each prefix, thereby computing the full true name of
476       the original path. */
477    while (*src) {
478        WCHAR *p = wnextsep(src + 1);    /* Find next separator */
479        WCHAR c = *p;
480        WCHAR *pathbuf;
481        int pathlen;
482
483        assert(*src == L'\\');        /* Invariant */
484        *p = L'\0';            /* Temporarily clear separator */
485
486        if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) {
487            pathbuf = getPrefixed(path, pathlen);
488            h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
489            free(pathbuf);
490        } else
491            h = FindFirstFileW(path, &fd);    /* Look up prefix */
492
493        *p = c;                /* Restore separator */
494        if (h != INVALID_HANDLE_VALUE) {
495            /* Lookup succeeded; append true name to result and continue */
496            FindClose(h);
497            if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
498                            fd.cFileName + wcslen(fd.cFileName)))){
499                goto err;
500            }
501            src = p;
502            continue;
503        } else {
504            if (!lastErrorReportable()) {
505               if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
506                   goto err;
507               }
508                break;
509            } else {
510                goto err;
511            }
512        }
513    }
514
515    if (dst >= dend) {
516    errno = ENAMETOOLONG;
517        goto err;
518    }
519    *dst = L'\0';
520    free(path);
521    return 0;
522
523 err:
524    free(path);
525    return -1;
526}
527
528
529/* Wide character version of canonicalizeWithPrefix. */
530
531int
532wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
533{
534    WIN32_FIND_DATAW fd;
535    HANDLE h;
536    WCHAR *src, *dst, *dend;
537    WCHAR *pathbuf;
538    int pathlen;
539
540    src = pathWithCanonicalPrefix;
541    dst = result;        /* Place results here */
542    dend = dst + size;   /* Don't go to or past here */
543
544
545    if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
546        pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
547        h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
548        free(pathbuf);
549    } else
550        h = FindFirstFileW(pathWithCanonicalPrefix, &fd);    /* Look up prefix */
551    if (h != INVALID_HANDLE_VALUE) {
552        /* Lookup succeeded; append true name to result and continue */
553        FindClose(h);
554        if (!(dst = wcp(dst, dend, L'\0',
555                        canonicalPrefix,
556                        canonicalPrefix + wcslen(canonicalPrefix)))) {
557            return -1;
558        }
559        if (!(dst = wcp(dst, dend, L'\\',
560                        fd.cFileName,
561                        fd.cFileName + wcslen(fd.cFileName)))) {
562            return -1;
563        }
564    } else {
565        if (!lastErrorReportable()) {
566            if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
567                return -1;
568            }
569        } else {
570            return -1;
571        }
572    }
573
574    if (dst >= dend) {
575        errno = ENAMETOOLONG;
576        return -1;
577    }
578    *dst = L'\0';
579    return 0;
580}
581
582
/* getPrefixed() properly belongs in io_util_md.c, but the java.lang.instrument
   package has hardwired canonicalize_md.c into its DLL.  To avoid a more
   complicated solution, such as also including io_util_md.c in that package,
   the function is placed here as a workaround.
 */
588
589/* copy \\?\ or \\?\UNC\ to the front of path*/
590WCHAR*
591getPrefixed(const WCHAR* path, int pathlen) {
592    WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR));
593    if (pathbuf != 0) {
594        if (path[0] == L'\\' && path[1] == L'\\') {
595            if (path[2] == L'?' && path[3] == L'\\'){
596                /* if it already has a \\?\ don't do the prefix */
597                wcscpy(pathbuf, path );
598            } else {
599                /* only UNC pathname includes double slashes here */
600                wcscpy(pathbuf, L"\\\\?\\UNC\0");
601                wcscat(pathbuf, path + 1);
602            }
603        } else {
604            wcscpy(pathbuf, L"\\\\?\\\0");
605            wcscat(pathbuf, path );
606        }
607    }
608    return pathbuf;
609}
610