1/*
2  Copyright (c) 1990-2002 Info-ZIP.  All rights reserved.
3
4  See the accompanying file LICENSE, version 2000-Apr-09 or later
5  (the contents of which are also included in unzip.h) for terms of use.
6  If, for some reason, all these files are missing, the Info-ZIP license
7  also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
8*/
9/*---------------------------------------------------------------------------
10
11  match.c
12
13  The match() routine recursively compares a string to a "pattern" (regular
14  expression), returning TRUE if a match is found or FALSE if not.  This
15  version is specifically for use with unzip.c:  as did the previous match()
16  routines from SEA and J. Kercheval, it leaves the case (upper, lower, or
17  mixed) of the string alone, but converts any uppercase characters in the
18  pattern to lowercase if indicated by the global var pInfo->lcflag (which
19  is to say, string is assumed to have been converted to lowercase already,
20  if such was necessary).
21
22  GRR:  reversed order of text, pattern in matche() (now same as match());
23        added ignore_case/ic flags, Case() macro.
24
25  PaulK:  replaced matche() with recmatch() from Zip, modified to have an
26          ignore_case argument; replaced test frame with simpler one.
27
28  ---------------------------------------------------------------------------
29
30  Copyright on recmatch() from Zip's util.c (although recmatch() was almost
31  certainly written by Mark Adler...ask me how I can tell :-) ):
32
33     Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
34     Kai Uwe Rommel and Igor Mandrichenko.
35
36     Permission is granted to any individual or institution to use, copy,
37     or redistribute this software so long as all of the original files are
38     included unmodified, that it is not sold for profit, and that this copy-
39     right notice is retained.
40
41  ---------------------------------------------------------------------------
42
43  Match the pattern (wildcard) against the string (fixed):
44
45     match(string, pattern, ignore_case);
46
47  returns TRUE if string matches pattern, FALSE otherwise.  In the pattern:
48
49     `*' matches any sequence of characters (zero or more)
50     `?' matches any single character
51     [SET] matches any character in the specified set,
52     [!SET] or [^SET] matches any character not in the specified set.
53
54  A set is composed of characters or ranges; a range looks like ``character
55  hyphen character'' (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the minimal set of
56  characters allowed in the [..] pattern construct.  Other characters are
57  allowed (i.e., 8-bit characters) if your system will support them.
58
59  To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
60  side or outside a [..] construct, and match the character exactly, precede
61  it with a ``\'' (backslash).
62
63  Note that "*.*" and "*." are treated specially under MS-DOS if DOSWILD is
64  defined.  See the DOSWILD section below for an explanation.  Note also
65  that with VMSWILD defined, '%' is used instead of '?', and sets (ranges)
66  are delimited by () instead of [].
67
68  ---------------------------------------------------------------------------*/
69
70
71#define __MATCH_C       /* identifies this source module */
72
/* define ToLower() in here (for Unix, define ToLower to be a macro using
 * isupper(); otherwise just use tolower()) */
75#define UNZIP_INTERNAL
76#include "unzip.h"
77
78#ifndef THEOS   /* the Theos port defines its own variant of match() */
79
80#if 0  /* this is not useful until it matches Amiga names insensitively */
81#ifdef AMIGA        /* some other platforms might also want to use this */
82#  define ANSI_CHARSET       /* MOVE INTO UNZIP.H EVENTUALLY */
83#endif
84#endif /* 0 */
85
#ifdef ANSI_CHARSET
#  ifdef ToLower
#    undef ToLower
#  endif
   /* uppercase letters are values 41 thru 5A, C0 thru D6, and D8 thru DE
    * (hex); D7 is excluded.  Setting bit 0x20 yields the lowercase value.
    * Arguments are fully parenthesized so that any expression may be passed,
    * but note that they are still evaluated more than once:  do not pass
    * expressions with side effects. */
#  define IsUpper(c) ((c) >= 0xC0 ? ((c) <= 0xDE && (c) != 0xD7) \
                                  : ((c) >= 0x41 && (c) <= 0x5A))
#  define ToLower(c) (IsUpper((uch)(c)) ? ((unsigned)(c) | 0x20) \
                                        : (unsigned)(c))
#endif
/* Case(x) folds x to lowercase when the local variable `ic' (ignore case) of
 * the enclosing function is nonzero, and leaves x untouched otherwise. */
#define Case(x)  (ic ? ToLower(x) : (x))
95
/* Pattern metacharacters:  WILDCHAR matches any single character, and
 * BEG_RANGE/END_RANGE delimit a character set.  With VMSWILD defined, '%'
 * and '(' ')' are used instead of '?' and '[' ']'. */
#ifdef VMSWILD
#  define WILDCHAR   '%'
#  define BEG_RANGE  '('
#  define END_RANGE  ')'
#else
#  define WILDCHAR   '?'
#  define BEG_RANGE  '['
#  define END_RANGE  ']'
#endif
105
106#if 0                /* GRR:  add this to unzip.h someday... */
107#if !(defined(MSDOS) && defined(DOSWILD))
108#define match(s,p,ic)   (recmatch((ZCONST uch *)p,(ZCONST uch *)s,ic) == 1)
109int recmatch OF((ZCONST uch *pattern, ZCONST uch *string, int ignore_case));
110#endif
111#endif /* 0 */
112static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
113                        int ignore_case));
114
115
116
117/* match() is a shell to recmatch() to return only Boolean values. */
118
119int match(string, pattern, ignore_case)
120    ZCONST char *string, *pattern;
121    int ignore_case;
122{
123#if (defined(MSDOS) && defined(DOSWILD))
124    char *dospattern;
125    int j = strlen(pattern);
126
127/*---------------------------------------------------------------------------
128    Optional MS-DOS preprocessing section:  compare last three chars of the
129    wildcard to "*.*" and translate to "*" if found; else compare the last
130    two characters to "*." and, if found, scan the non-wild string for dots.
131    If in the latter case a dot is found, return failure; else translate the
132    "*." to "*".  In either case, continue with the normal (Unix-like) match
133    procedure after translation.  (If not enough memory, default to normal
134    match.)  This causes "a*.*" and "a*." to behave as MS-DOS users expect.
135  ---------------------------------------------------------------------------*/
136
137    if ((dospattern = (char *)malloc(j+1)) != NULL) {
138        strcpy(dospattern, pattern);
139        if (!strcmp(dospattern+j-3, "*.*")) {
140            dospattern[j-2] = '\0';                    /* nuke the ".*" */
141        } else if (!strcmp(dospattern+j-2, "*.")) {
142            char *p = MBSCHR(string, '.');
143
144            if (p) {   /* found a dot:  match fails */
145                free(dospattern);
146                return 0;
147            }
148            dospattern[j-1] = '\0';                    /* nuke the end "." */
149        }
150        j = recmatch((uch *)dospattern, (uch *)string, ignore_case);
151        free(dospattern);
152        return j == 1;
153    } else
154#endif /* MSDOS && DOSWILD */
155    return recmatch((uch *)pattern, (uch *)string, ignore_case) == 1;
156}
157
158
159
160static int recmatch(p, s, ic)
161    ZCONST uch *p;        /* sh pattern to match */
162    ZCONST uch *s;        /* string to which to match it */
163    int ic;               /* true for case insensitivity */
164/* Recursively compare the sh pattern p with the string s and return 1 if
165 * they match, and 0 or 2 if they don't or if there is a syntax error in the
166 * pattern.  This routine recurses on itself no more deeply than the number
167 * of characters in the pattern. */
168{
169    unsigned int c;       /* pattern char or start of range in [-] loop */
170
171    /* Get first character, the pattern for new recmatch calls follows */
172    c = *p; INCSTR(p);
173
174    /* If that was the end of the pattern, match if string empty too */
175    if (c == 0)
176        return *s == 0;
177
178    /* '?' (or '%') matches any character (but not an empty string).
179     * If WILD_STOP_AT_DIR is defined, it won't match '/' */
180    if (c == WILDCHAR)
181#ifdef WILD_STOP_AT_DIR
182        return (*s && *s != '/') ? recmatch(p, s + CLEN(s), ic) : 0;
183#else
184        return *s ? recmatch(p, s + CLEN(s), ic) : 0;
185#endif
186
187    /* '*' matches any number of characters, including zero */
188#ifdef AMIGA
189    if (c == '#' && *p == '?')     /* "#?" is Amiga-ese for "*" */
190        c = '*', p++;
191#endif /* AMIGA */
192    if (c == '*') {
193#ifdef WILD_STOP_AT_DIR
194#  ifdef AMIGA
195        if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
196            c = '*', p++;
197        if (c != '*') {
198#  else /* !AMIGA */
199        if (*p != '*') {
200#  endif /* ?AMIGA */
201            /* single '*': this doesn't match slashes */
202            for (; *s && *s != '/'; INCSTR(s))
203                if ((c = recmatch(p, s, ic)) != 0)
204                    return (int)c;
205            /* end of pattern: matched if at end of string, else continue */
206            if (*p == 0)
207                return (*s == 0);
208            /* continue to match if at '/' in pattern, else give up */
209            return (*p == '/' || (*p == '\\' && p[1] == '/'))
210                   ? recmatch(p, s, ic) : 2;
211        }
212        /* '**': this matches slashes */
213        ++p;        /* move p behind the second '*' */
214        /* continue with the non-WILD_STOP_AT_DIR code variant */
215#endif /* WILD_STOP_AT_DIR */
216        if (*p == 0)
217            return 1;
218        for (; *s; INCSTR(s))
219            if ((c = recmatch(p, s, ic)) != 0)
220                return (int)c;
221        return 2;       /* 2 means give up--match will return false */
222    }
223
224    /* Parse and process the list of characters and ranges in brackets */
225    if (c == BEG_RANGE) {
226        int e;          /* flag true if next char to be taken literally */
227        ZCONST uch *q;  /* pointer to end of [-] group */
228        int r;          /* flag true to match anything but the range */
229
230        if (*s == 0)                            /* need a character to match */
231            return 0;
232        p += (r = (*p == '!' || *p == '^'));    /* see if reverse */
233        for (q = p, e = 0; *q; INCSTR(q))       /* find closing bracket */
234            if (e)
235                e = 0;
236            else
237                if (*q == '\\')      /* GRR:  change to ^ for MS-DOS, OS/2? */
238                    e = 1;
239                else if (*q == END_RANGE)
240                    break;
241        if (*q != END_RANGE)         /* nothing matches if bad syntax */
242            return 0;
243        for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) {
244            /* go through the list */
245            if (!e && *p == '\\')               /* set escape flag if \ */
246                e = 1;
247            else if (!e && *p == '-')           /* set start of range if - */
248                c = *(p-1);
249            else {
250                unsigned int cc = Case(*s);
251
252                if (*(p+1) != '-')
253                    for (c = c ? c : *p; c <= *p; c++)  /* compare range */
254                        if ((unsigned)Case(c) == cc) /* typecast for MSC bug */
255                            return r ? 0 : recmatch(q + 1, s + 1, ic);
256                c = e = 0;   /* clear range, escape flags */
257            }
258        }
259        return r ? recmatch(q + CLEN(q), s + CLEN(s), ic) : 0;
260                                        /* bracket match failed */
261    }
262
263    /* if escape ('\'), just compare next character */
264    if (c == '\\' && (c = *p++) == 0)     /* if \ at end, then syntax error */
265        return 0;
266
267    /* just a character--compare it */
268#ifdef QDOS
269    return QMatch(Case((uch)c), Case(*s)) ? recmatch(p, s + CLEN(s), ic) : 0;
270#else
271    return Case((uch)c) == Case(*s) ? recmatch(p, s + CLEN(s), ic) : 0;
272#endif
273
274} /* end function recmatch() */
275
276#endif /* !THEOS */
277
278
279
280
281int iswild(p)        /* originally only used for stat()-bug workaround in */
282    ZCONST char *p;  /*  VAX C, Turbo/Borland C, Watcom C, Atari MiNT libs; */
283{                    /*  now used in process_zipfiles() as well */
284    for (; *p; INCSTR(p))
285        if (*p == '\\' && *(p+1))
286            ++p;
287#ifdef THEOS
288        else if (*p == '?' || *p == '*' || *p=='#'|| *p == '@')
289#else /* !THEOS */
290#ifdef VMS
291        else if (*p == '%' || *p == '*')
292#else /* !VMS */
293#ifdef AMIGA
294        else if (*p == '?' || *p == '*' || (*p=='#' && p[1]=='?') || *p == '[')
295#else /* !AMIGA */
296        else if (*p == '?' || *p == '*' || *p == '[')
297#endif /* ?AMIGA */
298#endif /* ?VMS */
299#endif /* ?THEOS */
300#ifdef QDOS
301            return (int)p;
302#else
303            return TRUE;
304#endif
305
306    return FALSE;
307
308} /* end function iswild() */
309
310
311
312
313
314#ifdef TEST_MATCH
315
316#define put(s) {fputs(s,stdout); fflush(stdout);}
317#ifdef main
318#  undef main
319#endif
320
/* Read one line from stdin into buf, which holds `size' bytes, and strip the
 * trailing newline.  Returns 1 on success, 0 at end of input or on a read
 * error.  Unlike gets(), this never writes past the end of buf; the excess
 * of an overlong line is left to be returned by the next call. */
static int read_line(char *buf, int size)
{
    char *end;

    if (fgets(buf, size, stdin) == NULL)
        return 0;
    for (end = buf; *end != '\0' && *end != '\n'; end++)
        ;
    *end = '\0';
    return 1;
}

/* Interactive test frame:  repeatedly read a pattern and then strings to
 * match against it, printing the case-sensitive and case-insensitive result
 * for each string.  An empty line (or end of input) leaves the current
 * level:  an empty string asks for a new pattern, an empty pattern exits. */
int main(int argc, char **argv)
{
    char pat[256], str[256];

    for (;;) {
        put("Pattern (return to exit): ");
        if (!read_line(pat, (int)sizeof(pat)) || !pat[0])
            break;
        for (;;) {
            put("String (return for new pattern): ");
            if (!read_line(str, (int)sizeof(str)) || !str[0])
                break;
            printf("Case sensitive: %s  insensitive: %s\n",
              match(str, pat, 0) ? "YES" : "NO",
              match(str, pat, 1) ? "YES" : "NO");
        }
    }
    EXIT(0);
}
342
343#endif /* TEST_MATCH */
344