1238825Smm/*- 2238825Smm * Copyright (c) 2003-2007 Tim Kientzle 3238825Smm * All rights reserved. 4238825Smm * 5238825Smm * Redistribution and use in source and binary forms, with or without 6238825Smm * modification, are permitted provided that the following conditions 7238825Smm * are met: 8238825Smm * 1. Redistributions of source code must retain the above copyright 9238825Smm * notice, this list of conditions and the following disclaimer 10238825Smm * in this position and unchanged. 11238825Smm * 2. Redistributions in binary form must reproduce the above copyright 12238825Smm * notice, this list of conditions and the following disclaimer in the 13238825Smm * documentation and/or other materials provided with the distribution. 14238825Smm * 15238825Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 16238825Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17238825Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18238825Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 19238825Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20238825Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21238825Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22238825Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23238825Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24238825Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25238825Smm */ 26238825Smm 27238825Smm#include "archive_platform.h" 28238825Smm__FBSDID("$FreeBSD$"); 29238825Smm 30238825Smm#ifdef HAVE_STRING_H 31238825Smm#include <string.h> 32238825Smm#endif 33238825Smm#ifdef HAVE_WCHAR_H 34238825Smm#include <wchar.h> 35238825Smm#endif 36238825Smm 37238825Smm#include "archive_pathmatch.h" 38238825Smm 39238825Smm/* 40238825Smm * Check whether a character 'c' is matched by a list specification [...]: 41238825Smm * * Leading '!' or '^' negates the class. 42238825Smm * * <char>-<char> is a range of characters 43238825Smm * * \<char> removes any special meaning for <char> 44238825Smm * 45238825Smm * Some interesting boundary cases: 46238825Smm * a-d-e is one range (a-d) followed by two single characters - and e. 47238825Smm * \a-\d is same as a-d 48238825Smm * a\-d is three single characters: a, d, - 49238825Smm * Trailing - is not special (so [a-] is two characters a and -). 50238825Smm * Initial - is not special ([a-] is same as [-a] is same as [\\-a]) 51238825Smm * This function never sees a trailing \. 52238825Smm * [] always fails 53238825Smm * [!] always succeeds 54238825Smm */ 55238825Smmstatic int 56238825Smmpm_list(const char *start, const char *end, const char c, int flags) 57238825Smm{ 58238825Smm const char *p = start; 59238825Smm char rangeStart = '\0', nextRangeStart; 60238825Smm int match = 1, nomatch = 0; 61238825Smm 62238825Smm /* This will be used soon... */ 63238825Smm (void)flags; /* UNUSED */ 64238825Smm 65238825Smm /* If this is a negated class, return success for nomatch. */ 66238825Smm if ((*p == '!' || *p == '^') && p < end) { 67238825Smm match = 0; 68238825Smm nomatch = 1; 69238825Smm ++p; 70238825Smm } 71238825Smm 72238825Smm while (p < end) { 73238825Smm nextRangeStart = '\0'; 74238825Smm switch (*p) { 75238825Smm case '-': 76238825Smm /* Trailing or initial '-' is not special. */ 77238825Smm if ((rangeStart == '\0') || (p == end - 1)) { 78238825Smm if (*p == c) 79238825Smm return (match); 80238825Smm } else { 81238825Smm char rangeEnd = *++p; 82238825Smm if (rangeEnd == '\\') 83238825Smm rangeEnd = *++p; 84238825Smm if ((rangeStart <= c) && (c <= rangeEnd)) 85238825Smm return (match); 86238825Smm } 87238825Smm break; 88238825Smm case '\\': 89238825Smm ++p; 90238825Smm /* Fall through */ 91238825Smm default: 92238825Smm if (*p == c) 93238825Smm return (match); 94238825Smm nextRangeStart = *p; /* Possible start of range. */ 95238825Smm } 96238825Smm rangeStart = nextRangeStart; 97238825Smm ++p; 98238825Smm } 99238825Smm return (nomatch); 100238825Smm} 101238825Smm 102238825Smmstatic int 103238825Smmpm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags) 104238825Smm{ 105238825Smm const wchar_t *p = start; 106238825Smm wchar_t rangeStart = L'\0', nextRangeStart; 107238825Smm int match = 1, nomatch = 0; 108238825Smm 109238825Smm /* This will be used soon... */ 110238825Smm (void)flags; /* UNUSED */ 111238825Smm 112238825Smm /* If this is a negated class, return success for nomatch. */ 113238825Smm if ((*p == L'!' || *p == L'^') && p < end) { 114238825Smm match = 0; 115238825Smm nomatch = 1; 116238825Smm ++p; 117238825Smm } 118238825Smm 119238825Smm while (p < end) { 120238825Smm nextRangeStart = L'\0'; 121238825Smm switch (*p) { 122238825Smm case L'-': 123238825Smm /* Trailing or initial '-' is not special. */ 124238825Smm if ((rangeStart == L'\0') || (p == end - 1)) { 125238825Smm if (*p == c) 126238825Smm return (match); 127238825Smm } else { 128238825Smm wchar_t rangeEnd = *++p; 129238825Smm if (rangeEnd == L'\\') 130238825Smm rangeEnd = *++p; 131238825Smm if ((rangeStart <= c) && (c <= rangeEnd)) 132238825Smm return (match); 133238825Smm } 134238825Smm break; 135238825Smm case L'\\': 136238825Smm ++p; 137238825Smm /* Fall through */ 138238825Smm default: 139238825Smm if (*p == c) 140238825Smm return (match); 141238825Smm nextRangeStart = *p; /* Possible start of range. */ 142238825Smm } 143238825Smm rangeStart = nextRangeStart; 144238825Smm ++p; 145238825Smm } 146238825Smm return (nomatch); 147238825Smm} 148238825Smm 149238825Smm/* 150238825Smm * If s is pointing to "./", ".//", "./././" or the like, skip it. 151238825Smm */ 152238825Smmstatic const char * 153238825Smmpm_slashskip(const char *s) { 154238825Smm while ((*s == '/') 155238825Smm || (s[0] == '.' && s[1] == '/') 156238825Smm || (s[0] == '.' && s[1] == '\0')) 157238825Smm ++s; 158238825Smm return (s); 159238825Smm} 160238825Smm 161238825Smmstatic const wchar_t * 162238825Smmpm_slashskip_w(const wchar_t *s) { 163238825Smm while ((*s == L'/') 164238825Smm || (s[0] == L'.' && s[1] == L'/') 165238825Smm || (s[0] == L'.' && s[1] == L'\0')) 166238825Smm ++s; 167238825Smm return (s); 168238825Smm} 169238825Smm 170238825Smmstatic int 171238825Smmpm(const char *p, const char *s, int flags) 172238825Smm{ 173238825Smm const char *end; 174238825Smm 175238825Smm /* 176238825Smm * Ignore leading './', './/', '././', etc. 177238825Smm */ 178238825Smm if (s[0] == '.' && s[1] == '/') 179238825Smm s = pm_slashskip(s + 1); 180238825Smm if (p[0] == '.' && p[1] == '/') 181238825Smm p = pm_slashskip(p + 1); 182238825Smm 183238825Smm for (;;) { 184238825Smm switch (*p) { 185238825Smm case '\0': 186238825Smm if (s[0] == '/') { 187238825Smm if (flags & PATHMATCH_NO_ANCHOR_END) 188238825Smm return (1); 189238825Smm /* "dir" == "dir/" == "dir/." */ 190238825Smm s = pm_slashskip(s); 191238825Smm } 192238825Smm return (*s == '\0'); 193238825Smm case '?': 194238825Smm /* ? always succeeds, unless we hit end of 's' */ 195238825Smm if (*s == '\0') 196238825Smm return (0); 197238825Smm break; 198238825Smm case '*': 199238825Smm /* "*" == "**" == "***" ... */ 200238825Smm while (*p == '*') 201238825Smm ++p; 202238825Smm /* Trailing '*' always succeeds. */ 203238825Smm if (*p == '\0') 204238825Smm return (1); 205238825Smm while (*s) { 206238825Smm if (archive_pathmatch(p, s, flags)) 207238825Smm return (1); 208238825Smm ++s; 209238825Smm } 210238825Smm return (0); 211238825Smm case '[': 212238825Smm /* 213238825Smm * Find the end of the [...] character class, 214238825Smm * ignoring \] that might occur within the class. 215238825Smm */ 216238825Smm end = p + 1; 217238825Smm while (*end != '\0' && *end != ']') { 218238825Smm if (*end == '\\' && end[1] != '\0') 219238825Smm ++end; 220238825Smm ++end; 221238825Smm } 222238825Smm if (*end == ']') { 223238825Smm /* We found [...], try to match it. */ 224238825Smm if (!pm_list(p + 1, end, *s, flags)) 225238825Smm return (0); 226238825Smm p = end; /* Jump to trailing ']' char. */ 227238825Smm break; 228238825Smm } else 229238825Smm /* No final ']', so just match '['. */ 230238825Smm if (*p != *s) 231238825Smm return (0); 232238825Smm break; 233238825Smm case '\\': 234238825Smm /* Trailing '\\' matches itself. */ 235238825Smm if (p[1] == '\0') { 236238825Smm if (*s != '\\') 237238825Smm return (0); 238238825Smm } else { 239238825Smm ++p; 240238825Smm if (*p != *s) 241238825Smm return (0); 242238825Smm } 243238825Smm break; 244238825Smm case '/': 245238825Smm if (*s != '/' && *s != '\0') 246238825Smm return (0); 247238825Smm /* Note: pattern "/\./" won't match "/"; 248238825Smm * pm_slashskip() correctly stops at backslash. */ 249238825Smm p = pm_slashskip(p); 250238825Smm s = pm_slashskip(s); 251238825Smm if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)) 252238825Smm return (1); 253238825Smm --p; /* Counteract the increment below. */ 254238825Smm --s; 255238825Smm break; 256238825Smm case '$': 257238825Smm /* '$' is special only at end of pattern and only 258238825Smm * if PATHMATCH_NO_ANCHOR_END is specified. */ 259238825Smm if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){ 260238825Smm /* "dir" == "dir/" == "dir/." */ 261238825Smm return (*pm_slashskip(s) == '\0'); 262238825Smm } 263238825Smm /* Otherwise, '$' is not special. */ 264238825Smm /* FALL THROUGH */ 265238825Smm default: 266238825Smm if (*p != *s) 267238825Smm return (0); 268238825Smm break; 269238825Smm } 270238825Smm ++p; 271238825Smm ++s; 272238825Smm } 273238825Smm} 274238825Smm 275238825Smmstatic int 276238825Smmpm_w(const wchar_t *p, const wchar_t *s, int flags) 277238825Smm{ 278238825Smm const wchar_t *end; 279238825Smm 280238825Smm /* 281238825Smm * Ignore leading './', './/', '././', etc. 282238825Smm */ 283238825Smm if (s[0] == L'.' && s[1] == L'/') 284238825Smm s = pm_slashskip_w(s + 1); 285238825Smm if (p[0] == L'.' && p[1] == L'/') 286238825Smm p = pm_slashskip_w(p + 1); 287238825Smm 288238825Smm for (;;) { 289238825Smm switch (*p) { 290238825Smm case L'\0': 291238825Smm if (s[0] == L'/') { 292238825Smm if (flags & PATHMATCH_NO_ANCHOR_END) 293238825Smm return (1); 294238825Smm /* "dir" == "dir/" == "dir/." */ 295238825Smm s = pm_slashskip_w(s); 296238825Smm } 297238825Smm return (*s == L'\0'); 298238825Smm case L'?': 299238825Smm /* ? always succeeds, unless we hit end of 's' */ 300238825Smm if (*s == L'\0') 301238825Smm return (0); 302238825Smm break; 303238825Smm case L'*': 304238825Smm /* "*" == "**" == "***" ... */ 305238825Smm while (*p == L'*') 306238825Smm ++p; 307238825Smm /* Trailing '*' always succeeds. */ 308238825Smm if (*p == L'\0') 309238825Smm return (1); 310238825Smm while (*s) { 311238825Smm if (archive_pathmatch_w(p, s, flags)) 312238825Smm return (1); 313238825Smm ++s; 314238825Smm } 315238825Smm return (0); 316238825Smm case L'[': 317238825Smm /* 318238825Smm * Find the end of the [...] character class, 319238825Smm * ignoring \] that might occur within the class. 320238825Smm */ 321238825Smm end = p + 1; 322238825Smm while (*end != L'\0' && *end != L']') { 323238825Smm if (*end == L'\\' && end[1] != L'\0') 324238825Smm ++end; 325238825Smm ++end; 326238825Smm } 327238825Smm if (*end == L']') { 328238825Smm /* We found [...], try to match it. */ 329238825Smm if (!pm_list_w(p + 1, end, *s, flags)) 330238825Smm return (0); 331238825Smm p = end; /* Jump to trailing ']' char. */ 332238825Smm break; 333238825Smm } else 334238825Smm /* No final ']', so just match '['. */ 335238825Smm if (*p != *s) 336238825Smm return (0); 337238825Smm break; 338238825Smm case L'\\': 339238825Smm /* Trailing '\\' matches itself. */ 340238825Smm if (p[1] == L'\0') { 341238825Smm if (*s != L'\\') 342238825Smm return (0); 343238825Smm } else { 344238825Smm ++p; 345238825Smm if (*p != *s) 346238825Smm return (0); 347238825Smm } 348238825Smm break; 349238825Smm case L'/': 350238825Smm if (*s != L'/' && *s != L'\0') 351238825Smm return (0); 352238825Smm /* Note: pattern "/\./" won't match "/"; 353238825Smm * pm_slashskip() correctly stops at backslash. */ 354238825Smm p = pm_slashskip_w(p); 355238825Smm s = pm_slashskip_w(s); 356238825Smm if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)) 357238825Smm return (1); 358238825Smm --p; /* Counteract the increment below. */ 359238825Smm --s; 360238825Smm break; 361238825Smm case L'$': 362238825Smm /* '$' is special only at end of pattern and only 363238825Smm * if PATHMATCH_NO_ANCHOR_END is specified. */ 364238825Smm if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){ 365238825Smm /* "dir" == "dir/" == "dir/." */ 366238825Smm return (*pm_slashskip_w(s) == L'\0'); 367238825Smm } 368238825Smm /* Otherwise, '$' is not special. */ 369238825Smm /* FALL THROUGH */ 370238825Smm default: 371238825Smm if (*p != *s) 372238825Smm return (0); 373238825Smm break; 374238825Smm } 375238825Smm ++p; 376238825Smm ++s; 377238825Smm } 378238825Smm} 379238825Smm 380238825Smm/* Main entry point. */ 381238825Smmint 382238825Smm__archive_pathmatch(const char *p, const char *s, int flags) 383238825Smm{ 384238825Smm /* Empty pattern only matches the empty string. */ 385238825Smm if (p == NULL || *p == '\0') 386238825Smm return (s == NULL || *s == '\0'); 387370535Sgit2svn else if (s == NULL) 388370535Sgit2svn return (0); 389238825Smm 390238825Smm /* Leading '^' anchors the start of the pattern. */ 391238825Smm if (*p == '^') { 392238825Smm ++p; 393238825Smm flags &= ~PATHMATCH_NO_ANCHOR_START; 394238825Smm } 395238825Smm 396238825Smm if (*p == '/' && *s != '/') 397238825Smm return (0); 398238825Smm 399299529Smm /* Certain patterns anchor implicitly. */ 400299529Smm if (*p == '*' || *p == '/') { 401238825Smm while (*p == '/') 402238825Smm ++p; 403238825Smm while (*s == '/') 404238825Smm ++s; 405238825Smm return (pm(p, s, flags)); 406238825Smm } 407238825Smm 408238825Smm /* If start is unanchored, try to match start of each path element. */ 409238825Smm if (flags & PATHMATCH_NO_ANCHOR_START) { 410238825Smm for ( ; s != NULL; s = strchr(s, '/')) { 411238825Smm if (*s == '/') 412238825Smm s++; 413238825Smm if (pm(p, s, flags)) 414238825Smm return (1); 415238825Smm } 416238825Smm return (0); 417238825Smm } 418238825Smm 419238825Smm /* Default: Match from beginning. */ 420238825Smm return (pm(p, s, flags)); 421238825Smm} 422238825Smm 423238825Smmint 424238825Smm__archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags) 425238825Smm{ 426238825Smm /* Empty pattern only matches the empty string. */ 427238825Smm if (p == NULL || *p == L'\0') 428238825Smm return (s == NULL || *s == L'\0'); 429370535Sgit2svn else if (s == NULL) 430370535Sgit2svn return (0); 431238825Smm 432238825Smm /* Leading '^' anchors the start of the pattern. */ 433238825Smm if (*p == L'^') { 434238825Smm ++p; 435238825Smm flags &= ~PATHMATCH_NO_ANCHOR_START; 436238825Smm } 437238825Smm 438238825Smm if (*p == L'/' && *s != L'/') 439238825Smm return (0); 440238825Smm 441299529Smm /* Certain patterns anchor implicitly. */ 442299529Smm if (*p == L'*' || *p == L'/') { 443238825Smm while (*p == L'/') 444238825Smm ++p; 445238825Smm while (*s == L'/') 446238825Smm ++s; 447238825Smm return (pm_w(p, s, flags)); 448238825Smm } 449238825Smm 450238825Smm /* If start is unanchored, try to match start of each path element. */ 451238825Smm if (flags & PATHMATCH_NO_ANCHOR_START) { 452238825Smm for ( ; s != NULL; s = wcschr(s, L'/')) { 453238825Smm if (*s == L'/') 454238825Smm s++; 455238825Smm if (pm_w(p, s, flags)) 456238825Smm return (1); 457238825Smm } 458238825Smm return (0); 459238825Smm } 460238825Smm 461238825Smm /* Default: Match from beginning. */ 462238825Smm return (pm_w(p, s, flags)); 463238825Smm} 464