1/* Copyright (C) 1991-2020 Free Software Foundation, Inc. 2 This file is part of the GNU C Library. 3 4 The GNU C Library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU General Public 6 License as published by the Free Software Foundation; either 7 version 3 of the License, or (at your option) any later version. 8 9 The GNU C Library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 General Public License for more details. 13 14 You should have received a copy of the GNU General Public 15 License along with the GNU C Library; if not, see 16 <https://www.gnu.org/licenses/>. */ 17 18#ifdef _LIBC 19# include <stdint.h> 20#endif 21 22struct STRUCT 23{ 24 const CHAR *pattern; 25 const CHAR *string; 26 bool no_leading_period; 27}; 28 29/* Match STRING against the file name pattern PATTERN, returning zero if 30 it matches, nonzero if not. */ 31static int FCT (const CHAR *pattern, const CHAR *string, 32 const CHAR *string_end, bool no_leading_period, int flags, 33 struct STRUCT *ends, size_t alloca_used); 34static int EXT (INT opt, const CHAR *pattern, const CHAR *string, 35 const CHAR *string_end, bool no_leading_period, int flags, 36 size_t alloca_used); 37static const CHAR *END (const CHAR *patternp); 38 39static int 40FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, 41 bool no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used) 42{ 43 const CHAR *p = pattern, *n = string; 44 UCHAR c; 45#ifdef _LIBC 46# if WIDE_CHAR_VERSION 47 const char *collseq = (const char *) 48 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); 49# else 50 const UCHAR *collseq = (const UCHAR *) 51 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); 52# endif 53#endif 54 55 while ((c = *p++) != L_('\0')) 56 { 57 bool new_no_leading_period = false; 58 c = FOLD (c); 59 60 switch (c) 61 { 62 case L_('?'): 63 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') 64 { 65 int res = EXT (c, p, n, string_end, no_leading_period, 66 flags, alloca_used); 67 if (res != -1) 68 return res; 69 } 70 71 if (n == string_end) 72 return FNM_NOMATCH; 73 else if (*n == L_('/') && (flags & FNM_FILE_NAME)) 74 return FNM_NOMATCH; 75 else if (*n == L_('.') && no_leading_period) 76 return FNM_NOMATCH; 77 break; 78 79 case L_('\\'): 80 if (!(flags & FNM_NOESCAPE)) 81 { 82 c = *p++; 83 if (c == L_('\0')) 84 /* Trailing \ loses. */ 85 return FNM_NOMATCH; 86 c = FOLD (c); 87 } 88 if (n == string_end || FOLD ((UCHAR) *n) != c) 89 return FNM_NOMATCH; 90 break; 91 92 case L_('*'): 93 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') 94 { 95 int res = EXT (c, p, n, string_end, no_leading_period, 96 flags, alloca_used); 97 if (res != -1) 98 return res; 99 } 100 else if (ends != NULL) 101 { 102 ends->pattern = p - 1; 103 ends->string = n; 104 ends->no_leading_period = no_leading_period; 105 return 0; 106 } 107 108 if (n != string_end && *n == L_('.') && no_leading_period) 109 return FNM_NOMATCH; 110 111 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) 112 { 113 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) 114 { 115 const CHAR *endp = END (p); 116 if (endp != p) 117 { 118 /* This is a pattern. Skip over it. */ 119 p = endp; 120 continue; 121 } 122 } 123 124 if (c == L_('?')) 125 { 126 /* A ? needs to match one character. */ 127 if (n == string_end) 128 /* There isn't another character; no match. */ 129 return FNM_NOMATCH; 130 else if (*n == L_('/') 131 && __glibc_unlikely (flags & FNM_FILE_NAME)) 132 /* A slash does not match a wildcard under 133 FNM_FILE_NAME. */ 134 return FNM_NOMATCH; 135 else 136 /* One character of the string is consumed in matching 137 this ? wildcard, so *??? won't match if there are 138 less than three characters. */ 139 ++n; 140 } 141 } 142 143 if (c == L_('\0')) 144 /* The wildcard(s) is/are the last element of the pattern. 145 If the name is a file name and contains another slash 146 this means it cannot match, unless the FNM_LEADING_DIR 147 flag is set. */ 148 { 149 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; 150 151 if (flags & FNM_FILE_NAME) 152 { 153 if (flags & FNM_LEADING_DIR) 154 result = 0; 155 else 156 { 157 if (MEMCHR (n, L_('/'), string_end - n) == NULL) 158 result = 0; 159 } 160 } 161 162 return result; 163 } 164 else 165 { 166 const CHAR *endp; 167 struct STRUCT end; 168 169 end.pattern = NULL; 170 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), 171 string_end - n); 172 if (endp == NULL) 173 endp = string_end; 174 175 if (c == L_('[') 176 || (__glibc_unlikely (flags & FNM_EXTMATCH) 177 && (c == L_('@') || c == L_('+') || c == L_('!')) 178 && *p == L_('('))) 179 { 180 int flags2 = ((flags & FNM_FILE_NAME) 181 ? flags : (flags & ~FNM_PERIOD)); 182 183 for (--p; n < endp; ++n, no_leading_period = false) 184 if (FCT (p, n, string_end, no_leading_period, flags2, 185 &end, alloca_used) == 0) 186 goto found; 187 } 188 else if (c == L_('/') && (flags & FNM_FILE_NAME)) 189 { 190 while (n < string_end && *n != L_('/')) 191 ++n; 192 if (n < string_end && *n == L_('/') 193 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, 194 NULL, alloca_used) == 0)) 195 return 0; 196 } 197 else 198 { 199 int flags2 = ((flags & FNM_FILE_NAME) 200 ? flags : (flags & ~FNM_PERIOD)); 201 202 if (c == L_('\\') && !(flags & FNM_NOESCAPE)) 203 c = *p; 204 c = FOLD (c); 205 for (--p; n < endp; ++n, no_leading_period = false) 206 if (FOLD ((UCHAR) *n) == c 207 && (FCT (p, n, string_end, no_leading_period, flags2, 208 &end, alloca_used) == 0)) 209 { 210 found: 211 if (end.pattern == NULL) 212 return 0; 213 break; 214 } 215 if (end.pattern != NULL) 216 { 217 p = end.pattern; 218 n = end.string; 219 no_leading_period = end.no_leading_period; 220 continue; 221 } 222 } 223 } 224 225 /* If we come here no match is possible with the wildcard. */ 226 return FNM_NOMATCH; 227 228 case L_('['): 229 { 230 /* Nonzero if the sense of the character class is inverted. */ 231 const CHAR *p_init = p; 232 const CHAR *n_init = n; 233 bool not; 234 CHAR cold; 235 UCHAR fn; 236 237 if (posixly_correct == 0) 238 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 239 240 if (n == string_end) 241 return FNM_NOMATCH; 242 243 if (*n == L_('.') && no_leading_period) 244 return FNM_NOMATCH; 245 246 if (*n == L_('/') && (flags & FNM_FILE_NAME)) 247 /* '/' cannot be matched. */ 248 return FNM_NOMATCH; 249 250 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); 251 if (not) 252 ++p; 253 254 fn = FOLD ((UCHAR) *n); 255 256 c = *p++; 257 for (;;) 258 { 259 if (!(flags & FNM_NOESCAPE) && c == L_('\\')) 260 { 261 if (*p == L_('\0')) 262 return FNM_NOMATCH; 263 c = FOLD ((UCHAR) *p); 264 ++p; 265 266 goto normal_bracket; 267 } 268 else if (c == L_('[') && *p == L_(':')) 269 { 270 /* Leave room for the null. */ 271 CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; 272 size_t c1 = 0; 273 wctype_t wt; 274 const CHAR *startp = p; 275 276 for (;;) 277 { 278 if (c1 == CHAR_CLASS_MAX_LENGTH) 279 /* The name is too long and therefore the pattern 280 is ill-formed. */ 281 return FNM_NOMATCH; 282 283 c = *++p; 284 if (c == L_(':') && p[1] == L_(']')) 285 { 286 p += 2; 287 break; 288 } 289 if (c < L_('a') || c >= L_('z')) 290 { 291 /* This cannot possibly be a character class name. 292 Match it as a normal range. */ 293 p = startp; 294 c = L_('['); 295 goto normal_bracket; 296 } 297 str[c1++] = c; 298 } 299 str[c1] = L_('\0'); 300 301 wt = IS_CHAR_CLASS (str); 302 if (wt == 0) 303 /* Invalid character class name. */ 304 return FNM_NOMATCH; 305 306#if defined _LIBC && ! WIDE_CHAR_VERSION 307 /* The following code is glibc specific but does 308 there a good job in speeding up the code since 309 we can avoid the btowc() call. */ 310 if (_ISCTYPE ((UCHAR) *n, wt)) 311 goto matched; 312#else 313 if (iswctype (BTOWC ((UCHAR) *n), wt)) 314 goto matched; 315#endif 316 c = *p++; 317 } 318#ifdef _LIBC 319 else if (c == L_('[') && *p == L_('=')) 320 { 321 /* It's important that STR be a scalar variable rather 322 than a one-element array, because GCC (at least 4.9.2 323 -O2 on x86-64) can be confused by the array and 324 diagnose a "used initialized" in a dead branch in the 325 findidx function. */ 326 UCHAR str; 327 uint32_t nrules = 328 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 329 const CHAR *startp = p; 330 331 c = *++p; 332 if (c == L_('\0')) 333 { 334 p = startp; 335 c = L_('['); 336 goto normal_bracket; 337 } 338 str = c; 339 340 c = *++p; 341 if (c != L_('=') || p[1] != L_(']')) 342 { 343 p = startp; 344 c = L_('['); 345 goto normal_bracket; 346 } 347 p += 2; 348 349 if (nrules == 0) 350 { 351 if ((UCHAR) *n == str) 352 goto matched; 353 } 354 else 355 { 356 const int32_t *table; 357# if WIDE_CHAR_VERSION 358 const int32_t *weights; 359 const wint_t *extra; 360# else 361 const unsigned char *weights; 362 const unsigned char *extra; 363# endif 364 const int32_t *indirect; 365 int32_t idx; 366 const UCHAR *cp = (const UCHAR *) &str; 367 368# if WIDE_CHAR_VERSION 369 table = (const int32_t *) 370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); 371 weights = (const int32_t *) 372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); 373 extra = (const wint_t *) 374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); 375 indirect = (const int32_t *) 376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); 377# else 378 table = (const int32_t *) 379 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 380 weights = (const unsigned char *) 381 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 382 extra = (const unsigned char *) 383 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 384 indirect = (const int32_t *) 385 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 386# endif 387 388 idx = FINDIDX (table, indirect, extra, &cp, 1); 389 if (idx != 0) 390 { 391 /* We found a table entry. Now see whether the 392 character we are currently at has the same 393 equivalence class value. */ 394 int len = weights[idx & 0xffffff]; 395 int32_t idx2; 396 const UCHAR *np = (const UCHAR *) n; 397 398 idx2 = FINDIDX (table, indirect, extra, 399 &np, string_end - n); 400 if (idx2 != 0 401 && (idx >> 24) == (idx2 >> 24) 402 && len == weights[idx2 & 0xffffff]) 403 { 404 int cnt = 0; 405 406 idx &= 0xffffff; 407 idx2 &= 0xffffff; 408 409 while (cnt < len 410 && (weights[idx + 1 + cnt] 411 == weights[idx2 + 1 + cnt])) 412 ++cnt; 413 414 if (cnt == len) 415 goto matched; 416 } 417 } 418 } 419 420 c = *p++; 421 } 422#endif 423 else if (c == L_('\0')) 424 { 425 /* [ unterminated, treat as normal character. */ 426 p = p_init; 427 n = n_init; 428 c = L_('['); 429 goto normal_match; 430 } 431 else 432 { 433 bool is_range = false; 434 435#ifdef _LIBC 436 bool is_seqval = false; 437 438 if (c == L_('[') && *p == L_('.')) 439 { 440 uint32_t nrules = 441 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 442 const CHAR *startp = p; 443 size_t c1 = 0; 444 445 while (1) 446 { 447 c = *++p; 448 if (c == L_('.') && p[1] == L_(']')) 449 { 450 p += 2; 451 break; 452 } 453 if (c == '\0') 454 return FNM_NOMATCH; 455 ++c1; 456 } 457 458 /* We have to handling the symbols differently in 459 ranges since then the collation sequence is 460 important. */ 461 is_range = *p == L_('-') && p[1] != L_('\0'); 462 463 if (nrules == 0) 464 { 465 /* There are no names defined in the collation 466 data. Therefore we only accept the trivial 467 names consisting of the character itself. */ 468 if (c1 != 1) 469 return FNM_NOMATCH; 470 471 if (!is_range && *n == startp[1]) 472 goto matched; 473 474 cold = startp[1]; 475 c = *p++; 476 } 477 else 478 { 479 int32_t table_size; 480 const int32_t *symb_table; 481 const unsigned char *extra; 482 int32_t idx; 483 int32_t elem; 484# if WIDE_CHAR_VERSION 485 CHAR *wextra; 486# endif 487 488 table_size = 489 _NL_CURRENT_WORD (LC_COLLATE, 490 _NL_COLLATE_SYMB_HASH_SIZEMB); 491 symb_table = (const int32_t *) 492 _NL_CURRENT (LC_COLLATE, 493 _NL_COLLATE_SYMB_TABLEMB); 494 extra = (const unsigned char *) 495 _NL_CURRENT (LC_COLLATE, 496 _NL_COLLATE_SYMB_EXTRAMB); 497 498 for (elem = 0; elem < table_size; elem++) 499 if (symb_table[2 * elem] != 0) 500 { 501 idx = symb_table[2 * elem + 1]; 502 /* Skip the name of collating element. */ 503 idx += 1 + extra[idx]; 504# if WIDE_CHAR_VERSION 505 /* Skip the byte sequence of the 506 collating element. */ 507 idx += 1 + extra[idx]; 508 /* Adjust for the alignment. */ 509 idx = (idx + 3) & ~3; 510 511 wextra = (CHAR *) &extra[idx + 4]; 512 513 if (/* Compare the length of the sequence. */ 514 c1 == wextra[0] 515 /* Compare the wide char sequence. */ 516 && (__wmemcmp (startp + 1, &wextra[1], 517 c1) 518 == 0)) 519 /* Yep, this is the entry. */ 520 break; 521# else 522 if (/* Compare the length of the sequence. */ 523 c1 == extra[idx] 524 /* Compare the byte sequence. */ 525 && memcmp (startp + 1, 526 &extra[idx + 1], c1) == 0) 527 /* Yep, this is the entry. */ 528 break; 529# endif 530 } 531 532 if (elem < table_size) 533 { 534 /* Compare the byte sequence but only if 535 this is not part of a range. */ 536 if (! is_range 537 538# if WIDE_CHAR_VERSION 539 && __wmemcmp (n, &wextra[1], c1) == 0 540# else 541 && memcmp (n, &extra[idx + 1], c1) == 0 542# endif 543 ) 544 { 545 n += c1 - 1; 546 goto matched; 547 } 548 549 /* Get the collation sequence value. */ 550 is_seqval = true; 551# if WIDE_CHAR_VERSION 552 cold = wextra[1 + wextra[idx]]; 553# else 554 idx += 1 + extra[idx]; 555 /* Adjust for the alignment. */ 556 idx = (idx + 3) & ~3; 557 cold = *((int32_t *) &extra[idx]); 558# endif 559 560 c = *p++; 561 } 562 else if (c1 == 1) 563 { 564 /* No valid character. Match it as a 565 single byte. */ 566 if (!is_range && *n == startp[1]) 567 goto matched; 568 569 cold = startp[1]; 570 c = *p++; 571 } 572 else 573 return FNM_NOMATCH; 574 } 575 } 576 else 577#endif 578 { 579 c = FOLD (c); 580 normal_bracket: 581 582 /* We have to handling the symbols differently in 583 ranges since then the collation sequence is 584 important. */ 585 is_range = (*p == L_('-') && p[1] != L_('\0') 586 && p[1] != L_(']')); 587 588 if (!is_range && c == fn) 589 goto matched; 590 591#if _LIBC 592 /* This is needed if we goto normal_bracket; from 593 outside of is_seqval's scope. */ 594 is_seqval = false; 595#endif 596 cold = c; 597 c = *p++; 598 } 599 600 if (c == L_('-') && *p != L_(']')) 601 { 602#if _LIBC 603 /* We have to find the collation sequence 604 value for C. Collation sequence is nothing 605 we can regularly access. The sequence 606 value is defined by the order in which the 607 definitions of the collation values for the 608 various characters appear in the source 609 file. A strange concept, nowhere 610 documented. */ 611 uint32_t fcollseq; 612 uint32_t lcollseq; 613 UCHAR cend = *p++; 614 615# if WIDE_CHAR_VERSION 616 /* Search in the 'names' array for the characters. */ 617 fcollseq = __collseq_table_lookup (collseq, fn); 618 if (fcollseq == ~((uint32_t) 0)) 619 /* XXX We don't know anything about the character 620 we are supposed to match. This means we are 621 failing. */ 622 goto range_not_matched; 623 624 if (is_seqval) 625 lcollseq = cold; 626 else 627 lcollseq = __collseq_table_lookup (collseq, cold); 628# else 629 fcollseq = collseq[fn]; 630 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; 631# endif 632 633 is_seqval = false; 634 if (cend == L_('[') && *p == L_('.')) 635 { 636 uint32_t nrules = 637 _NL_CURRENT_WORD (LC_COLLATE, 638 _NL_COLLATE_NRULES); 639 const CHAR *startp = p; 640 size_t c1 = 0; 641 642 while (1) 643 { 644 c = *++p; 645 if (c == L_('.') && p[1] == L_(']')) 646 { 647 p += 2; 648 break; 649 } 650 if (c == '\0') 651 return FNM_NOMATCH; 652 ++c1; 653 } 654 655 if (nrules == 0) 656 { 657 /* There are no names defined in the 658 collation data. Therefore we only 659 accept the trivial names consisting 660 of the character itself. */ 661 if (c1 != 1) 662 return FNM_NOMATCH; 663 664 cend = startp[1]; 665 } 666 else 667 { 668 int32_t table_size; 669 const int32_t *symb_table; 670 const unsigned char *extra; 671 int32_t idx; 672 int32_t elem; 673# if WIDE_CHAR_VERSION 674 CHAR *wextra; 675# endif 676 677 table_size = 678 _NL_CURRENT_WORD (LC_COLLATE, 679 _NL_COLLATE_SYMB_HASH_SIZEMB); 680 symb_table = (const int32_t *) 681 _NL_CURRENT (LC_COLLATE, 682 _NL_COLLATE_SYMB_TABLEMB); 683 extra = (const unsigned char *) 684 _NL_CURRENT (LC_COLLATE, 685 _NL_COLLATE_SYMB_EXTRAMB); 686 687 for (elem = 0; elem < table_size; elem++) 688 if (symb_table[2 * elem] != 0) 689 { 690 idx = symb_table[2 * elem + 1]; 691 /* Skip the name of collating 692 element. */ 693 idx += 1 + extra[idx]; 694# if WIDE_CHAR_VERSION 695 /* Skip the byte sequence of the 696 collating element. */ 697 idx += 1 + extra[idx]; 698 /* Adjust for the alignment. */ 699 idx = (idx + 3) & ~3; 700 701 wextra = (CHAR *) &extra[idx + 4]; 702 703 if (/* Compare the length of the 704 sequence. */ 705 c1 == wextra[0] 706 /* Compare the wide char sequence. */ 707 && (__wmemcmp (startp + 1, 708 &wextra[1], c1) 709 == 0)) 710 /* Yep, this is the entry. */ 711 break; 712# else 713 if (/* Compare the length of the 714 sequence. */ 715 c1 == extra[idx] 716 /* Compare the byte sequence. */ 717 && memcmp (startp + 1, 718 &extra[idx + 1], c1) == 0) 719 /* Yep, this is the entry. */ 720 break; 721# endif 722 } 723 724 if (elem < table_size) 725 { 726 /* Get the collation sequence value. */ 727 is_seqval = true; 728# if WIDE_CHAR_VERSION 729 cend = wextra[1 + wextra[idx]]; 730# else 731 idx += 1 + extra[idx]; 732 /* Adjust for the alignment. */ 733 idx = (idx + 3) & ~3; 734 cend = *((int32_t *) &extra[idx]); 735# endif 736 } 737 else if (c1 == 1) 738 { 739 cend = startp[1]; 740 c = *p++; 741 } 742 else 743 return FNM_NOMATCH; 744 } 745 } 746 else 747 { 748 if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) 749 cend = *p++; 750 if (cend == L_('\0')) 751 return FNM_NOMATCH; 752 cend = FOLD (cend); 753 } 754 755 /* XXX It is not entirely clear to me how to handle 756 characters which are not mentioned in the 757 collation specification. */ 758 if ( 759# if WIDE_CHAR_VERSION 760 lcollseq == 0xffffffff || 761# endif 762 lcollseq <= fcollseq) 763 { 764 /* We have to look at the upper bound. */ 765 uint32_t hcollseq; 766 767 if (is_seqval) 768 hcollseq = cend; 769 else 770 { 771# if WIDE_CHAR_VERSION 772 hcollseq = 773 __collseq_table_lookup (collseq, cend); 774 if (hcollseq == ~((uint32_t) 0)) 775 { 776 /* Hum, no information about the upper 777 bound. The matching succeeds if the 778 lower bound is matched exactly. */ 779 if (lcollseq != fcollseq) 780 goto range_not_matched; 781 782 goto matched; 783 } 784# else 785 hcollseq = collseq[cend]; 786# endif 787 } 788 789 if (lcollseq <= hcollseq && fcollseq <= hcollseq) 790 goto matched; 791 } 792# if WIDE_CHAR_VERSION 793 range_not_matched: 794# endif 795#else 796 /* We use a boring value comparison of the character 797 values. This is better than comparing using 798 'strcoll' since the latter would have surprising 799 and sometimes fatal consequences. */ 800 UCHAR cend = *p++; 801 802 if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) 803 cend = *p++; 804 if (cend == L_('\0')) 805 return FNM_NOMATCH; 806 807 /* It is a range. */ 808 if ((UCHAR) cold <= fn && fn <= cend) 809 goto matched; 810#endif 811 812 c = *p++; 813 } 814 } 815 816 if (c == L_(']')) 817 break; 818 } 819 820 if (!not) 821 return FNM_NOMATCH; 822 break; 823 824 matched: 825 /* Skip the rest of the [...] that already matched. */ 826 while ((c = *p++) != L_(']')) 827 { 828 if (c == L_('\0')) 829 /* [... (unterminated) loses. */ 830 return FNM_NOMATCH; 831 832 if (!(flags & FNM_NOESCAPE) && c == L_('\\')) 833 { 834 if (*p == L_('\0')) 835 return FNM_NOMATCH; 836 /* XXX 1003.2d11 is unclear if this is right. */ 837 ++p; 838 } 839 else if (c == L_('[') && *p == L_(':')) 840 { 841 int c1 = 0; 842 const CHAR *startp = p; 843 844 while (1) 845 { 846 c = *++p; 847 if (++c1 == CHAR_CLASS_MAX_LENGTH) 848 return FNM_NOMATCH; 849 850 if (*p == L_(':') && p[1] == L_(']')) 851 break; 852 853 if (c < L_('a') || c >= L_('z')) 854 { 855 p = startp - 2; 856 break; 857 } 858 } 859 p += 2; 860 } 861 else if (c == L_('[') && *p == L_('=')) 862 { 863 c = *++p; 864 if (c == L_('\0')) 865 return FNM_NOMATCH; 866 c = *++p; 867 if (c != L_('=') || p[1] != L_(']')) 868 return FNM_NOMATCH; 869 p += 2; 870 } 871 else if (c == L_('[') && *p == L_('.')) 872 { 873 while (1) 874 { 875 c = *++p; 876 if (c == L_('\0')) 877 return FNM_NOMATCH; 878 879 if (c == L_('.') && p[1] == L_(']')) 880 break; 881 } 882 p += 2; 883 } 884 } 885 if (not) 886 return FNM_NOMATCH; 887 } 888 break; 889 890 case L_('+'): 891 case L_('@'): 892 case L_('!'): 893 if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') 894 { 895 int res = EXT (c, p, n, string_end, no_leading_period, flags, 896 alloca_used); 897 if (res != -1) 898 return res; 899 } 900 goto normal_match; 901 902 case L_('/'): 903 if (NO_LEADING_PERIOD (flags)) 904 { 905 if (n == string_end || c != (UCHAR) *n) 906 return FNM_NOMATCH; 907 908 new_no_leading_period = true; 909 break; 910 } 911 FALLTHROUGH; 912 default: 913 normal_match: 914 if (n == string_end || c != FOLD ((UCHAR) *n)) 915 return FNM_NOMATCH; 916 } 917 918 no_leading_period = new_no_leading_period; 919 ++n; 920 } 921 922 if (n == string_end) 923 return 0; 924 925 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) 926 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ 927 return 0; 928 929 return FNM_NOMATCH; 930} 931 932 933static const CHAR * 934END (const CHAR *pattern) 935{ 936 const CHAR *p = pattern; 937 938 while (1) 939 if (*++p == L_('\0')) 940 /* This is an invalid pattern. */ 941 return pattern; 942 else if (*p == L_('[')) 943 { 944 /* Handle brackets special. */ 945 if (posixly_correct == 0) 946 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 947 948 /* Skip the not sign. We have to recognize it because of a possibly 949 following ']'. */ 950 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) 951 ++p; 952 /* A leading ']' is recognized as such. */ 953 if (*p == L_(']')) 954 ++p; 955 /* Skip over all characters of the list. */ 956 while (*p != L_(']')) 957 if (*p++ == L_('\0')) 958 /* This is no valid pattern. */ 959 return pattern; 960 } 961 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') 962 || *p == L_('!')) && p[1] == L_('(')) 963 { 964 p = END (p + 1); 965 if (*p == L_('\0')) 966 /* This is an invalid pattern. */ 967 return pattern; 968 } 969 else if (*p == L_(')')) 970 break; 971 972 return p + 1; 973} 974 975 976static int 977EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, 978 bool no_leading_period, int flags, size_t alloca_used) 979{ 980 const CHAR *startp; 981 size_t level; 982 struct patternlist 983 { 984 struct patternlist *next; 985 CHAR malloced; 986 CHAR str[FLEXIBLE_ARRAY_MEMBER]; 987 } *list = NULL; 988 struct patternlist **lastp = &list; 989 size_t pattern_len = STRLEN (pattern); 990 bool any_malloced = false; 991 const CHAR *p; 992 const CHAR *rs; 993 int retval = 0; 994 995 /* Parse the pattern. Store the individual parts in the list. */ 996 level = 0; 997 for (startp = p = pattern + 1; ; ++p) 998 if (*p == L_('\0')) 999 { 1000 /* This is an invalid pattern. */ 1001 retval = -1; 1002 goto out; 1003 } 1004 else if (*p == L_('[')) 1005 { 1006 /* Handle brackets special. */ 1007 if (posixly_correct == 0) 1008 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 1009 1010 /* Skip the not sign. We have to recognize it because of a possibly 1011 following ']'. */ 1012 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) 1013 ++p; 1014 /* A leading ']' is recognized as such. */ 1015 if (*p == L_(']')) 1016 ++p; 1017 /* Skip over all characters of the list. */ 1018 while (*p != L_(']')) 1019 if (*p++ == L_('\0')) 1020 { 1021 /* This is no valid pattern. */ 1022 retval = -1; 1023 goto out; 1024 } 1025 } 1026 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') 1027 || *p == L_('!')) && p[1] == L_('(')) 1028 /* Remember the nesting level. */ 1029 ++level; 1030 else if (*p == L_(')')) 1031 { 1032 if (level-- == 0) 1033 { 1034 /* This means we found the end of the pattern. */ 1035#define NEW_PATTERN \ 1036 struct patternlist *newp; \ 1037 size_t plen = (opt == L_('?') || opt == L_('@') \ 1038 ? pattern_len : (p - startp + 1UL)); \ 1039 ptrdiff_t slen = FLEXSIZEOF (struct patternlist, str, 0); \ 1040 ptrdiff_t new_used = alloca_used + slen; \ 1041 ptrdiff_t plensize; \ 1042 if (INT_MULTIPLY_WRAPV (plen, sizeof (CHAR), &plensize) \ 1043 || INT_ADD_WRAPV (new_used, plensize, &new_used)) \ 1044 { \ 1045 retval = -2; \ 1046 goto out; \ 1047 } \ 1048 slen += plensize; \ 1049 bool malloced = ! __libc_use_alloca (new_used); \ 1050 if (__glibc_unlikely (malloced)) \ 1051 { \ 1052 newp = malloc (slen); \ 1053 if (newp == NULL) \ 1054 { \ 1055 retval = -2; \ 1056 goto out; \ 1057 } \ 1058 any_malloced = true; \ 1059 } \ 1060 else \ 1061 newp = alloca_account (slen, alloca_used); \ 1062 newp->next = NULL; \ 1063 newp->malloced = malloced; \ 1064 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \ 1065 *lastp = newp; \ 1066 lastp = &newp->next 1067 NEW_PATTERN; 1068 break; 1069 } 1070 } 1071 else if (*p == L_('|')) 1072 { 1073 if (level == 0) 1074 { 1075 NEW_PATTERN; 1076 startp = p + 1; 1077 } 1078 } 1079 assert (list != NULL); 1080 assert (p[-1] == L_(')')); 1081#undef NEW_PATTERN 1082 1083 switch (opt) 1084 { 1085 case L_('*'): 1086 if (FCT (p, string, string_end, no_leading_period, flags, NULL, 1087 alloca_used) == 0) 1088 goto success; 1089 FALLTHROUGH; 1090 case L_('+'): 1091 do 1092 { 1093 for (rs = string; rs <= string_end; ++rs) 1094 /* First match the prefix with the current pattern with the 1095 current pattern. */ 1096 if (FCT (list->str, string, rs, no_leading_period, 1097 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, 1098 NULL, alloca_used) == 0 1099 /* This was successful. Now match the rest with the rest 1100 of the pattern. */ 1101 && (FCT (p, rs, string_end, 1102 rs == string 1103 ? no_leading_period 1104 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1105 flags & FNM_FILE_NAME 1106 ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0 1107 /* This didn't work. Try the whole pattern. */ 1108 || (rs != string 1109 && FCT (pattern - 1, rs, string_end, 1110 rs == string 1111 ? no_leading_period 1112 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1113 flags & FNM_FILE_NAME 1114 ? flags : flags & ~FNM_PERIOD, NULL, 1115 alloca_used) == 0))) 1116 /* It worked. Signal success. */ 1117 goto success; 1118 } 1119 while ((list = list->next) != NULL); 1120 1121 /* None of the patterns lead to a match. */ 1122 retval = FNM_NOMATCH; 1123 break; 1124 1125 case L_('?'): 1126 if (FCT (p, string, string_end, no_leading_period, flags, NULL, 1127 alloca_used) == 0) 1128 goto success; 1129 FALLTHROUGH; 1130 case L_('@'): 1131 do 1132 /* I cannot believe it but 'strcat' is actually acceptable 1133 here. Match the entire string with the prefix from the 1134 pattern list and the rest of the pattern following the 1135 pattern list. */ 1136 if (FCT (STRCAT (list->str, p), string, string_end, 1137 no_leading_period, 1138 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, 1139 NULL, alloca_used) == 0) 1140 /* It worked. Signal success. */ 1141 goto success; 1142 while ((list = list->next) != NULL); 1143 1144 /* None of the patterns lead to a match. */ 1145 retval = FNM_NOMATCH; 1146 break; 1147 1148 case L_('!'): 1149 for (rs = string; rs <= string_end; ++rs) 1150 { 1151 struct patternlist *runp; 1152 1153 for (runp = list; runp != NULL; runp = runp->next) 1154 if (FCT (runp->str, string, rs, no_leading_period, 1155 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, 1156 NULL, alloca_used) == 0) 1157 break; 1158 1159 /* If none of the patterns matched see whether the rest does. */ 1160 if (runp == NULL 1161 && (FCT (p, rs, string_end, 1162 rs == string 1163 ? no_leading_period 1164 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1165 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, 1166 NULL, alloca_used) == 0)) 1167 /* This is successful. */ 1168 goto success; 1169 } 1170 1171 /* None of the patterns together with the rest of the pattern 1172 lead to a match. */ 1173 retval = FNM_NOMATCH; 1174 break; 1175 1176 default: 1177 assert (! "Invalid extended matching operator"); 1178 retval = -1; 1179 break; 1180 } 1181 1182 success: 1183 out: 1184 if (any_malloced) 1185 while (list != NULL) 1186 { 1187 struct patternlist *old = list; 1188 list = list->next; 1189 if (old->malloced) 1190 free (old); 1191 } 1192 1193 return retval; 1194} 1195 1196 1197#undef FOLD 1198#undef CHAR 1199#undef UCHAR 1200#undef INT 1201#undef FCT 1202#undef EXT 1203#undef END 1204#undef STRUCT 1205#undef MEMPCPY 1206#undef MEMCHR 1207#undef STRLEN 1208#undef STRCAT 1209#undef L_ 1210#undef BTOWC 1211#undef WIDE_CHAR_VERSION 1212#undef FINDIDX 1213