1/*- 2 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer. 8 * --- 89 unchanged lines hidden (view full) --- 98static void repeat __P((struct parse *p, sopno start, int from, int to)); 99static int seterr __P((struct parse *p, int e)); 100static cset *allocset __P((struct parse *p)); 101static void freeset __P((struct parse *p, cset *cs)); 102static int freezeset __P((struct parse *p, cset *cs)); 103static int firstch __P((struct parse *p, cset *cs)); 104static int nch __P((struct parse *p, cset *cs)); 105static void mcadd __P((struct parse *p, cset *cs, char *cp)); |
106#if used |
107static void mcsub __P((cset *cs, char *cp)); 108static int mcin __P((cset *cs, char *cp)); 109static char *mcfind __P((cset *cs, char *cp)); |
110#endif |
111static void mcinvert __P((struct parse *p, cset *cs)); 112static void mccase __P((struct parse *p, cset *cs)); 113static int isinsets __P((struct re_guts *g, int c)); 114static int samesets __P((struct re_guts *g, int c1, int c2)); 115static void categorize __P((struct parse *p, struct re_guts *g)); 116static sopno dupl __P((struct parse *p, sopno start, sopno finish)); 117static void doemit __P((struct parse *p, sop op, size_t opnd)); 118static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos)); --- 173 unchanged lines hidden (view full) --- 292 register sopno conc; 293 register int first = 1; /* is this the first alternative? */ 294 295 for (;;) { 296 /* do a bunch of concatenated expressions */ 297 conc = HERE(); 298 while (MORE() && (c = PEEK()) != '|' && c != stop) 299 p_ere_exp(p); |
300 (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ |
301 302 if (!EAT('|')) 303 break; /* NOTE BREAK OUT */ 304 305 if (first) { 306 INSERT(OCH_, conc); /* offset is wrong */ 307 prevfwd = conc; 308 prevback = conc; --- 30 unchanged lines hidden (view full) --- 339 int wascaret = 0; 340 341 assert(MORE()); /* caller should have ensured this */ 342 c = GETNEXT(); 343 344 pos = HERE(); 345 switch (c) { 346 case '(': |
347 (void)REQUIRE(MORE(), REG_EPAREN); |
348 p->g->nsub++; 349 subno = p->g->nsub; 350 if (subno < NPAREN) 351 p->pbegin[subno] = HERE(); 352 EMIT(OLPAREN, subno); 353 if (!SEE(')')) 354 p_ere(p, ')'); 355 if (subno < NPAREN) { 356 p->pend[subno] = HERE(); 357 assert(p->pend[subno] != 0); 358 } 359 EMIT(ORPAREN, subno); |
360 (void)MUSTEAT(')', REG_EPAREN); |
361 break; 362#ifndef POSIX_MISTAKE 363 case ')': /* happens only if no current unmatched ( */ 364 /* 365 * You may ask, why the ifndef? Because I didn't notice 366 * this until slightly too late for 1003.2, and none of the 367 * other 1003.2 regular-expression reviewers noticed it at 368 * all. So an unmatched ) is legal POSIX, at least until --- 26 unchanged lines hidden (view full) --- 395 nonnewline(p); 396 else 397 EMIT(OANY, 0); 398 break; 399 case '[': 400 p_bracket(p); 401 break; 402 case '\\': |
403 (void)REQUIRE(MORE(), REG_EESCAPE); |
404 c = GETNEXT(); 405 ordinary(p, c); 406 break; 407 case '{': /* okay as ordinary except if digit follows */ |
408 (void)REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT); /* cast: isdigit() is only defined for unsigned char values and EOF */ |
409 /* FALLTHROUGH */ 410 default: 411 ordinary(p, c); 412 break; 413 } 414 415 if (!MORE()) 416 return; 417 c = PEEK(); 418 /* we call { a repetition if followed by a digit */ 419 if (!( c == '*' || c == '+' || c == '?' || 420 (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) )) 421 return; /* no repetition, we're done */ 422 NEXT(); 423 |
424 (void)REQUIRE(!wascaret, REG_BADRPT); |
425 switch (c) { 426 case '*': /* implemented as +? */ 427 /* this case does not require the (y|) trick, noKLUDGE */ 428 INSERT(OPLUS_, pos); 429 ASTERN(O_PLUS, pos); 430 INSERT(OQUEST_, pos); 431 ASTERN(O_QUEST, pos); 432 break; --- 10 unchanged lines hidden (view full) --- 443 AHEAD(THERE()); /* ...so fix it */ 444 ASTERN(O_CH, THERETHERE()); 445 break; 446 case '{': 447 count = p_count(p); 448 if (EAT(',')) { 449 if (MORE() && isdigit((unsigned char)PEEK())) { /* guard the peek with MORE(), as p_simp_re does */ 450 count2 = p_count(p); |
451 (void)REQUIRE(count <= count2, REG_BADBR); |
452 } else /* single number with comma */ 453 count2 = INFINITY; 454 } else /* just a single number */ 455 count2 = count; 456 repeat(p, pos, count, count2); 457 if (!EAT('}')) { /* error heuristics */ 458 while (MORE() && PEEK() != '}') 459 NEXT(); |
460 (void)REQUIRE(MORE(), REG_EBRACE); |
461 SETERROR(REG_BADBR); 462 } 463 break; 464 } 465 466 if (!MORE()) 467 return; 468 c = PEEK(); --- 6 unchanged lines hidden (view full) --- 475/* 476 - p_str - string (no metacharacters) "parser" 477 == static void p_str(register struct parse *p); 478 */ 479static void 480p_str(p) 481register struct parse *p; 482{ |
483 (void)REQUIRE(MORE(), REG_EMPTY); /* require nonempty */ |
484 while (MORE()) /* hand every remaining character to ordinary() */ 485 ordinary(p, GETNEXT()); 486} 487 488/* 489 - p_bre - BRE parser top level, anchoring and concatenation 490 == static void p_bre(register struct parse *p, register int end1, \ 491 == register int end2); --- 26 unchanged lines hidden (view full) --- 518 } 519 if (wasdollar) { /* oops, that was a trailing anchor */ 520 DROP(1); 521 EMIT(OEOL, 0); 522 p->g->iflags |= USEEOL; 523 p->g->neol++; 524 } 525 |
526 (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ |
527} 528 529/* 530 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 531 == static int p_simp_re(register struct parse *p, int starordinary); 532 */ 533static int /* was the simple RE an unbackslashed $? */ 534p_simp_re(p, starordinary) --- 8 unchanged lines hidden (view full) --- 543 register sopno subno; 544# define BACKSL (1<<CHAR_BIT) 545 546 pos = HERE(); /* repetition op, if any, covers from here */ 547 548 assert(MORE()); /* caller should have ensured this */ 549 c = GETNEXT(); 550 if (c == '\\') { |
551 (void)REQUIRE(MORE(), REG_EESCAPE); |
552 c = BACKSL | (unsigned char)GETNEXT(); /* BACKSL tags the character as backslash-escaped for the switch below */ 553 } 554 switch (c) { 555 case '.': 556 if (p->g->cflags&REG_NEWLINE) 557 nonnewline(p); 558 else 559 EMIT(OANY, 0); --- 13 unchanged lines hidden (view full) --- 573 /* the MORE here is an error heuristic */ 574 if (MORE() && !SEETWO('\\', ')')) 575 p_bre(p, '\\', ')'); 576 if (subno < NPAREN) { 577 p->pend[subno] = HERE(); 578 assert(p->pend[subno] != 0); 579 } 580 EMIT(ORPAREN, subno); |
581 (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); |
582 break; 583 case BACKSL|')': /* should not get here -- must be user */ 584 case BACKSL|'}': 585 SETERROR(REG_EPAREN); 586 break; 587 case BACKSL|'1': 588 case BACKSL|'2': 589 case BACKSL|'3': --- 13 unchanged lines hidden (view full) --- 603 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 604 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 605 EMIT(O_BACK, i); 606 } else 607 SETERROR(REG_ESUBREG); 608 p->g->backrefs = 1; 609 break; 610 case '*': |
611 (void)REQUIRE(starordinary, REG_BADRPT); |
612 /* FALLTHROUGH */ 613 default: 614 ordinary(p, c &~ BACKSL); 615 break; 616 } 617 618 if (EAT('*')) { /* implemented as +? */ 619 /* this case does not require the (y|) trick, noKLUDGE */ 620 INSERT(OPLUS_, pos); 621 ASTERN(O_PLUS, pos); 622 INSERT(OQUEST_, pos); 623 ASTERN(O_QUEST, pos); 624 } else if (EATTWO('\\', '{')) { 625 count = p_count(p); 626 if (EAT(',')) { 627 if (MORE() && isdigit((unsigned char)PEEK())) { /* cast: isdigit() is only defined for unsigned char values and EOF */ 628 count2 = p_count(p); |
629 (void)REQUIRE(count <= count2, REG_BADBR); |
630 } else /* single number with comma */ 631 count2 = INFINITY; 632 } else /* just a single number */ 633 count2 = count; 634 repeat(p, pos, count, count2); 635 if (!EATTWO('\\', '}')) { /* error heuristics */ 636 while (MORE() && !SEETWO('\\', '}')) 637 NEXT(); |
638 (void)REQUIRE(MORE(), REG_EBRACE); |
639 SETERROR(REG_BADBR); 640 } 641 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 642 return(1); 643 644 return(0); 645} 646 --- 8 unchanged lines hidden (view full) --- 655 register int count = 0; 656 register int ndigits = 0; 657 658 while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) { 659 count = count*10 + (GETNEXT() - '0'); 660 ndigits++; 661 } 662 |
663 (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); |
664 return(count); 665} 666 667/* 668 - p_bracket - parse a bracketed character list 669 == static void p_bracket(register struct parse *p); 670 * 671 * Note a significant property of this code: if the allocset() did SETERROR, --- 23 unchanged lines hidden (view full) --- 695 if (EAT(']')) 696 CHadd(cs, ']'); 697 else if (EAT('-')) 698 CHadd(cs, '-'); 699 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 700 p_b_term(p, cs); 701 if (EAT('-')) 702 CHadd(cs, '-'); |
703 (void)MUSTEAT(']', REG_EBRACK); |
704 705 if (p->error != 0) /* don't mess things up further */ 706 return; 707 708 if (p->g->cflags®_ICASE) { 709 register int i; 710 register int ci; 711 --- 54 unchanged lines hidden (view full) --- 766 default: 767 c = '\0'; 768 break; 769 } 770 771 switch (c) { 772 case ':': /* character class */ 773 NEXT2(); |
774 (void)REQUIRE(MORE(), REG_EBRACK); |
775 c = PEEK(); |
776 (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE); |
777 p_b_cclass(p, cs); |
778 (void)REQUIRE(MORE(), REG_EBRACK); 779 (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE); |
780 break; 781 case '=': /* equivalence class */ 782 NEXT2(); |
783 (void)REQUIRE(MORE(), REG_EBRACK); |
784 c = PEEK(); |
785 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE); |
786 p_b_eclass(p, cs); |
787 (void)REQUIRE(MORE(), REG_EBRACK); 788 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); |
789 break; 790 default: /* symbol, ordinary character, or range */ 791/* xxx revision needed for multichar stuff */ 792 start = p_b_symbol(p); 793 if (SEE('-') && MORE2() && PEEK2() != ']') { 794 /* range */ 795 NEXT(); 796 if (EAT('-')) 797 finish = '-'; 798 else 799 finish = p_b_symbol(p); 800 } else 801 finish = start; 802/* xxx what about signed chars here... */ |
803 (void)REQUIRE(start <= finish, REG_ERANGE); |
804 for (i = start; i <= finish; i++) 805 CHadd(cs, i); 806 break; 807 } 808} 809 810/* 811 - p_b_cclass - parse a character-class name and deal with it --- 51 unchanged lines hidden (view full) --- 863 == static char p_b_symbol(register struct parse *p); 864 */ 865static char /* value of symbol */ 866p_b_symbol(p) 867register struct parse *p; 868{ 869 register char value; 870 |
871 (void)REQUIRE(MORE(), REG_EBRACK); /* need at least one more character here */ |
872 if (!EATTWO('[', '.')) /* no "[." opener: the symbol is simply the next character */ 873 return(GETNEXT()); 874 875 /* collating symbol */ 876 value = p_b_coll_elem(p, '.'); |
877 (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); /* the collating symbol must be closed by ".]" */ |
878 return(value); 879} 880 881/* 882 - p_b_coll_elem - parse a collating-element name and look it up 883 == static char p_b_coll_elem(register struct parse *p, int endc); 884 */ 885static char /* value of collating element */ --- 375 unchanged lines hidden (view full) --- 1261 SETERROR(REG_ESPACE); 1262 return; 1263 } 1264 1265 (void) strcpy(cs->multis + oldend - 1, cp); 1266 cs->multis[cs->smultis - 1] = '\0'; 1267} 1268 |
1269#if used |
1270/* 1271 - mcsub - subtract a collating element from a cset 1272 == static void mcsub(register cset *cs, register char *cp); 1273 */ 1274static void 1275mcsub(cs, cp) 1276register cset *cs; 1277register char *cp; --- 41 unchanged lines hidden (view full) --- 1319 1320 if (cs->multis == NULL) 1321 return(NULL); 1322 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 1323 if (strcmp(cp, p) == 0) 1324 return(p); 1325 return(NULL); 1326} |
1327#endif |
1328 1329/* 1330 - mcinvert - invert the list of collating elements in a cset 1331 == static void mcinvert(register struct parse *p, register cset *cs); 1332 * 1333 * This would have to know the set of possibilities. Implementation 1334 * is deferred. 1335 */ --- 367 unchanged lines hidden --- |