1#include <sys/types.h> 2#include <stdio.h> 3#include <string.h> 4#include <ctype.h> 5#include <limits.h> 6#include <stdlib.h> 7#include <regex.h> 8 9#include "utils.h" 10#include "regex2.h" 11 12#include "cclass.h" 13#include "cname.h" 14 15/* 16 * parse structure, passed up and down to avoid global variables and 17 * other clumsinesses 18 */ 19struct parse { 20 char *next; /* next character in RE */ 21 char *end; /* end of string (-> NUL normally) */ 22 int error; /* has an error been seen? */ 23 sop *strip; /* malloced strip */ 24 sopno ssize; /* malloced strip size (allocated) */ 25 sopno slen; /* malloced strip length (used) */ 26 int ncsalloc; /* number of csets allocated */ 27 struct re_guts *g; 28# define NPAREN 10 /* we need to remember () 1-9 for back refs */ 29 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 30 sopno pend[NPAREN]; /* -> ) ([0] unused) */ 31}; 32 33#include "regcomp.ih" 34 35static char nuls[10]; /* place to point scanner in event of error */ 36 37/* 38 * macros for use with parse structure 39 * BEWARE: these know that the parse structure is named `p' !!! 40 */ 41#define PEEK() (*p->next) 42#define PEEK2() (*(p->next+1)) 43#define MORE() (p->next < p->end) 44#define MORE2() (p->next+1 < p->end) 45#define SEE(c) (MORE() && PEEK() == (c)) 46#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 47#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 48#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 49#define NEXT() (p->next++) 50#define NEXT2() (p->next += 2) 51#define NEXTn(n) (p->next += (n)) 52#define GETNEXT() (*p->next++) 53#define SETERROR(e) seterr(p, (e)) 54#define REQUIRE(co, e) ((co) || SETERROR(e)) 55#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 56#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 57#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 58#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 59#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 60#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 61#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 62#define HERE() (p->slen) 63#define THERE() (p->slen - 1) 64#define THERETHERE() (p->slen - 2) 65#define DROP(n) (p->slen -= (n)) 66 67#ifndef NDEBUG 68static int never = 0; /* for use in asserts; shuts lint up */ 69#else 70#define never 0 /* some <assert.h>s have bugs too */ 71#endif 72 73/* 74 - regcomp - interface for parser and compilation 75 = extern int regcomp(regex_t *, const char *, int); 76 = #define REG_BASIC 0000 77 = #define REG_EXTENDED 0001 78 = #define REG_ICASE 0002 79 = #define REG_NOSUB 0004 80 = #define REG_NEWLINE 0010 81 = #define REG_NOSPEC 0020 82 = #define REG_PEND 0040 83 = #define REG_DUMP 0200 84 */ 85int /* 0 success, otherwise REG_something */ 86regcomp(preg, pattern, cflags) 87regex_t *preg; 88const char *pattern; 89int cflags; 90{ 91 struct parse pa; 92 register struct re_guts *g; 93 register struct parse *p = &pa; 94 register int i; 95 register size_t len; 96#ifdef REDEBUG 97# define GOODFLAGS(f) (f) 98#else 99# define GOODFLAGS(f) ((f)&~REG_DUMP) 100#endif 101 102 cflags = GOODFLAGS(cflags); 103 if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 104 return(REG_INVARG); 105 106 if (cflags®_PEND) { 107 if (preg->re_endp < pattern) 108 return(REG_INVARG); 109 len = preg->re_endp - pattern; 110 } else 111 len = strlen((char *)pattern); 112 113 /* do the mallocs early so failure handling is easy */ 114 g = (struct re_guts *)malloc(sizeof(struct re_guts) + 115 (NC-1)*sizeof(cat_t)); 116 if (g == NULL) 117 return(REG_ESPACE); 118 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 119 p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 120 p->slen = 0; 121 if (p->strip == NULL) { 122 free((char *)g); 123 return(REG_ESPACE); 124 } 125 126 /* set things up */ 127 p->g = g; 128 p->next = (char *)pattern; /* convenience; we do not modify it */ 129 p->end = p->next + len; 130 p->error = 0; 131 p->ncsalloc = 0; 132 for (i = 0; i < NPAREN; i++) { 133 p->pbegin[i] = 0; 134 p->pend[i] = 0; 135 } 136 g->csetsize = NC; 137 g->sets = NULL; 138 g->setbits = NULL; 139 g->ncsets = 0; 140 g->cflags = cflags; 141 g->iflags = 0; 142 g->nbol = 0; 143 g->neol = 0; 144 g->must = NULL; 145 g->mlen = 0; 146 g->nsub = 0; 147 g->ncategories = 1; /* category 0 is "everything else" */ 148 g->categories = &g->catspace[-(CHAR_MIN)]; 149 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 150 g->backrefs = 0; 151 152 /* do it */ 153 EMIT(OEND, 0); 154 g->firststate = THERE(); 155 if (cflags®_EXTENDED) 156 p_ere(p, OUT); 157 else if (cflags®_NOSPEC) 158 p_str(p); 159 else 160 p_bre(p, OUT, OUT); 161 EMIT(OEND, 0); 162 g->laststate = THERE(); 163 164 /* tidy up loose ends and fill things in */ 165 categorize(p, g); 166 stripsnug(p, g); 167 findmust(p, g); 168 g->nplus = pluscount(p, g); 169 g->magic = MAGIC2; 170 preg->re_nsub = g->nsub; 171 preg->re_g = g; 172 preg->re_magic = MAGIC1; 173#ifndef REDEBUG 174 /* not debugging, so can't rely on the assert() in regexec() */ 175 if (g->iflags&BAD) 176 SETERROR(REG_ASSERT); 177#endif 178 179 /* win or lose, we're done */ 180 if (p->error != 0) /* lose */ 181 regfree(preg); 182 return(p->error); 183} 184 185/* 186 - p_ere - ERE parser top level, concatenation and alternation 187 == static void p_ere(register struct parse *p, int stop); 188 */ 189static void 190p_ere(p, stop) 191register struct parse *p; 192int stop; /* character this ERE should end at */ 193{ 194 register char c; 195 register sopno prevback; 196 register sopno prevfwd; 197 register sopno conc; 198 register int first = 1; /* is this the first alternative? */ 199 200 for (;;) { 201 /* do a bunch of concatenated expressions */ 202 conc = HERE(); 203 while (MORE() && (c = PEEK()) != '|' && c != stop) 204 p_ere_exp(p); 205 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 206 207 if (!EAT('|')) 208 break; /* NOTE BREAK OUT */ 209 210 if (first) { 211 INSERT(OCH_, conc); /* offset is wrong */ 212 prevfwd = conc; 213 prevback = conc; 214 first = 0; 215 } 216 ASTERN(OOR1, prevback); 217 prevback = THERE(); 218 AHEAD(prevfwd); /* fix previous offset */ 219 prevfwd = HERE(); 220 EMIT(OOR2, 0); /* offset is very wrong */ 221 } 222 223 if (!first) { /* tail-end fixups */ 224 AHEAD(prevfwd); 225 ASTERN(O_CH, prevback); 226 } 227 228 assert(!MORE() || SEE(stop)); 229} 230 231/* 232 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 233 == static void p_ere_exp(register struct parse *p); 234 */ 235static void 236p_ere_exp(p) 237register struct parse *p; 238{ 239 register char c; 240 register sopno pos; 241 register int count; 242 register int count2; 243 register sopno subno; 244 int wascaret = 0; 245 246 assert(MORE()); /* caller should have ensured this */ 247 c = GETNEXT(); 248 249 pos = HERE(); 250 switch (c) { 251 case '(': 252 REQUIRE(MORE(), REG_EPAREN); 253 p->g->nsub++; 254 subno = p->g->nsub; 255 if (subno < NPAREN) 256 p->pbegin[subno] = HERE(); 257 EMIT(OLPAREN, subno); 258 if (!SEE(')')) 259 p_ere(p, ')'); 260 if (subno < NPAREN) { 261 p->pend[subno] = HERE(); 262 assert(p->pend[subno] != 0); 263 } 264 EMIT(ORPAREN, subno); 265 MUSTEAT(')', REG_EPAREN); 266 break; 267#ifndef POSIX_MISTAKE 268 case ')': /* happens only if no current unmatched ( */ 269 /* 270 * You may ask, why the ifndef? Because I didn't notice 271 * this until slightly too late for 1003.2, and none of the 272 * other 1003.2 regular-expression reviewers noticed it at 273 * all. So an unmatched ) is legal POSIX, at least until 274 * we can get it fixed. 275 */ 276 SETERROR(REG_EPAREN); 277 break; 278#endif 279 case '^': 280 EMIT(OBOL, 0); 281 p->g->iflags |= USEBOL; 282 p->g->nbol++; 283 wascaret = 1; 284 break; 285 case '$': 286 EMIT(OEOL, 0); 287 p->g->iflags |= USEEOL; 288 p->g->neol++; 289 break; 290 case '|': 291 SETERROR(REG_EMPTY); 292 break; 293 case '*': 294 case '+': 295 case '?': 296 SETERROR(REG_BADRPT); 297 break; 298 case '.': 299 if (p->g->cflags®_NEWLINE) 300 nonnewline(p); 301 else 302 EMIT(OANY, 0); 303 break; 304 case '[': 305 p_bracket(p); 306 break; 307 case '\\': 308 REQUIRE(MORE(), REG_EESCAPE); 309 c = GETNEXT(); 310 ordinary(p, c); 311 break; 312 case '{': /* okay as ordinary except if digit follows */ 313 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); 314 /* FALLTHROUGH */ 315 default: 316 ordinary(p, c); 317 break; 318 } 319 320 if (!MORE()) 321 return; 322 c = PEEK(); 323 /* we call { a repetition if followed by a digit */ 324 if (!( c == '*' || c == '+' || c == '?' || 325 (c == '{' && MORE2() && isdigit(PEEK2())) )) 326 return; /* no repetition, we're done */ 327 NEXT(); 328 329 REQUIRE(!wascaret, REG_BADRPT); 330 switch (c) { 331 case '*': /* implemented as +? */ 332 /* this case does not require the (y|) trick, noKLUDGE */ 333 INSERT(OPLUS_, pos); 334 ASTERN(O_PLUS, pos); 335 INSERT(OQUEST_, pos); 336 ASTERN(O_QUEST, pos); 337 break; 338 case '+': 339 INSERT(OPLUS_, pos); 340 ASTERN(O_PLUS, pos); 341 break; 342 case '?': 343 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 344 INSERT(OCH_, pos); /* offset slightly wrong */ 345 ASTERN(OOR1, pos); /* this one's right */ 346 AHEAD(pos); /* fix the OCH_ */ 347 EMIT(OOR2, 0); /* offset very wrong... */ 348 AHEAD(THERE()); /* ...so fix it */ 349 ASTERN(O_CH, THERETHERE()); 350 break; 351 case '{': 352 count = p_count(p); 353 if (EAT(',')) { 354 if (isdigit(PEEK())) { 355 count2 = p_count(p); 356 REQUIRE(count <= count2, REG_BADBR); 357 } else /* single number with comma */ 358 count2 = INFINITY; 359 } else /* just a single number */ 360 count2 = count; 361 repeat(p, pos, count, count2); 362 if (!EAT('}')) { /* error heuristics */ 363 while (MORE() && PEEK() != '}') 364 NEXT(); 365 REQUIRE(MORE(), REG_EBRACE); 366 SETERROR(REG_BADBR); 367 } 368 break; 369 } 370 371 if (!MORE()) 372 return; 373 c = PEEK(); 374 if (!( c == '*' || c == '+' || c == '?' || 375 (c == '{' && MORE2() && isdigit(PEEK2())) ) ) 376 return; 377 SETERROR(REG_BADRPT); 378} 379 380/* 381 - p_str - string (no metacharacters) "parser" 382 == static void p_str(register struct parse *p); 383 */ 384static void 385p_str(p) 386register struct parse *p; 387{ 388 REQUIRE(MORE(), REG_EMPTY); 389 while (MORE()) 390 ordinary(p, GETNEXT()); 391} 392 393/* 394 - p_bre - BRE parser top level, anchoring and concatenation 395 == static void p_bre(register struct parse *p, register int end1, \ 396 == register int end2); 397 * Giving end1 as OUT essentially eliminates the end1/end2 check. 398 * 399 * This implementation is a bit of a kludge, in that a trailing $ is first 400 * taken as an ordinary character and then revised to be an anchor. The 401 * only undesirable side effect is that '$' gets included as a character 402 * category in such cases. This is fairly harmless; not worth fixing. 403 * The amount of lookahead needed to avoid this kludge is excessive. 404 */ 405static void 406p_bre(p, end1, end2) 407register struct parse *p; 408register int end1; /* first terminating character */ 409register int end2; /* second terminating character */ 410{ 411 register sopno start = HERE(); 412 register int first = 1; /* first subexpression? */ 413 register int wasdollar = 0; 414 415 if (EAT('^')) { 416 EMIT(OBOL, 0); 417 p->g->iflags |= USEBOL; 418 p->g->nbol++; 419 } 420 while (MORE() && !SEETWO(end1, end2)) { 421 wasdollar = p_simp_re(p, first); 422 first = 0; 423 } 424 if (wasdollar) { /* oops, that was a trailing anchor */ 425 DROP(1); 426 EMIT(OEOL, 0); 427 p->g->iflags |= USEEOL; 428 p->g->neol++; 429 } 430 431 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 432} 433 434/* 435 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 436 == static int p_simp_re(register struct parse *p, int starordinary); 437 */ 438static int /* was the simple RE an unbackslashed $? */ 439p_simp_re(p, starordinary) 440register struct parse *p; 441int starordinary; /* is a leading * an ordinary character? */ 442{ 443 register int c; 444 register int count; 445 register int count2; 446 register sopno pos; 447 register int i; 448 register sopno subno; 449# define BACKSL (1<<CHAR_BIT) 450 451 pos = HERE(); /* repetion op, if any, covers from here */ 452 453 assert(MORE()); /* caller should have ensured this */ 454 c = GETNEXT(); 455 if (c == '\\') { 456 REQUIRE(MORE(), REG_EESCAPE); 457 c = BACKSL | (unsigned char)GETNEXT(); 458 } 459 switch (c) { 460 case '.': 461 if (p->g->cflags®_NEWLINE) 462 nonnewline(p); 463 else 464 EMIT(OANY, 0); 465 break; 466 case '[': 467 p_bracket(p); 468 break; 469 case BACKSL|'{': 470 SETERROR(REG_BADRPT); 471 break; 472 case BACKSL|'(': 473 p->g->nsub++; 474 subno = p->g->nsub; 475 if (subno < NPAREN) 476 p->pbegin[subno] = HERE(); 477 EMIT(OLPAREN, subno); 478 /* the MORE here is an error heuristic */ 479 if (MORE() && !SEETWO('\\', ')')) 480 p_bre(p, '\\', ')'); 481 if (subno < NPAREN) { 482 p->pend[subno] = HERE(); 483 assert(p->pend[subno] != 0); 484 } 485 EMIT(ORPAREN, subno); 486 REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 487 break; 488 case BACKSL|')': /* should not get here -- must be user */ 489 case BACKSL|'}': 490 SETERROR(REG_EPAREN); 491 break; 492 case BACKSL|'1': 493 case BACKSL|'2': 494 case BACKSL|'3': 495 case BACKSL|'4': 496 case BACKSL|'5': 497 case BACKSL|'6': 498 case BACKSL|'7': 499 case BACKSL|'8': 500 case BACKSL|'9': 501 i = (c&~BACKSL) - '0'; 502 assert(i < NPAREN); 503 if (p->pend[i] != 0) { 504 assert(i <= p->g->nsub); 505 EMIT(OBACK_, i); 506 assert(p->pbegin[i] != 0); 507 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 508 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 509 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 510 EMIT(O_BACK, i); 511 } else 512 SETERROR(REG_ESUBREG); 513 p->g->backrefs = 1; 514 break; 515 case '*': 516 REQUIRE(starordinary, REG_BADRPT); 517 /* FALLTHROUGH */ 518 default: 519 ordinary(p, (char)c); /* takes off BACKSL, if any */ 520 break; 521 } 522 523 if (EAT('*')) { /* implemented as +? */ 524 /* this case does not require the (y|) trick, noKLUDGE */ 525 INSERT(OPLUS_, pos); 526 ASTERN(O_PLUS, pos); 527 INSERT(OQUEST_, pos); 528 ASTERN(O_QUEST, pos); 529 } else if (EATTWO('\\', '{')) { 530 count = p_count(p); 531 if (EAT(',')) { 532 if (MORE() && isdigit(PEEK())) { 533 count2 = p_count(p); 534 REQUIRE(count <= count2, REG_BADBR); 535 } else /* single number with comma */ 536 count2 = INFINITY; 537 } else /* just a single number */ 538 count2 = count; 539 repeat(p, pos, count, count2); 540 if (!EATTWO('\\', '}')) { /* error heuristics */ 541 while (MORE() && !SEETWO('\\', '}')) 542 NEXT(); 543 REQUIRE(MORE(), REG_EBRACE); 544 SETERROR(REG_BADBR); 545 } 546 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 547 return(1); 548 549 return(0); 550} 551 552/* 553 - p_count - parse a repetition count 554 == static int p_count(register struct parse *p); 555 */ 556static int /* the value */ 557p_count(p) 558register struct parse *p; 559{ 560 register int count = 0; 561 register int ndigits = 0; 562 563 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { 564 count = count*10 + (GETNEXT() - '0'); 565 ndigits++; 566 } 567 568 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 569 return(count); 570} 571 572/* 573 - p_bracket - parse a bracketed character list 574 == static void p_bracket(register struct parse *p); 575 * 576 * Note a significant property of this code: if the allocset() did SETERROR, 577 * no set operations are done. 578 */ 579static void 580p_bracket(p) 581register struct parse *p; 582{ 583 register cset *cs = allocset(p); 584 register int invert = 0; 585 586 /* Dept of Truly Sickening Special-Case Kludges */ 587 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 588 EMIT(OBOW, 0); 589 NEXTn(6); 590 return; 591 } 592 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 593 EMIT(OEOW, 0); 594 NEXTn(6); 595 return; 596 } 597 598 if (EAT('^')) 599 invert++; /* make note to invert set at end */ 600 if (EAT(']')) 601 CHadd(cs, ']'); 602 else if (EAT('-')) 603 CHadd(cs, '-'); 604 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 605 p_b_term(p, cs); 606 if (EAT('-')) 607 CHadd(cs, '-'); 608 MUSTEAT(']', REG_EBRACK); 609 610 if (p->error != 0) /* don't mess things up further */ 611 return; 612 613 if (p->g->cflags®_ICASE) { 614 register int i; 615 register int ci; 616 617 for (i = p->g->csetsize - 1; i >= 0; i--) 618 if (CHIN(cs, i) && isalpha(i)) { 619 ci = othercase(i); 620 if (ci != i) 621 CHadd(cs, ci); 622 } 623 if (cs->multis != NULL) 624 mccase(p, cs); 625 } 626 if (invert) { 627 register int i; 628 629 for (i = p->g->csetsize - 1; i >= 0; i--) 630 if (CHIN(cs, i)) 631 CHsub(cs, i); 632 else 633 CHadd(cs, i); 634 if (p->g->cflags®_NEWLINE) 635 CHsub(cs, '\n'); 636 if (cs->multis != NULL) 637 mcinvert(p, cs); 638 } 639 640 assert(cs->multis == NULL); /* xxx */ 641 642 if (nch(p, cs) == 1) { /* optimize singleton sets */ 643 ordinary(p, firstch(p, cs)); 644 freeset(p, cs); 645 } else 646 EMIT(OANYOF, freezeset(p, cs)); 647} 648 649/* 650 - p_b_term - parse one term of a bracketed character list 651 == static void p_b_term(register struct parse *p, register cset *cs); 652 */ 653static void 654p_b_term(p, cs) 655register struct parse *p; 656register cset *cs; 657{ 658 register char c; 659 register char start, finish; 660 register int i; 661 662 /* classify what we've got */ 663 switch ((MORE()) ? PEEK() : '\0') { 664 case '[': 665 c = (MORE2()) ? PEEK2() : '\0'; 666 break; 667 case '-': 668 SETERROR(REG_ERANGE); 669 return; /* NOTE RETURN */ 670 break; 671 default: 672 c = '\0'; 673 break; 674 } 675 676 switch (c) { 677 case ':': /* character class */ 678 NEXT2(); 679 REQUIRE(MORE(), REG_EBRACK); 680 c = PEEK(); 681 REQUIRE(c != '-' && c != ']', REG_ECTYPE); 682 p_b_cclass(p, cs); 683 REQUIRE(MORE(), REG_EBRACK); 684 REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 685 break; 686 case '=': /* equivalence class */ 687 NEXT2(); 688 REQUIRE(MORE(), REG_EBRACK); 689 c = PEEK(); 690 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 691 p_b_eclass(p, cs); 692 REQUIRE(MORE(), REG_EBRACK); 693 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 694 break; 695 default: /* symbol, ordinary character, or range */ 696/* xxx revision needed for multichar stuff */ 697 start = p_b_symbol(p); 698 if (SEE('-') && MORE2() && PEEK2() != ']') { 699 /* range */ 700 NEXT(); 701 if (EAT('-')) 702 finish = '-'; 703 else 704 finish = p_b_symbol(p); 705 } else 706 finish = start; 707/* xxx what about signed chars here... */ 708 REQUIRE(start <= finish, REG_ERANGE); 709 for (i = start; i <= finish; i++) 710 CHadd(cs, i); 711 break; 712 } 713} 714 715/* 716 - p_b_cclass - parse a character-class name and deal with it 717 == static void p_b_cclass(register struct parse *p, register cset *cs); 718 */ 719static void 720p_b_cclass(p, cs) 721register struct parse *p; 722register cset *cs; 723{ 724 register char *sp = p->next; 725 register struct cclass *cp; 726 register size_t len; 727 register char *u; 728 register char c; 729 730 while (MORE() && isalpha(PEEK())) 731 NEXT(); 732 len = p->next - sp; 733 for (cp = cclasses; cp->name != NULL; cp++) 734 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 735 break; 736 if (cp->name == NULL) { 737 /* oops, didn't find it */ 738 SETERROR(REG_ECTYPE); 739 return; 740 } 741 742 u = cp->chars; 743 while ((c = *u++) != '\0') 744 CHadd(cs, c); 745 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 746 MCadd(p, cs, u); 747} 748 749/* 750 - p_b_eclass - parse an equivalence-class name and deal with it 751 == static void p_b_eclass(register struct parse *p, register cset *cs); 752 * 753 * This implementation is incomplete. xxx 754 */ 755static void 756p_b_eclass(p, cs) 757register struct parse *p; 758register cset *cs; 759{ 760 register char c; 761 762 c = p_b_coll_elem(p, '='); 763 CHadd(cs, c); 764} 765 766/* 767 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 768 == static char p_b_symbol(register struct parse *p); 769 */ 770static char /* value of symbol */ 771p_b_symbol(p) 772register struct parse *p; 773{ 774 register char value; 775 776 REQUIRE(MORE(), REG_EBRACK); 777 if (!EATTWO('[', '.')) 778 return(GETNEXT()); 779 780 /* collating symbol */ 781 value = p_b_coll_elem(p, '.'); 782 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 783 return(value); 784} 785 786/* 787 - p_b_coll_elem - parse a collating-element name and look it up 788 == static char p_b_coll_elem(register struct parse *p, int endc); 789 */ 790static char /* value of collating element */ 791p_b_coll_elem(p, endc) 792register struct parse *p; 793int endc; /* name ended by endc,']' */ 794{ 795 register char *sp = p->next; 796 register struct cname *cp; 797 register int len; 798 799 while (MORE() && !SEETWO(endc, ']')) 800 NEXT(); 801 if (!MORE()) { 802 SETERROR(REG_EBRACK); 803 return(0); 804 } 805 len = p->next - sp; 806 for (cp = cnames; cp->name != NULL; cp++) 807 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 808 return(cp->code); /* known name */ 809 if (len == 1) 810 return(*sp); /* single character */ 811 SETERROR(REG_ECOLLATE); /* neither */ 812 return(0); 813} 814 815/* 816 - othercase - return the case counterpart of an alphabetic 817 == static char othercase(int ch); 818 */ 819static char /* if no counterpart, return ch */ 820othercase(ch) 821int ch; 822{ 823 assert(isalpha(ch)); 824 if (isupper(ch)) 825 return(tolower(ch)); 826 else if (islower(ch)) 827 return(toupper(ch)); 828 else /* peculiar, but could happen */ 829 return(ch); 830} 831 832/* 833 - bothcases - emit a dualcase version of a two-case character 834 == static void bothcases(register struct parse *p, int ch); 835 * 836 * Boy, is this implementation ever a kludge... 837 */ 838static void 839bothcases(p, ch) 840register struct parse *p; 841int ch; 842{ 843 register char *oldnext = p->next; 844 register char *oldend = p->end; 845 char bracket[3]; 846 847 assert(othercase(ch) != ch); /* p_bracket() would recurse */ 848 p->next = bracket; 849 p->end = bracket+2; 850 bracket[0] = ch; 851 bracket[1] = ']'; 852 bracket[2] = '\0'; 853 p_bracket(p); 854 assert(p->next == bracket+2); 855 p->next = oldnext; 856 p->end = oldend; 857} 858 859/* 860 - ordinary - emit an ordinary character 861 == static void ordinary(register struct parse *p, register int ch); 862 */ 863static void 864ordinary(p, ch) 865register struct parse *p; 866register int ch; 867{ 868 register cat_t *cap = p->g->categories; 869 870 if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) 871 bothcases(p, ch); 872 else { 873 EMIT(OCHAR, (unsigned char)ch); 874 if (cap[ch] == 0) 875 cap[ch] = p->g->ncategories++; 876 } 877} 878 879/* 880 - nonnewline - emit REG_NEWLINE version of OANY 881 == static void nonnewline(register struct parse *p); 882 * 883 * Boy, is this implementation ever a kludge... 884 */ 885static void 886nonnewline(p) 887register struct parse *p; 888{ 889 register char *oldnext = p->next; 890 register char *oldend = p->end; 891 char bracket[4]; 892 893 p->next = bracket; 894 p->end = bracket+3; 895 bracket[0] = '^'; 896 bracket[1] = '\n'; 897 bracket[2] = ']'; 898 bracket[3] = '\0'; 899 p_bracket(p); 900 assert(p->next == bracket+3); 901 p->next = oldnext; 902 p->end = oldend; 903} 904 905/* 906 - repeat - generate code for a bounded repetition, recursively if needed 907 == static void repeat(register struct parse *p, sopno start, int from, int to); 908 */ 909static void 910repeat(p, start, from, to) 911register struct parse *p; 912sopno start; /* operand from here to end of strip */ 913int from; /* repeated from this number */ 914int to; /* to this number of times (maybe INFINITY) */ 915{ 916 register sopno finish = HERE(); 917# define N 2 918# define INF 3 919# define REP(f, t) ((f)*8 + (t)) 920# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 921 register sopno copy; 922 923 if (p->error != 0) /* head off possible runaway recursion */ 924 return; 925 926 assert(from <= to); 927 928 switch (REP(MAP(from), MAP(to))) { 929 case REP(0, 0): /* must be user doing this */ 930 DROP(finish-start); /* drop the operand */ 931 break; 932 case REP(0, 1): /* as x{1,1}? */ 933 case REP(0, N): /* as x{1,n}? */ 934 case REP(0, INF): /* as x{1,}? */ 935 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 936 INSERT(OCH_, start); /* offset is wrong... */ 937 repeat(p, start+1, 1, to); 938 ASTERN(OOR1, start); 939 AHEAD(start); /* ... fix it */ 940 EMIT(OOR2, 0); 941 AHEAD(THERE()); 942 ASTERN(O_CH, THERETHERE()); 943 break; 944 case REP(1, 1): /* trivial case */ 945 /* done */ 946 break; 947 case REP(1, N): /* as x?x{1,n-1} */ 948 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 949 INSERT(OCH_, start); 950 ASTERN(OOR1, start); 951 AHEAD(start); 952 EMIT(OOR2, 0); /* offset very wrong... */ 953 AHEAD(THERE()); /* ...so fix it */ 954 ASTERN(O_CH, THERETHERE()); 955 copy = dupl(p, start+1, finish+1); 956 assert(copy == finish+4); 957 repeat(p, copy, 1, to-1); 958 break; 959 case REP(1, INF): /* as x+ */ 960 INSERT(OPLUS_, start); 961 ASTERN(O_PLUS, start); 962 break; 963 case REP(N, N): /* as xx{m-1,n-1} */ 964 copy = dupl(p, start, finish); 965 repeat(p, copy, from-1, to-1); 966 break; 967 case REP(N, INF): /* as xx{n-1,INF} */ 968 copy = dupl(p, start, finish); 969 repeat(p, copy, from-1, to); 970 break; 971 default: /* "can't happen" */ 972 SETERROR(REG_ASSERT); /* just in case */ 973 break; 974 } 975} 976 977/* 978 - seterr - set an error condition 979 == static int seterr(register struct parse *p, int e); 980 */ 981static int /* useless but makes type checking happy */ 982seterr(p, e) 983register struct parse *p; 984int e; 985{ 986 if (p->error == 0) /* keep earliest error condition */ 987 p->error = e; 988 p->next = nuls; /* try to bring things to a halt */ 989 p->end = nuls; 990 return(0); /* make the return value well-defined */ 991} 992 993/* 994 - allocset - allocate a set of characters for [] 995 == static cset *allocset(register struct parse *p); 996 */ 997static cset * 998allocset(p) 999register struct parse *p; 1000{ 1001 register int no = p->g->ncsets++; 1002 register size_t nc; 1003 register size_t nbytes; 1004 register cset *cs; 1005 register size_t css = (size_t)p->g->csetsize; 1006 register int i; 1007 1008 if (no >= p->ncsalloc) { /* need another column of space */ 1009 p->ncsalloc += CHAR_BIT; 1010 nc = p->ncsalloc; 1011 assert(nc % CHAR_BIT == 0); 1012 nbytes = nc / CHAR_BIT * css; 1013 if (p->g->sets == NULL) 1014 p->g->sets = (cset *)malloc(nc * sizeof(cset)); 1015 else 1016 p->g->sets = (cset *)realloc((char *)p->g->sets, 1017 nc * sizeof(cset)); 1018 if (p->g->setbits == NULL) 1019 p->g->setbits = (uch *)malloc(nbytes); 1020 else { 1021 p->g->setbits = (uch *)realloc((char *)p->g->setbits, 1022 nbytes); 1023 /* xxx this isn't right if setbits is now NULL */ 1024 for (i = 0; i < no; i++) 1025 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1026 } 1027 if (p->g->sets != NULL && p->g->setbits != NULL) 1028 (void) memset((char *)p->g->setbits + (nbytes - css), 1029 0, css); 1030 else { 1031 no = 0; 1032 SETERROR(REG_ESPACE); 1033 /* caller's responsibility not to do set ops */ 1034 } 1035 } 1036 1037 assert(p->g->sets != NULL); /* xxx */ 1038 cs = &p->g->sets[no]; 1039 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1040 cs->mask = 1 << ((no) % CHAR_BIT); 1041 cs->hash = 0; 1042 cs->smultis = 0; 1043 cs->multis = NULL; 1044 1045 return(cs); 1046} 1047 1048/* 1049 - freeset - free a now-unused set 1050 == static void freeset(register struct parse *p, register cset *cs); 1051 */ 1052static void 1053freeset(p, cs) 1054register struct parse *p; 1055register cset *cs; 1056{ 1057 register int i; 1058 register cset *top = &p->g->sets[p->g->ncsets]; 1059 register size_t css = (size_t)p->g->csetsize; 1060 1061 for (i = 0; i < css; i++) 1062 CHsub(cs, i); 1063 if (cs == top-1) /* recover only the easy case */ 1064 p->g->ncsets--; 1065} 1066 1067/* 1068 - freezeset - final processing on a set of characters 1069 == static int freezeset(register struct parse *p, register cset *cs); 1070 * 1071 * The main task here is merging identical sets. This is usually a waste 1072 * of time (although the hash code minimizes the overhead), but can win 1073 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1074 * is done using addition rather than xor -- all ASCII [aA] sets xor to 1075 * the same value! 1076 */ 1077static int /* set number */ 1078freezeset(p, cs) 1079register struct parse *p; 1080register cset *cs; 1081{ 1082 register uch h = cs->hash; 1083 register int i; 1084 register cset *top = &p->g->sets[p->g->ncsets]; 1085 register cset *cs2; 1086 register size_t css = (size_t)p->g->csetsize; 1087 1088 /* look for an earlier one which is the same */ 1089 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1090 if (cs2->hash == h && cs2 != cs) { 1091 /* maybe */ 1092 for (i = 0; i < css; i++) 1093 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1094 break; /* no */ 1095 if (i == css) 1096 break; /* yes */ 1097 } 1098 1099 if (cs2 < top) { /* found one */ 1100 freeset(p, cs); 1101 cs = cs2; 1102 } 1103 1104 return((int)(cs - p->g->sets)); 1105} 1106 1107/* 1108 - firstch - return first character in a set (which must have at least one) 1109 == static int firstch(register struct parse *p, register cset *cs); 1110 */ 1111static int /* character; there is no "none" value */ 1112firstch(p, cs) 1113register struct parse *p; 1114register cset *cs; 1115{ 1116 register int i; 1117 register size_t css = (size_t)p->g->csetsize; 1118 1119 for (i = 0; i < css; i++) 1120 if (CHIN(cs, i)) 1121 return((char)i); 1122 assert(never); 1123 return(0); /* arbitrary */ 1124} 1125 1126/* 1127 - nch - number of characters in a set 1128 == static int nch(register struct parse *p, register cset *cs); 1129 */ 1130static int 1131nch(p, cs) 1132register struct parse *p; 1133register cset *cs; 1134{ 1135 register int i; 1136 register size_t css = (size_t)p->g->csetsize; 1137 register int n = 0; 1138 1139 for (i = 0; i < css; i++) 1140 if (CHIN(cs, i)) 1141 n++; 1142 return(n); 1143} 1144 1145/* 1146 - mcadd - add a collating element to a cset 1147 == static void mcadd(register struct parse *p, register cset *cs, \ 1148 == register char *cp); 1149 */ 1150static void 1151mcadd(p, cs, cp) 1152register struct parse *p; 1153register cset *cs; 1154register char *cp; 1155{ 1156 register size_t oldend = cs->smultis; 1157 1158 cs->smultis += strlen(cp) + 1; 1159 if (cs->multis == NULL) 1160 cs->multis = malloc(cs->smultis); 1161 else 1162 cs->multis = realloc(cs->multis, cs->smultis); 1163 if (cs->multis == NULL) { 1164 SETERROR(REG_ESPACE); 1165 return; 1166 } 1167 1168 (void) strcpy(cs->multis + oldend - 1, cp); 1169 cs->multis[cs->smultis - 1] = '\0'; 1170} 1171 1172/* 1173 - mcsub - subtract a collating element from a cset 1174 == static void mcsub(register cset *cs, register char *cp); 1175 */ 1176static void 1177mcsub(cs, cp) 1178register cset *cs; 1179register char *cp; 1180{ 1181 register char *fp = mcfind(cs, cp); 1182 register size_t len = strlen(fp); 1183 1184 assert(fp != NULL); 1185 (void) memmove(fp, fp + len + 1, 1186 cs->smultis - (fp + len + 1 - cs->multis)); 1187 cs->smultis -= len; 1188 1189 if (cs->smultis == 0) { 1190 free(cs->multis); 1191 cs->multis = NULL; 1192 return; 1193 } 1194 1195 cs->multis = realloc(cs->multis, cs->smultis); 1196 assert(cs->multis != NULL); 1197} 1198 1199/* 1200 - mcin - is a collating element in a cset? 1201 == static int mcin(register cset *cs, register char *cp); 1202 */ 1203static int 1204mcin(cs, cp) 1205register cset *cs; 1206register char *cp; 1207{ 1208 return(mcfind(cs, cp) != NULL); 1209} 1210 1211/* 1212 - mcfind - find a collating element in a cset 1213 == static char *mcfind(register cset *cs, register char *cp); 1214 */ 1215static char * 1216mcfind(cs, cp) 1217register cset *cs; 1218register char *cp; 1219{ 1220 register char *p; 1221 1222 if (cs->multis == NULL) 1223 return(NULL); 1224 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 1225 if (strcmp(cp, p) == 0) 1226 return(p); 1227 return(NULL); 1228} 1229 1230/* 1231 - mcinvert - invert the list of collating elements in a cset 1232 == static void mcinvert(register struct parse *p, register cset *cs); 1233 * 1234 * This would have to know the set of possibilities. Implementation 1235 * is deferred. 1236 */ 1237static void 1238mcinvert(p, cs) 1239register struct parse *p; 1240register cset *cs; 1241{ 1242 assert(cs->multis == NULL); /* xxx */ 1243} 1244 1245/* 1246 - mccase - add case counterparts of the list of collating elements in a cset 1247 == static void mccase(register struct parse *p, register cset *cs); 1248 * 1249 * This would have to know the set of possibilities. Implementation 1250 * is deferred. 1251 */ 1252static void 1253mccase(p, cs) 1254register struct parse *p; 1255register cset *cs; 1256{ 1257 assert(cs->multis == NULL); /* xxx */ 1258} 1259 1260/* 1261 - isinsets - is this character in any sets? 1262 == static int isinsets(register struct re_guts *g, int c); 1263 */ 1264static int /* predicate */ 1265isinsets(g, c) 1266register struct re_guts *g; 1267int c; 1268{ 1269 register uch *col; 1270 register int i; 1271 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1272 register unsigned uc = (unsigned char)c; 1273 1274 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1275 if (col[uc] != 0) 1276 return(1); 1277 return(0); 1278} 1279 1280/* 1281 - samesets - are these two characters in exactly the same sets? 1282 == static int samesets(register struct re_guts *g, int c1, int c2); 1283 */ 1284static int /* predicate */ 1285samesets(g, c1, c2) 1286register struct re_guts *g; 1287int c1; 1288int c2; 1289{ 1290 register uch *col; 1291 register int i; 1292 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1293 register unsigned uc1 = (unsigned char)c1; 1294 register unsigned uc2 = (unsigned char)c2; 1295 1296 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1297 if (col[uc1] != col[uc2]) 1298 return(0); 1299 return(1); 1300} 1301 1302/* 1303 - categorize - sort out character categories 1304 == static void categorize(struct parse *p, register struct re_guts *g); 1305 */ 1306static void 1307categorize(p, g) 1308struct parse *p; 1309register struct re_guts *g; 1310{ 1311 register cat_t *cats = g->categories; 1312 register int c; 1313 register int c2; 1314 register cat_t cat; 1315 1316 /* avoid making error situations worse */ 1317 if (p->error != 0) 1318 return; 1319 1320 for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1321 if (cats[c] == 0 && isinsets(g, c)) { 1322 cat = g->ncategories++; 1323 cats[c] = cat; 1324 for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1325 if (cats[c2] == 0 && samesets(g, c, c2)) 1326 cats[c2] = cat; 1327 } 1328} 1329 1330/* 1331 - dupl - emit a duplicate of a bunch of sops 1332 == static sopno dupl(register struct parse *p, sopno start, sopno finish); 1333 */ 1334static sopno /* start of duplicate */ 1335dupl(p, start, finish) 1336register struct parse *p; 1337sopno start; /* from here */ 1338sopno finish; /* to this less one */ 1339{ 1340 register sopno ret = HERE(); 1341 register sopno len = finish - start; 1342 1343 assert(finish >= start); 1344 if (len == 0) 1345 return(ret); 1346 enlarge(p, p->ssize + len); /* this many unexpected additions */ 1347 assert(p->ssize >= p->slen + len); 1348 (void) memcpy((char *)(p->strip + p->slen), 1349 (char *)(p->strip + start), (size_t)len*sizeof(sop)); 1350 p->slen += len; 1351 return(ret); 1352} 1353 1354/* 1355 - doemit - emit a strip operator 1356 == static void doemit(register struct parse *p, sop op, size_t opnd); 1357 * 1358 * It might seem better to implement this as a macro with a function as 1359 * hard-case backup, but it's just too big and messy unless there are 1360 * some changes to the data structures. Maybe later. 1361 */ 1362static void 1363doemit(p, op, opnd) 1364register struct parse *p; 1365sop op; 1366size_t opnd; 1367{ 1368 /* avoid making error situations worse */ 1369 if (p->error != 0) 1370 return; 1371 1372 /* deal with oversize operands ("can't happen", more or less) */ 1373 assert(opnd < 1<<OPSHIFT); 1374 1375 /* deal with undersized strip */ 1376 if (p->slen >= p->ssize) 1377 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 1378 assert(p->slen < p->ssize); 1379 1380 /* finally, it's all reduced to the easy case */ 1381 p->strip[p->slen++] = SOP(op, opnd); 1382} 1383 1384/* 1385 - doinsert - insert a sop into the strip 1386 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 1387 */ 1388static void 1389doinsert(p, op, opnd, pos) 1390register struct parse *p; 1391sop op; 1392size_t opnd; 1393sopno pos; 1394{ 1395 register sopno sn; 1396 register sop s; 1397 register int i; 1398 1399 /* avoid making error situations worse */ 1400 if (p->error != 0) 1401 return; 1402 1403 sn = HERE(); 1404 EMIT(op, opnd); /* do checks, ensure space */ 1405 assert(HERE() == sn+1); 1406 s = p->strip[sn]; 1407 1408 /* adjust paren pointers */ 1409 assert(pos > 0); 1410 for (i = 1; i < NPAREN; i++) { 1411 if (p->pbegin[i] >= pos) { 1412 p->pbegin[i]++; 1413 } 1414 if (p->pend[i] >= pos) { 1415 p->pend[i]++; 1416 } 1417 } 1418 1419 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1420 (HERE()-pos-1)*sizeof(sop)); 1421 p->strip[pos] = s; 1422} 1423 1424/* 1425 - dofwd - complete a forward reference 1426 == static void dofwd(register struct parse *p, sopno pos, sop value); 1427 */ 1428static void 1429dofwd(p, pos, value) 1430register struct parse *p; 1431register sopno pos; 1432sop value; 1433{ 1434 /* avoid making error situations worse */ 1435 if (p->error != 0) 1436 return; 1437 1438 assert(value < 1<<OPSHIFT); 1439 p->strip[pos] = OP(p->strip[pos]) | value; 1440} 1441 1442/* 1443 - enlarge - enlarge the strip 1444 == static void enlarge(register struct parse *p, sopno size); 1445 */ 1446static void 1447enlarge(p, size) 1448register struct parse *p; 1449register sopno size; 1450{ 1451 register sop *sp; 1452 1453 if (p->ssize >= size) 1454 return; 1455 1456 sp = (sop *)realloc(p->strip, size*sizeof(sop)); 1457 if (sp == NULL) { 1458 SETERROR(REG_ESPACE); 1459 return; 1460 } 1461 p->strip = sp; 1462 p->ssize = size; 1463} 1464 1465/* 1466 - stripsnug - compact the strip 1467 == static void stripsnug(register struct parse *p, register struct re_guts *g); 1468 */ 1469static void 1470stripsnug(p, g) 1471register struct parse *p; 1472register struct re_guts *g; 1473{ 1474 g->nstates = p->slen; 1475 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 1476 if (g->strip == NULL) { 1477 SETERROR(REG_ESPACE); 1478 g->strip = p->strip; 1479 } 1480} 1481 1482/* 1483 - findmust - fill in must and mlen with longest mandatory literal string 1484 == static void findmust(register struct parse *p, register struct re_guts *g); 1485 * 1486 * This algorithm could do fancy things like analyzing the operands of | 1487 * for common subsequences. Someday. This code is simple and finds most 1488 * of the interesting cases. 1489 * 1490 * Note that must and mlen got initialized during setup. 1491 */ 1492static void 1493findmust(p, g) 1494struct parse *p; 1495register struct re_guts *g; 1496{ 1497 register sop *scan; 1498 sop *start; 1499 register sop *newstart; 1500 register sopno newlen; 1501 register sop s; 1502 register char *cp; 1503 register sopno i; 1504 1505 /* avoid making error situations worse */ 1506 if (p->error != 0) 1507 return; 1508 1509 /* find the longest OCHAR sequence in strip */ 1510 newlen = 0; 1511 scan = g->strip + 1; 1512 do { 1513 s = *scan++; 1514 switch (OP(s)) { 1515 case OCHAR: /* sequence member */ 1516 if (newlen == 0) /* new sequence */ 1517 newstart = scan - 1; 1518 newlen++; 1519 break; 1520 case OPLUS_: /* things that don't break one */ 1521 case OLPAREN: 1522 case ORPAREN: 1523 break; 1524 case OQUEST_: /* things that must be skipped */ 1525 case OCH_: 1526 scan--; 1527 do { 1528 scan += OPND(s); 1529 s = *scan; 1530 /* assert() interferes w debug printouts */ 1531 if (OP(s) != O_QUEST && OP(s) != O_CH && 1532 OP(s) != OOR2) { 1533 g->iflags |= BAD; 1534 return; 1535 } 1536 } while (OP(s) != O_QUEST && OP(s) != O_CH); 1537 /* fallthrough */ 1538 default: /* things that break a sequence */ 1539 if (newlen > g->mlen) { /* ends one */ 1540 start = newstart; 1541 g->mlen = newlen; 1542 } 1543 newlen = 0; 1544 break; 1545 } 1546 } while (OP(s) != OEND); 1547 1548 if (g->mlen == 0) /* there isn't one */ 1549 return; 1550 1551 /* turn it into a character string */ 1552 g->must = malloc((size_t)g->mlen + 1); 1553 if (g->must == NULL) { /* argh; just forget it */ 1554 g->mlen = 0; 1555 return; 1556 } 1557 cp = g->must; 1558 scan = start; 1559 for (i = g->mlen; i > 0; i--) { 1560 while (OP(s = *scan++) != OCHAR) 1561 continue; 1562 assert(cp < g->must + g->mlen); 1563 *cp++ = (char)OPND(s); 1564 } 1565 assert(cp == g->must + g->mlen); 1566 *cp++ = '\0'; /* just on general principles */ 1567} 1568 1569/* 1570 - pluscount - count + nesting 1571 == static sopno pluscount(register struct parse *p, register struct re_guts *g); 1572 */ 1573static sopno /* nesting depth */ 1574pluscount(p, g) 1575struct parse *p; 1576register struct re_guts *g; 1577{ 1578 register sop *scan; 1579 register sop s; 1580 register sopno plusnest = 0; 1581 register sopno maxnest = 0; 1582 1583 if (p->error != 0) 1584 return(0); /* there may not be an OEND */ 1585 1586 scan = g->strip + 1; 1587 do { 1588 s = *scan++; 1589 switch (OP(s)) { 1590 case OPLUS_: 1591 plusnest++; 1592 break; 1593 case O_PLUS: 1594 if (plusnest > maxnest) 1595 maxnest = plusnest; 1596 plusnest--; 1597 break; 1598 } 1599 } while (OP(s) != OEND); 1600 if (plusnest != 0) 1601 g->iflags |= BAD; 1602 return(maxnest); 1603} 1604