Deleted Added
full compact
b.c (146299) b.c (170331)
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this

--- 8 unchanged lines hidden (view full) ---

17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this

--- 8 unchanged lines hidden (view full) ---

17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'entrate. */
25/* lasciate ogne speranza, voi ch'intrate. */
26
27#define DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"

--- 5 unchanged lines hidden (view full) ---

39
40#define type(v) (v)->nobj /* badly overloaded here */
41#define info(v) (v)->ntype /* badly overloaded here */
42#define left(v) (v)->narg[0]
43#define right(v) (v)->narg[1]
44#define parent(v) (v)->nnext
45
46#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
26
27#define DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"

--- 5 unchanged lines hidden (view full) ---

39
40#define type(v) (v)->nobj /* badly overloaded here */
41#define info(v) (v)->ntype /* badly overloaded here */
42#define left(v) (v)->narg[0]
43#define right(v) (v)->narg[1]
44#define parent(v) (v)->nnext
45
46#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47#define ELEAF case EMPTYRE: /* empty string in regexp */
47#define UNARY case STAR: case PLUS: case QUEST:
48
49/* encoding in tree Nodes:
48#define UNARY case STAR: case PLUS: case QUEST:
49
50/* encoding in tree Nodes:
50 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
51 left is index, right contains value or pointer to value
52 unary (STAR, PLUS, QUEST): left is child, right is null
53 binary (CAT, OR): left and right are children
54 parent contains pointer to parent
55*/
56
57
58int *setvec;

--- 118 unchanged lines hidden (view full) ---

177 --(*f->posns[f->curstat]);
178 }
179 return f->curstat;
180}
181
182void penter(Node *p) /* set up parent pointers and leaf indices */
183{
184 switch (type(p)) {
52 left is index, right contains value or pointer to value
53 unary (STAR, PLUS, QUEST): left is child, right is null
54 binary (CAT, OR): left and right are children
55 parent contains pointer to parent
56*/
57
58
59int *setvec;

--- 118 unchanged lines hidden (view full) ---

178 --(*f->posns[f->curstat]);
179 }
180 return f->curstat;
181}
182
183void penter(Node *p) /* set up parent pointers and leaf indices */
184{
185 switch (type(p)) {
186 ELEAF
185 LEAF
186 info(p) = poscnt;
187 poscnt++;
188 break;
189 UNARY
190 penter(left(p));
191 parent(left(p)) = p;
192 break;

--- 8 unchanged lines hidden (view full) ---

201 FATAL("can't happen: unknown type %d in penter", type(p));
202 break;
203 }
204}
205
206void freetr(Node *p) /* free parse tree */
207{
208 switch (type(p)) {
187 LEAF
188 info(p) = poscnt;
189 poscnt++;
190 break;
191 UNARY
192 penter(left(p));
193 parent(left(p)) = p;
194 break;

--- 8 unchanged lines hidden (view full) ---

203 FATAL("can't happen: unknown type %d in penter", type(p));
204 break;
205 }
206}
207
208void freetr(Node *p) /* free parse tree */
209{
210 switch (type(p)) {
211 ELEAF
209 LEAF
210 xfree(p);
211 break;
212 UNARY
213 freetr(left(p));
214 xfree(p);
215 break;
216 case CAT:

--- 87 unchanged lines hidden (view full) ---

304 if (c2 == '\\')
305 c2 = quoted((char **) &p);
306 if (c > c2) { /* empty; ignore */
307 bp--;
308 i--;
309 continue;
310 }
311 while (c < c2) {
212 LEAF
213 xfree(p);
214 break;
215 UNARY
216 freetr(left(p));
217 xfree(p);
218 break;
219 case CAT:

--- 87 unchanged lines hidden (view full) ---

307 if (c2 == '\\')
308 c2 = quoted((char **) &p);
309 if (c > c2) { /* empty; ignore */
310 bp--;
311 i--;
312 continue;
313 }
314 while (c < c2) {
312 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
315 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
313 FATAL("out of space for character class [%.10s...] 2", p);
314 *bp++ = ++c;
315 i++;
316 }
317 continue;
318 }
319 }
316 FATAL("out of space for character class [%.10s...] 2", p);
317 *bp++ = ++c;
318 i++;
319 }
320 continue;
321 }
322 }
320 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
323 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
321 FATAL("out of space for character class [%.10s...] 3", p);
322 *bp++ = c;
323 i++;
324 }
325 *bp = 0;
326 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327 xfree(op);
328 return (char *) tostring((char *) buf);

--- 5 unchanged lines hidden (view full) ---

334}
335
336void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
337{
338 int i;
339 int *p;
340
341 switch (type(v)) {
324 FATAL("out of space for character class [%.10s...] 3", p);
325 *bp++ = c;
326 i++;
327 }
328 *bp = 0;
329 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
330 xfree(op);
331 return (char *) tostring((char *) buf);

--- 5 unchanged lines hidden (view full) ---

337}
338
339void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
340{
341 int i;
342 int *p;
343
344 switch (type(v)) {
345 ELEAF
342 LEAF
343 f->re[info(v)].ltype = type(v);
344 f->re[info(v)].lval.np = right(v);
345 while (f->accept >= maxsetvec) { /* guessing here! */
346 maxsetvec *= 4;
347 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349 if (setvec == 0 || tmpset == 0)

--- 20 unchanged lines hidden (view full) ---

370 cfoll(f,right(v));
371 break;
372 default: /* can't happen */
373 FATAL("can't happen: unknown type %d in cfoll", type(v));
374 }
375}
376
377int first(Node *p) /* collects initially active leaves of p into setvec */
346 LEAF
347 f->re[info(v)].ltype = type(v);
348 f->re[info(v)].lval.np = right(v);
349 while (f->accept >= maxsetvec) { /* guessing here! */
350 maxsetvec *= 4;
351 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
352 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
353 if (setvec == 0 || tmpset == 0)

--- 20 unchanged lines hidden (view full) ---

374 cfoll(f,right(v));
375 break;
376 default: /* can't happen */
377 FATAL("can't happen: unknown type %d in cfoll", type(v));
378 }
379}
380
381int first(Node *p) /* collects initially active leaves of p into setvec */
378 /* returns 1 if p matches empty string */
382 /* returns 0 if p matches empty string */
379{
380 int b, lp;
381
382 switch (type(p)) {
383{
384 int b, lp;
385
386 switch (type(p)) {
387 ELEAF
383 LEAF
384 lp = info(p); /* look for high-water mark of subscripts */
385 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
386 maxsetvec *= 4;
387 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389 if (setvec == 0 || tmpset == 0)
390 overflo("out of space in first()");
391 }
388 LEAF
389 lp = info(p); /* look for high-water mark of subscripts */
390 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
391 maxsetvec *= 4;
392 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
393 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
394 if (setvec == 0 || tmpset == 0)
395 overflo("out of space in first()");
396 }
397 if (type(p) == EMPTYRE) {
398 setvec[lp] = 0;
399 return(0);
400 }
392 if (setvec[lp] != 1) {
393 setvec[lp] = 1;
394 setcnt++;
395 }
396 if (type(p) == CCL && (*(char *) right(p)) == '\0')
397 return(0); /* empty CCL */
398 else return(1);
399 case PLUS:

--- 60 unchanged lines hidden (view full) ---

460{
461 int s, ns;
462 uschar *p = (uschar *) p0;
463
464 s = f->reset ? makeinit(f,0) : f->initstat;
465 if (f->out[s])
466 return(1);
467 do {
401 if (setvec[lp] != 1) {
402 setvec[lp] = 1;
403 setcnt++;
404 }
405 if (type(p) == CCL && (*(char *) right(p)) == '\0')
406 return(0); /* empty CCL */
407 else return(1);
408 case PLUS:

--- 60 unchanged lines hidden (view full) ---

469{
470 int s, ns;
471 uschar *p = (uschar *) p0;
472
473 s = f->reset ? makeinit(f,0) : f->initstat;
474 if (f->out[s])
475 return(1);
476 do {
468 assert(*p < NCHARS);
477 /* assert(*p < NCHARS); */
469 if ((ns = f->gototab[s][*p]) != 0)
470 s = ns;
471 else
472 s = cgoto(f, s, *p);
473 if (f->out[s])
474 return(1);
475 } while (*p++ != 0);
476 return(0);

--- 14 unchanged lines hidden (view full) ---

491 }
492 patbeg = (char *) p;
493 patlen = -1;
494 do {
495 q = p;
496 do {
497 if (f->out[s]) /* final state */
498 patlen = q-p;
478 if ((ns = f->gototab[s][*p]) != 0)
479 s = ns;
480 else
481 s = cgoto(f, s, *p);
482 if (f->out[s])
483 return(1);
484 } while (*p++ != 0);
485 return(0);

--- 14 unchanged lines hidden (view full) ---

500 }
501 patbeg = (char *) p;
502 patlen = -1;
503 do {
504 q = p;
505 do {
506 if (f->out[s]) /* final state */
507 patlen = q-p;
499 assert(*q < NCHARS);
508 /* assert(*q < NCHARS); */
500 if ((ns = f->gototab[s][*q]) != 0)
501 s = ns;
502 else
503 s = cgoto(f, s, *q);
504 if (s == 1) { /* no transition */
505 if (patlen >= 0) {
506 patbeg = (char *) p;
507 return(1);

--- 41 unchanged lines hidden (view full) ---

549 s = f->initstat;
550 }
551 patlen = -1;
552 while (*p) {
553 q = p;
554 do {
555 if (f->out[s]) /* final state */
556 patlen = q-p;
509 if ((ns = f->gototab[s][*q]) != 0)
510 s = ns;
511 else
512 s = cgoto(f, s, *q);
513 if (s == 1) { /* no transition */
514 if (patlen >= 0) {
515 patbeg = (char *) p;
516 return(1);

--- 41 unchanged lines hidden (view full) ---

558 s = f->initstat;
559 }
560 patlen = -1;
561 while (*p) {
562 q = p;
563 do {
564 if (f->out[s]) /* final state */
565 patlen = q-p;
557 assert(*q < NCHARS);
566 /* assert(*q < NCHARS); */
558 if ((ns = f->gototab[s][*q]) != 0)
559 s = ns;
560 else
561 s = cgoto(f, s, *q);
562 if (s == 1) { /* no transition */
563 if (patlen > 0) {
564 patbeg = (char *) p;
565 return(1);

--- 30 unchanged lines hidden (view full) ---

596Node *reparse(const char *p) /* parses regular expression pointed to by p */
597{ /* uses relex() to scan regular expression */
598 Node *np;
599
600 dprintf( ("reparse <%s>\n", p) );
601 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
602 rtok = relex();
603 /* GNU compatibility: an empty regexp matches anything */
567 if ((ns = f->gototab[s][*q]) != 0)
568 s = ns;
569 else
570 s = cgoto(f, s, *q);
571 if (s == 1) { /* no transition */
572 if (patlen > 0) {
573 patbeg = (char *) p;
574 return(1);

--- 30 unchanged lines hidden (view full) ---

605Node *reparse(const char *p) /* parses regular expression pointed to by p */
606{ /* uses relex() to scan regular expression */
607 Node *np;
608
609 dprintf( ("reparse <%s>\n", p) );
610 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
611 rtok = relex();
612 /* GNU compatibility: an empty regexp matches anything */
604 if (rtok == '\0')
613 if (rtok == '\0') {
605 /* FATAL("empty regular expression"); previous */
614 /* FATAL("empty regular expression"); previous */
606 return(op2(ALL, NIL, NIL));
615 return(op2(EMPTYRE, NIL, NIL));
616 }
607 np = regexp();
608 if (rtok != '\0')
609 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
610 return(np);
611}
612
613Node *regexp(void) /* top-level parse of reg expr */
614{

--- 7 unchanged lines hidden (view full) ---

622 switch (rtok) {
623 case CHAR:
624 np = op2(CHAR, NIL, itonp(rlxval));
625 rtok = relex();
626 return (unary(np));
627 case ALL:
628 rtok = relex();
629 return (unary(op2(ALL, NIL, NIL)));
617 np = regexp();
618 if (rtok != '\0')
619 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
620 return(np);
621}
622
623Node *regexp(void) /* top-level parse of reg expr */
624{

--- 7 unchanged lines hidden (view full) ---

632 switch (rtok) {
633 case CHAR:
634 np = op2(CHAR, NIL, itonp(rlxval));
635 rtok = relex();
636 return (unary(np));
637 case ALL:
638 rtok = relex();
639 return (unary(op2(ALL, NIL, NIL)));
640 case EMPTYRE:
641 rtok = relex();
642 return (unary(op2(ALL, NIL, NIL)));
630 case DOT:
631 rtok = relex();
632 return (unary(op2(DOT, NIL, NIL)));
633 case CCL:
634 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
635 rtok = relex();
636 return (unary(np));
637 case NCCL:

--- 23 unchanged lines hidden (view full) ---

661 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
662 }
663 return 0; /*NOTREACHED*/
664}
665
666Node *concat(Node *np)
667{
668 switch (rtok) {
643 case DOT:
644 rtok = relex();
645 return (unary(op2(DOT, NIL, NIL)));
646 case CCL:
647 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
648 rtok = relex();
649 return (unary(np));
650 case NCCL:

--- 23 unchanged lines hidden (view full) ---

674 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
675 }
676 return 0; /*NOTREACHED*/
677}
678
679Node *concat(Node *np)
680{
681 switch (rtok) {
669 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
682 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
670 return (concat(op2(CAT, np, primary())));
671 }
672 return (np);
673}
674
675Node *alt(Node *np)
676{
677 if (rtok == OR) {

--- 104 unchanged lines hidden (view full) ---

782 bp = buf;
783 if (*prestr == '^') {
784 cflag = 1;
785 prestr++;
786 }
787 else
788 cflag = 0;
789 n = 2 * strlen((const char *) prestr)+1;
683 return (concat(op2(CAT, np, primary())));
684 }
685 return (np);
686}
687
688Node *alt(Node *np)
689{
690 if (rtok == OR) {

--- 104 unchanged lines hidden (view full) ---

795 bp = buf;
796 if (*prestr == '^') {
797 cflag = 1;
798 prestr++;
799 }
800 else
801 cflag = 0;
802 n = 2 * strlen((const char *) prestr)+1;
790 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
803 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
791 FATAL("out of space for reg expr %.10s...", lastre);
792 for (; ; ) {
793 if ((c = *prestr++) == '\\') {
794 *bp++ = '\\';
795 if ((c = *prestr++) == '\0')
796 FATAL("nonterminated character class %.20s...", lastre);
797 *bp++ = c;
798 /* } else if (c == '\n') { */
799 /* FATAL("newline in character class %.20s...", lastre); */
800 } else if (c == '[' && *prestr == ':') {
801 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
802 for (cc = charclasses; cc->cc_name; cc++)
803 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
804 break;
805 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
806 prestr[2 + cc->cc_namelen] == ']') {
807 prestr += cc->cc_namelen + 3;
808 for (i = 0; i < NCHARS; i++) {
804 FATAL("out of space for reg expr %.10s...", lastre);
805 for (; ; ) {
806 if ((c = *prestr++) == '\\') {
807 *bp++ = '\\';
808 if ((c = *prestr++) == '\0')
809 FATAL("nonterminated character class %.20s...", lastre);
810 *bp++ = c;
811 /* } else if (c == '\n') { */
812 /* FATAL("newline in character class %.20s...", lastre); */
813 } else if (c == '[' && *prestr == ':') {
814 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
815 for (cc = charclasses; cc->cc_name; cc++)
816 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
817 break;
818 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
819 prestr[2 + cc->cc_namelen] == ']') {
820 prestr += cc->cc_namelen + 3;
821 for (i = 0; i < NCHARS; i++) {
809 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
822 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
810 FATAL("out of space for reg expr %.10s...", lastre);
811 if (cc->cc_func(i)) {
812 *bp++ = i;
813 n++;
814 }
815 }
816 } else
817 *bp++ = c;

--- 32 unchanged lines hidden (view full) ---

850 setcnt = 0;
851 /* compute positions of gototab[s,c] into setvec */
852 p = f->posns[s];
853 for (i = 1; i <= *p; i++) {
854 if ((k = f->re[p[i]].ltype) != FINAL) {
855 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
856 || (k == DOT && c != 0 && c != HAT)
857 || (k == ALL && c != 0)
823 FATAL("out of space for reg expr %.10s...", lastre);
824 if (cc->cc_func(i)) {
825 *bp++ = i;
826 n++;
827 }
828 }
829 } else
830 *bp++ = c;

--- 32 unchanged lines hidden (view full) ---

863 setcnt = 0;
864 /* compute positions of gototab[s,c] into setvec */
865 p = f->posns[s];
866 for (i = 1; i <= *p; i++) {
867 if ((k = f->re[p[i]].ltype) != FINAL) {
868 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
869 || (k == DOT && c != 0 && c != HAT)
870 || (k == ALL && c != 0)
871 || (k == EMPTYRE && c != 0)
858 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
859 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
860 q = f->re[p[i]].lfollow;
861 for (j = 1; j <= *q; j++) {
862 if (q[j] >= maxsetvec) {
863 maxsetvec *= 4;
864 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
865 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));

--- 74 unchanged lines hidden ---
872 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
873 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
874 q = f->re[p[i]].lfollow;
875 for (j = 1; j <= *q; j++) {
876 if (q[j] >= maxsetvec) {
877 maxsetvec *= 4;
878 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
879 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));

--- 74 unchanged lines hidden ---