reader.c revision 1591
1/* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Robert Paul Corbett. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#ifndef lint 38static char sccsid[] = "@(#)reader.c 5.7 (Berkeley) 1/20/91"; 39#endif /* not lint */ 40 41#include "defs.h" 42 43/* The line size must be a positive integer. One hundred was chosen */ 44/* because few lines in Yacc input grammars exceed 100 characters. */ 45/* Note that if a line exceeds LINESIZE characters, the line buffer */ 46/* will be expanded to accomodate it. */ 47 48#define LINESIZE 100 49 50char *cache; 51int cinc, cache_size; 52 53int ntags, tagmax; 54char **tag_table; 55 56char saw_eof, unionized; 57char *cptr, *line; 58int linesize; 59 60bucket *goal; 61int prec; 62int gensym; 63char last_was_action; 64 65int maxitems; 66bucket **pitem; 67 68int maxrules; 69bucket **plhs; 70 71int name_pool_size; 72char *name_pool; 73 74char line_format[] = "#line %d \"%s\"\n"; 75 76 77cachec(c) 78int c; 79{ 80 assert(cinc >= 0); 81 if (cinc >= cache_size) 82 { 83 cache_size += 256; 84 cache = REALLOC(cache, cache_size); 85 if (cache == 0) no_space(); 86 } 87 cache[cinc] = c; 88 ++cinc; 89} 90 91 92get_line() 93{ 94 register FILE *f = input_file; 95 register int c; 96 register int i; 97 98 if (saw_eof || (c = getc(f)) == EOF) 99 { 100 if (line) { FREE(line); line = 0; } 101 cptr = 0; 102 saw_eof = 1; 103 return; 104 } 105 106 if (line == 0 || linesize != (LINESIZE + 1)) 107 { 108 if (line) FREE(line); 109 linesize = LINESIZE + 1; 110 line = MALLOC(linesize); 111 if (line == 0) no_space(); 112 } 113 114 i = 0; 115 ++lineno; 116 for (;;) 117 { 118 line[i] = c; 119 if (c == '\n') { cptr = line; return; } 120 if (++i >= linesize) 121 { 122 linesize += LINESIZE; 123 line = REALLOC(line, linesize); 124 if (line == 0) no_space(); 125 } 126 c = getc(f); 127 if (c == EOF) 128 { 129 line[i] = '\n'; 130 saw_eof = 1; 131 cptr = line; 132 return; 133 } 134 } 135} 136 137 138char * 139dup_line() 140{ 141 register char *p, *s, *t; 142 143 if (line == 0) return (0); 144 s = line; 145 while (*s != '\n') ++s; 146 p = MALLOC(s - line + 1); 147 if (p == 0) no_space(); 148 149 s = line; 150 t = p; 151 while ((*t++ = *s++) != '\n') continue; 152 return (p); 153} 154 155 156skip_comment() 157{ 158 register char *s; 159 160 int st_lineno = lineno; 161 char *st_line = dup_line(); 162 char *st_cptr = st_line + (cptr - line); 163 164 s = cptr + 2; 165 for (;;) 166 { 167 if (*s == '*' && s[1] == '/') 168 { 169 cptr = s + 2; 170 FREE(st_line); 171 return; 172 } 173 if (*s == '\n') 174 { 175 get_line(); 176 if (line == 0) 177 unterminated_comment(st_lineno, st_line, st_cptr); 178 s = cptr; 179 } 180 else 181 ++s; 182 } 183} 184 185 186int 187nextc() 188{ 189 register char *s; 190 191 if (line == 0) 192 { 193 get_line(); 194 if (line == 0) 195 return (EOF); 196 } 197 198 s = cptr; 199 for (;;) 200 { 201 switch (*s) 202 { 203 case '\n': 204 get_line(); 205 if (line == 0) return (EOF); 206 s = cptr; 207 break; 208 209 case ' ': 210 case '\t': 211 case '\f': 212 case '\r': 213 case '\v': 214 case ',': 215 case ';': 216 ++s; 217 break; 218 219 case '\\': 220 cptr = s; 221 return ('%'); 222 223 case '/': 224 if (s[1] == '*') 225 { 226 cptr = s; 227 skip_comment(); 228 s = cptr; 229 break; 230 } 231 else if (s[1] == '/') 232 { 233 get_line(); 234 if (line == 0) return (EOF); 235 s = cptr; 236 break; 237 } 238 /* fall through */ 239 240 default: 241 cptr = s; 242 return (*s); 243 } 244 } 245} 246 247 248int 249keyword() 250{ 251 register int c; 252 char *t_cptr = cptr; 253 254 c = *++cptr; 255 if (isalpha(c)) 256 { 257 cinc = 0; 258 for (;;) 259 { 260 if (isalpha(c)) 261 { 262 if (isupper(c)) c = tolower(c); 263 cachec(c); 264 } 265 else if (isdigit(c) || c == '_' || c == '.' || c == '$') 266 cachec(c); 267 else 268 break; 269 c = *++cptr; 270 } 271 cachec(NUL); 272 273 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0) 274 return (TOKEN); 275 if (strcmp(cache, "type") == 0) 276 return (TYPE); 277 if (strcmp(cache, "left") == 0) 278 return (LEFT); 279 if (strcmp(cache, "right") == 0) 280 return (RIGHT); 281 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0) 282 return (NONASSOC); 283 if (strcmp(cache, "start") == 0) 284 return (START); 285 if (strcmp(cache, "union") == 0) 286 return (UNION); 287 if (strcmp(cache, "ident") == 0) 288 return (IDENT); 289 } 290 else 291 { 292 ++cptr; 293 if (c == '{') 294 return (TEXT); 295 if (c == '%' || c == '\\') 296 return (MARK); 297 if (c == '<') 298 return (LEFT); 299 if (c == '>') 300 return (RIGHT); 301 if (c == '0') 302 return (TOKEN); 303 if (c == '2') 304 return (NONASSOC); 305 } 306 syntax_error(lineno, line, t_cptr); 307 /*NOTREACHED*/ 308} 309 310 311copy_ident() 312{ 313 register int c; 314 register FILE *f = output_file; 315 316 c = nextc(); 317 if (c == EOF) unexpected_EOF(); 318 if (c != '"') syntax_error(lineno, line, cptr); 319 ++outline; 320 fprintf(f, "#ident \""); 321 for (;;) 322 { 323 c = *++cptr; 324 if (c == '\n') 325 { 326 fprintf(f, "\"\n"); 327 return; 328 } 329 putc(c, f); 330 if (c == '"') 331 { 332 putc('\n', f); 333 ++cptr; 334 return; 335 } 336 } 337} 338 339 340copy_text() 341{ 342 register int c; 343 int quote; 344 register FILE *f = text_file; 345 int need_newline = 0; 346 int t_lineno = lineno; 347 char *t_line = dup_line(); 348 char *t_cptr = t_line + (cptr - line - 2); 349 350 if (*cptr == '\n') 351 { 352 get_line(); 353 if (line == 0) 354 unterminated_text(t_lineno, t_line, t_cptr); 355 } 356 if (!lflag) fprintf(f, line_format, lineno, input_file_name); 357 358loop: 359 c = *cptr++; 360 switch (c) 361 { 362 case '\n': 363 next_line: 364 putc('\n', f); 365 need_newline = 0; 366 get_line(); 367 if (line) goto loop; 368 unterminated_text(t_lineno, t_line, t_cptr); 369 370 case '\'': 371 case '"': 372 { 373 int s_lineno = lineno; 374 char *s_line = dup_line(); 375 char *s_cptr = s_line + (cptr - line - 1); 376 377 quote = c; 378 putc(c, f); 379 for (;;) 380 { 381 c = *cptr++; 382 putc(c, f); 383 if (c == quote) 384 { 385 need_newline = 1; 386 FREE(s_line); 387 goto loop; 388 } 389 if (c == '\n') 390 unterminated_string(s_lineno, s_line, s_cptr); 391 if (c == '\\') 392 { 393 c = *cptr++; 394 putc(c, f); 395 if (c == '\n') 396 { 397 get_line(); 398 if (line == 0) 399 unterminated_string(s_lineno, s_line, s_cptr); 400 } 401 } 402 } 403 } 404 405 case '/': 406 putc(c, f); 407 need_newline = 1; 408 c = *cptr; 409 if (c == '/') 410 { 411 putc('*', f); 412 while ((c = *++cptr) != '\n') 413 { 414 if (c == '*' && cptr[1] == '/') 415 fprintf(f, "* "); 416 else 417 putc(c, f); 418 } 419 fprintf(f, "*/"); 420 goto next_line; 421 } 422 if (c == '*') 423 { 424 int c_lineno = lineno; 425 char *c_line = dup_line(); 426 char *c_cptr = c_line + (cptr - line - 1); 427 428 putc('*', f); 429 ++cptr; 430 for (;;) 431 { 432 c = *cptr++; 433 putc(c, f); 434 if (c == '*' && *cptr == '/') 435 { 436 putc('/', f); 437 ++cptr; 438 FREE(c_line); 439 goto loop; 440 } 441 if (c == '\n') 442 { 443 get_line(); 444 if (line == 0) 445 unterminated_comment(c_lineno, c_line, c_cptr); 446 } 447 } 448 } 449 need_newline = 1; 450 goto loop; 451 452 case '%': 453 case '\\': 454 if (*cptr == '}') 455 { 456 if (need_newline) putc('\n', f); 457 ++cptr; 458 FREE(t_line); 459 return; 460 } 461 /* fall through */ 462 463 default: 464 putc(c, f); 465 need_newline = 1; 466 goto loop; 467 } 468} 469 470 471copy_union() 472{ 473 register int c; 474 int quote; 475 int depth; 476 int u_lineno = lineno; 477 char *u_line = dup_line(); 478 char *u_cptr = u_line + (cptr - line - 6); 479 480 if (unionized) over_unionized(cptr - 6); 481 unionized = 1; 482 483 if (!lflag) 484 fprintf(text_file, line_format, lineno, input_file_name); 485 486 fprintf(text_file, "typedef union"); 487 if (dflag) fprintf(union_file, "typedef union"); 488 489 depth = 0; 490loop: 491 c = *cptr++; 492 putc(c, text_file); 493 if (dflag) putc(c, union_file); 494 switch (c) 495 { 496 case '\n': 497 next_line: 498 get_line(); 499 if (line == 0) unterminated_union(u_lineno, u_line, u_cptr); 500 goto loop; 501 502 case '{': 503 ++depth; 504 goto loop; 505 506 case '}': 507 if (--depth == 0) 508 { 509 fprintf(text_file, " YYSTYPE;\n"); 510 FREE(u_line); 511 return; 512 } 513 goto loop; 514 515 case '\'': 516 case '"': 517 { 518 int s_lineno = lineno; 519 char *s_line = dup_line(); 520 char *s_cptr = s_line + (cptr - line - 1); 521 522 quote = c; 523 for (;;) 524 { 525 c = *cptr++; 526 putc(c, text_file); 527 if (dflag) putc(c, union_file); 528 if (c == quote) 529 { 530 FREE(s_line); 531 goto loop; 532 } 533 if (c == '\n') 534 unterminated_string(s_lineno, s_line, s_cptr); 535 if (c == '\\') 536 { 537 c = *cptr++; 538 putc(c, text_file); 539 if (dflag) putc(c, union_file); 540 if (c == '\n') 541 { 542 get_line(); 543 if (line == 0) 544 unterminated_string(s_lineno, s_line, s_cptr); 545 } 546 } 547 } 548 } 549 550 case '/': 551 c = *cptr; 552 if (c == '/') 553 { 554 putc('*', text_file); 555 if (dflag) putc('*', union_file); 556 while ((c = *++cptr) != '\n') 557 { 558 if (c == '*' && cptr[1] == '/') 559 { 560 fprintf(text_file, "* "); 561 if (dflag) fprintf(union_file, "* "); 562 } 563 else 564 { 565 putc(c, text_file); 566 if (dflag) putc(c, union_file); 567 } 568 } 569 fprintf(text_file, "*/\n"); 570 if (dflag) fprintf(union_file, "*/\n"); 571 goto next_line; 572 } 573 if (c == '*') 574 { 575 int c_lineno = lineno; 576 char *c_line = dup_line(); 577 char *c_cptr = c_line + (cptr - line - 1); 578 579 putc('*', text_file); 580 if (dflag) putc('*', union_file); 581 ++cptr; 582 for (;;) 583 { 584 c = *cptr++; 585 putc(c, text_file); 586 if (dflag) putc(c, union_file); 587 if (c == '*' && *cptr == '/') 588 { 589 putc('/', text_file); 590 if (dflag) putc('/', union_file); 591 ++cptr; 592 FREE(c_line); 593 goto loop; 594 } 595 if (c == '\n') 596 { 597 get_line(); 598 if (line == 0) 599 unterminated_comment(c_lineno, c_line, c_cptr); 600 } 601 } 602 } 603 goto loop; 604 605 default: 606 goto loop; 607 } 608} 609 610 611int 612hexval(c) 613int c; 614{ 615 if (c >= '0' && c <= '9') 616 return (c - '0'); 617 if (c >= 'A' && c <= 'F') 618 return (c - 'A' + 10); 619 if (c >= 'a' && c <= 'f') 620 return (c - 'a' + 10); 621 return (-1); 622} 623 624 625bucket * 626get_literal() 627{ 628 register int c, quote; 629 register int i; 630 register int n; 631 register char *s; 632 register bucket *bp; 633 int s_lineno = lineno; 634 char *s_line = dup_line(); 635 char *s_cptr = s_line + (cptr - line); 636 637 quote = *cptr++; 638 cinc = 0; 639 for (;;) 640 { 641 c = *cptr++; 642 if (c == quote) break; 643 if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr); 644 if (c == '\\') 645 { 646 char *c_cptr = cptr - 1; 647 648 c = *cptr++; 649 switch (c) 650 { 651 case '\n': 652 get_line(); 653 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr); 654 continue; 655 656 case '0': case '1': case '2': case '3': 657 case '4': case '5': case '6': case '7': 658 n = c - '0'; 659 c = *cptr; 660 if (IS_OCTAL(c)) 661 { 662 n = (n << 3) + (c - '0'); 663 c = *++cptr; 664 if (IS_OCTAL(c)) 665 { 666 n = (n << 3) + (c - '0'); 667 ++cptr; 668 } 669 } 670 if (n > MAXCHAR) illegal_character(c_cptr); 671 c = n; 672 break; 673 674 case 'x': 675 c = *cptr++; 676 n = hexval(c); 677 if (n < 0 || n >= 16) 678 illegal_character(c_cptr); 679 for (;;) 680 { 681 c = *cptr; 682 i = hexval(c); 683 if (i < 0 || i >= 16) break; 684 ++cptr; 685 n = (n << 4) + i; 686 if (n > MAXCHAR) illegal_character(c_cptr); 687 } 688 c = n; 689 break; 690 691 case 'a': c = 7; break; 692 case 'b': c = '\b'; break; 693 case 'f': c = '\f'; break; 694 case 'n': c = '\n'; break; 695 case 'r': c = '\r'; break; 696 case 't': c = '\t'; break; 697 case 'v': c = '\v'; break; 698 } 699 } 700 cachec(c); 701 } 702 FREE(s_line); 703 704 n = cinc; 705 s = MALLOC(n); 706 if (s == 0) no_space(); 707 708 for (i = 0; i < n; ++i) 709 s[i] = cache[i]; 710 711 cinc = 0; 712 if (n == 1) 713 cachec('\''); 714 else 715 cachec('"'); 716 717 for (i = 0; i < n; ++i) 718 { 719 c = ((unsigned char *)s)[i]; 720 if (c == '\\' || c == cache[0]) 721 { 722 cachec('\\'); 723 cachec(c); 724 } 725 else if (isprint(c)) 726 cachec(c); 727 else 728 { 729 cachec('\\'); 730 switch (c) 731 { 732 case 7: cachec('a'); break; 733 case '\b': cachec('b'); break; 734 case '\f': cachec('f'); break; 735 case '\n': cachec('n'); break; 736 case '\r': cachec('r'); break; 737 case '\t': cachec('t'); break; 738 case '\v': cachec('v'); break; 739 default: 740 cachec(((c >> 6) & 7) + '0'); 741 cachec(((c >> 3) & 7) + '0'); 742 cachec((c & 7) + '0'); 743 break; 744 } 745 } 746 } 747 748 if (n == 1) 749 cachec('\''); 750 else 751 cachec('"'); 752 753 cachec(NUL); 754 bp = lookup(cache); 755 bp->class = TERM; 756 if (n == 1 && bp->value == UNDEFINED) 757 bp->value = *(unsigned char *)s; 758 FREE(s); 759 760 return (bp); 761} 762 763 764int 765is_reserved(name) 766char *name; 767{ 768 char *s; 769 770 if (strcmp(name, ".") == 0 || 771 strcmp(name, "$accept") == 0 || 772 strcmp(name, "$end") == 0) 773 return (1); 774 775 if (name[0] == '$' && name[1] == '$' && isdigit(name[2])) 776 { 777 s = name + 3; 778 while (isdigit(*s)) ++s; 779 if (*s == NUL) return (1); 780 } 781 782 return (0); 783} 784 785 786bucket * 787get_name() 788{ 789 register int c; 790 791 cinc = 0; 792 for (c = *cptr; IS_IDENT(c); c = *++cptr) 793 cachec(c); 794 cachec(NUL); 795 796 if (is_reserved(cache)) used_reserved(cache); 797 798 return (lookup(cache)); 799} 800 801 802int 803get_number() 804{ 805 register int c; 806 register int n; 807 808 n = 0; 809 for (c = *cptr; isdigit(c); c = *++cptr) 810 n = 10*n + (c - '0'); 811 812 return (n); 813} 814 815 816char * 817get_tag() 818{ 819 register int c; 820 register int i; 821 register char *s; 822 int t_lineno = lineno; 823 char *t_line = dup_line(); 824 char *t_cptr = t_line + (cptr - line); 825 826 ++cptr; 827 c = nextc(); 828 if (c == EOF) unexpected_EOF(); 829 if (!isalpha(c) && c != '_' && c != '$') 830 illegal_tag(t_lineno, t_line, t_cptr); 831 832 cinc = 0; 833 do { cachec(c); c = *++cptr; } while (IS_IDENT(c)); 834 cachec(NUL); 835 836 c = nextc(); 837 if (c == EOF) unexpected_EOF(); 838 if (c != '>') 839 illegal_tag(t_lineno, t_line, t_cptr); 840 ++cptr; 841 842 for (i = 0; i < ntags; ++i) 843 { 844 if (strcmp(cache, tag_table[i]) == 0) 845 return (tag_table[i]); 846 } 847 848 if (ntags >= tagmax) 849 { 850 tagmax += 16; 851 tag_table = (char **) 852 (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *)) 853 : MALLOC(tagmax*sizeof(char *))); 854 if (tag_table == 0) no_space(); 855 } 856 857 s = MALLOC(cinc); 858 if (s == 0) no_space(); 859 strcpy(s, cache); 860 tag_table[ntags] = s; 861 ++ntags; 862 FREE(t_line); 863 return (s); 864} 865 866 867declare_tokens(assoc) 868int assoc; 869{ 870 register int c; 871 register bucket *bp; 872 int value; 873 char *tag = 0; 874 875 if (assoc != TOKEN) ++prec; 876 877 c = nextc(); 878 if (c == EOF) unexpected_EOF(); 879 if (c == '<') 880 { 881 tag = get_tag(); 882 c = nextc(); 883 if (c == EOF) unexpected_EOF(); 884 } 885 886 for (;;) 887 { 888 if (isalpha(c) || c == '_' || c == '.' || c == '$') 889 bp = get_name(); 890 else if (c == '\'' || c == '"') 891 bp = get_literal(); 892 else 893 return; 894 895 if (bp == goal) tokenized_start(bp->name); 896 bp->class = TERM; 897 898 if (tag) 899 { 900 if (bp->tag && tag != bp->tag) 901 retyped_warning(bp->name); 902 bp->tag = tag; 903 } 904 905 if (assoc != TOKEN) 906 { 907 if (bp->prec && prec != bp->prec) 908 reprec_warning(bp->name); 909 bp->assoc = assoc; 910 bp->prec = prec; 911 } 912 913 c = nextc(); 914 if (c == EOF) unexpected_EOF(); 915 value = UNDEFINED; 916 if (isdigit(c)) 917 { 918 value = get_number(); 919 if (bp->value != UNDEFINED && value != bp->value) 920 revalued_warning(bp->name); 921 bp->value = value; 922 c = nextc(); 923 if (c == EOF) unexpected_EOF(); 924 } 925 } 926} 927 928 929declare_types() 930{ 931 register int c; 932 register bucket *bp; 933 char *tag; 934 935 c = nextc(); 936 if (c == EOF) unexpected_EOF(); 937 if (c != '<') syntax_error(lineno, line, cptr); 938 tag = get_tag(); 939 940 for (;;) 941 { 942 c = nextc(); 943 if (isalpha(c) || c == '_' || c == '.' || c == '$') 944 bp = get_name(); 945 else if (c == '\'' || c == '"') 946 bp = get_literal(); 947 else 948 return; 949 950 if (bp->tag && tag != bp->tag) 951 retyped_warning(bp->name); 952 bp->tag = tag; 953 } 954} 955 956 957declare_start() 958{ 959 register int c; 960 register bucket *bp; 961 962 c = nextc(); 963 if (c == EOF) unexpected_EOF(); 964 if (!isalpha(c) && c != '_' && c != '.' && c != '$') 965 syntax_error(lineno, line, cptr); 966 bp = get_name(); 967 if (bp->class == TERM) 968 terminal_start(bp->name); 969 if (goal && goal != bp) 970 restarted_warning(); 971 goal = bp; 972} 973 974 975read_declarations() 976{ 977 register int c, k; 978 979 cache_size = 256; 980 cache = MALLOC(cache_size); 981 if (cache == 0) no_space(); 982 983 for (;;) 984 { 985 c = nextc(); 986 if (c == EOF) unexpected_EOF(); 987 if (c != '%') syntax_error(lineno, line, cptr); 988 switch (k = keyword()) 989 { 990 case MARK: 991 return; 992 993 case IDENT: 994 copy_ident(); 995 break; 996 997 case TEXT: 998 copy_text(); 999 break; 1000 1001 case UNION: 1002 copy_union(); 1003 break; 1004 1005 case TOKEN: 1006 case LEFT: 1007 case RIGHT: 1008 case NONASSOC: 1009 declare_tokens(k); 1010 break; 1011 1012 case TYPE: 1013 declare_types(); 1014 break; 1015 1016 case START: 1017 declare_start(); 1018 break; 1019 } 1020 } 1021} 1022 1023 1024initialize_grammar() 1025{ 1026 nitems = 4; 1027 maxitems = 300; 1028 pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *)); 1029 if (pitem == 0) no_space(); 1030 pitem[0] = 0; 1031 pitem[1] = 0; 1032 pitem[2] = 0; 1033 pitem[3] = 0; 1034 1035 nrules = 3; 1036 maxrules = 100; 1037 plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *)); 1038 if (plhs == 0) no_space(); 1039 plhs[0] = 0; 1040 plhs[1] = 0; 1041 plhs[2] = 0; 1042 rprec = (short *) MALLOC(maxrules*sizeof(short)); 1043 if (rprec == 0) no_space(); 1044 rprec[0] = 0; 1045 rprec[1] = 0; 1046 rprec[2] = 0; 1047 rassoc = (char *) MALLOC(maxrules*sizeof(char)); 1048 if (rassoc == 0) no_space(); 1049 rassoc[0] = TOKEN; 1050 rassoc[1] = TOKEN; 1051 rassoc[2] = TOKEN; 1052} 1053 1054 1055expand_items() 1056{ 1057 maxitems += 300; 1058 pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *)); 1059 if (pitem == 0) no_space(); 1060} 1061 1062 1063expand_rules() 1064{ 1065 maxrules += 100; 1066 plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *)); 1067 if (plhs == 0) no_space(); 1068 rprec = (short *) REALLOC(rprec, maxrules*sizeof(short)); 1069 if (rprec == 0) no_space(); 1070 rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char)); 1071 if (rassoc == 0) no_space(); 1072} 1073 1074 1075advance_to_start() 1076{ 1077 register int c; 1078 register bucket *bp; 1079 char *s_cptr; 1080 int s_lineno; 1081 1082 for (;;) 1083 { 1084 c = nextc(); 1085 if (c != '%') break; 1086 s_cptr = cptr; 1087 switch (keyword()) 1088 { 1089 case MARK: 1090 no_grammar(); 1091 1092 case TEXT: 1093 copy_text(); 1094 break; 1095 1096 case START: 1097 declare_start(); 1098 break; 1099 1100 default: 1101 syntax_error(lineno, line, s_cptr); 1102 } 1103 } 1104 1105 c = nextc(); 1106 if (!isalpha(c) && c != '_' && c != '.' && c != '_') 1107 syntax_error(lineno, line, cptr); 1108 bp = get_name(); 1109 if (goal == 0) 1110 { 1111 if (bp->class == TERM) 1112 terminal_start(bp->name); 1113 goal = bp; 1114 } 1115 1116 s_lineno = lineno; 1117 c = nextc(); 1118 if (c == EOF) unexpected_EOF(); 1119 if (c != ':') syntax_error(lineno, line, cptr); 1120 start_rule(bp, s_lineno); 1121 ++cptr; 1122} 1123 1124 1125start_rule(bp, s_lineno) 1126register bucket *bp; 1127int s_lineno; 1128{ 1129 if (bp->class == TERM) 1130 terminal_lhs(s_lineno); 1131 bp->class = NONTERM; 1132 if (nrules >= maxrules) 1133 expand_rules(); 1134 plhs[nrules] = bp; 1135 rprec[nrules] = UNDEFINED; 1136 rassoc[nrules] = TOKEN; 1137} 1138 1139 1140end_rule() 1141{ 1142 register int i; 1143 1144 if (!last_was_action && plhs[nrules]->tag) 1145 { 1146 for (i = nitems - 1; pitem[i]; --i) continue; 1147 if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag) 1148 default_action_warning(); 1149 } 1150 1151 last_was_action = 0; 1152 if (nitems >= maxitems) expand_items(); 1153 pitem[nitems] = 0; 1154 ++nitems; 1155 ++nrules; 1156} 1157 1158 1159insert_empty_rule() 1160{ 1161 register bucket *bp, **bpp; 1162 1163 assert(cache); 1164 sprintf(cache, "$$%d", ++gensym); 1165 bp = make_bucket(cache); 1166 last_symbol->next = bp; 1167 last_symbol = bp; 1168 bp->tag = plhs[nrules]->tag; 1169 bp->class = NONTERM; 1170 1171 if ((nitems += 2) > maxitems) 1172 expand_items(); 1173 bpp = pitem + nitems - 1; 1174 *bpp-- = bp; 1175 while (bpp[0] = bpp[-1]) --bpp; 1176 1177 if (++nrules >= maxrules) 1178 expand_rules(); 1179 plhs[nrules] = plhs[nrules-1]; 1180 plhs[nrules-1] = bp; 1181 rprec[nrules] = rprec[nrules-1]; 1182 rprec[nrules-1] = 0; 1183 rassoc[nrules] = rassoc[nrules-1]; 1184 rassoc[nrules-1] = TOKEN; 1185} 1186 1187 1188add_symbol() 1189{ 1190 register int c; 1191 register bucket *bp; 1192 int s_lineno = lineno; 1193 1194 c = *cptr; 1195 if (c == '\'' || c == '"') 1196 bp = get_literal(); 1197 else 1198 bp = get_name(); 1199 1200 c = nextc(); 1201 if (c == ':') 1202 { 1203 end_rule(); 1204 start_rule(bp, s_lineno); 1205 ++cptr; 1206 return; 1207 } 1208 1209 if (last_was_action) 1210 insert_empty_rule(); 1211 last_was_action = 0; 1212 1213 if (++nitems > maxitems) 1214 expand_items(); 1215 pitem[nitems-1] = bp; 1216} 1217 1218 1219copy_action() 1220{ 1221 register int c; 1222 register int i, n; 1223 int depth; 1224 int quote; 1225 char *tag; 1226 register FILE *f = action_file; 1227 int a_lineno = lineno; 1228 char *a_line = dup_line(); 1229 char *a_cptr = a_line + (cptr - line); 1230 1231 if (last_was_action) 1232 insert_empty_rule(); 1233 last_was_action = 1; 1234 1235 fprintf(f, "case %d:\n", nrules - 2); 1236 if (!lflag) 1237 fprintf(f, line_format, lineno, input_file_name); 1238 if (*cptr == '=') ++cptr; 1239 1240 n = 0; 1241 for (i = nitems - 1; pitem[i]; --i) ++n; 1242 1243 depth = 0; 1244loop: 1245 c = *cptr; 1246 if (c == '$') 1247 { 1248 if (cptr[1] == '<') 1249 { 1250 int d_lineno = lineno; 1251 char *d_line = dup_line(); 1252 char *d_cptr = d_line + (cptr - line); 1253 1254 ++cptr; 1255 tag = get_tag(); 1256 c = *cptr; 1257 if (c == '$') 1258 { 1259 fprintf(f, "yyval.%s", tag); 1260 ++cptr; 1261 FREE(d_line); 1262 goto loop; 1263 } 1264 else if (isdigit(c)) 1265 { 1266 i = get_number(); 1267 if (i > n) dollar_warning(d_lineno, i); 1268 fprintf(f, "yyvsp[%d].%s", i - n, tag); 1269 FREE(d_line); 1270 goto loop; 1271 } 1272 else if (c == '-' && isdigit(cptr[1])) 1273 { 1274 ++cptr; 1275 i = -get_number() - n; 1276 fprintf(f, "yyvsp[%d].%s", i, tag); 1277 FREE(d_line); 1278 goto loop; 1279 } 1280 else 1281 dollar_error(d_lineno, d_line, d_cptr); 1282 } 1283 else if (cptr[1] == '$') 1284 { 1285 if (ntags) 1286 { 1287 tag = plhs[nrules]->tag; 1288 if (tag == 0) untyped_lhs(); 1289 fprintf(f, "yyval.%s", tag); 1290 } 1291 else 1292 fprintf(f, "yyval"); 1293 cptr += 2; 1294 goto loop; 1295 } 1296 else if (isdigit(cptr[1])) 1297 { 1298 ++cptr; 1299 i = get_number(); 1300 if (ntags) 1301 { 1302 if (i <= 0 || i > n) 1303 unknown_rhs(i); 1304 tag = pitem[nitems + i - n - 1]->tag; 1305 if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name); 1306 fprintf(f, "yyvsp[%d].%s", i - n, tag); 1307 } 1308 else 1309 { 1310 if (i > n) 1311 dollar_warning(lineno, i); 1312 fprintf(f, "yyvsp[%d]", i - n); 1313 } 1314 goto loop; 1315 } 1316 else if (cptr[1] == '-') 1317 { 1318 cptr += 2; 1319 i = get_number(); 1320 if (ntags) 1321 unknown_rhs(-i); 1322 fprintf(f, "yyvsp[%d]", -i - n); 1323 goto loop; 1324 } 1325 } 1326 if (isalpha(c) || c == '_' || c == '$') 1327 { 1328 do 1329 { 1330 putc(c, f); 1331 c = *++cptr; 1332 } while (isalnum(c) || c == '_' || c == '$'); 1333 goto loop; 1334 } 1335 putc(c, f); 1336 ++cptr; 1337 switch (c) 1338 { 1339 case '\n': 1340 next_line: 1341 get_line(); 1342 if (line) goto loop; 1343 unterminated_action(a_lineno, a_line, a_cptr); 1344 1345 case ';': 1346 if (depth > 0) goto loop; 1347 fprintf(f, "\nbreak;\n"); 1348 return; 1349 1350 case '{': 1351 ++depth; 1352 goto loop; 1353 1354 case '}': 1355 if (--depth > 0) goto loop; 1356 fprintf(f, "\nbreak;\n"); 1357 return; 1358 1359 case '\'': 1360 case '"': 1361 { 1362 int s_lineno = lineno; 1363 char *s_line = dup_line(); 1364 char *s_cptr = s_line + (cptr - line - 1); 1365 1366 quote = c; 1367 for (;;) 1368 { 1369 c = *cptr++; 1370 putc(c, f); 1371 if (c == quote) 1372 { 1373 FREE(s_line); 1374 goto loop; 1375 } 1376 if (c == '\n') 1377 unterminated_string(s_lineno, s_line, s_cptr); 1378 if (c == '\\') 1379 { 1380 c = *cptr++; 1381 putc(c, f); 1382 if (c == '\n') 1383 { 1384 get_line(); 1385 if (line == 0) 1386 unterminated_string(s_lineno, s_line, s_cptr); 1387 } 1388 } 1389 } 1390 } 1391 1392 case '/': 1393 c = *cptr; 1394 if (c == '/') 1395 { 1396 putc('*', f); 1397 while ((c = *++cptr) != '\n') 1398 { 1399 if (c == '*' && cptr[1] == '/') 1400 fprintf(f, "* "); 1401 else 1402 putc(c, f); 1403 } 1404 fprintf(f, "*/\n"); 1405 goto next_line; 1406 } 1407 if (c == '*') 1408 { 1409 int c_lineno = lineno; 1410 char *c_line = dup_line(); 1411 char *c_cptr = c_line + (cptr - line - 1); 1412 1413 putc('*', f); 1414 ++cptr; 1415 for (;;) 1416 { 1417 c = *cptr++; 1418 putc(c, f); 1419 if (c == '*' && *cptr == '/') 1420 { 1421 putc('/', f); 1422 ++cptr; 1423 FREE(c_line); 1424 goto loop; 1425 } 1426 if (c == '\n') 1427 { 1428 get_line(); 1429 if (line == 0) 1430 unterminated_comment(c_lineno, c_line, c_cptr); 1431 } 1432 } 1433 } 1434 goto loop; 1435 1436 default: 1437 goto loop; 1438 } 1439} 1440 1441 1442int 1443mark_symbol() 1444{ 1445 register int c; 1446 register bucket *bp; 1447 1448 c = cptr[1]; 1449 if (c == '%' || c == '\\') 1450 { 1451 cptr += 2; 1452 return (1); 1453 } 1454 1455 if (c == '=') 1456 cptr += 2; 1457 else if ((c == 'p' || c == 'P') && 1458 ((c = cptr[2]) == 'r' || c == 'R') && 1459 ((c = cptr[3]) == 'e' || c == 'E') && 1460 ((c = cptr[4]) == 'c' || c == 'C') && 1461 ((c = cptr[5], !IS_IDENT(c)))) 1462 cptr += 5; 1463 else 1464 syntax_error(lineno, line, cptr); 1465 1466 c = nextc(); 1467 if (isalpha(c) || c == '_' || c == '.' || c == '$') 1468 bp = get_name(); 1469 else if (c == '\'' || c == '"') 1470 bp = get_literal(); 1471 else 1472 { 1473 syntax_error(lineno, line, cptr); 1474 /*NOTREACHED*/ 1475 } 1476 1477 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules]) 1478 prec_redeclared(); 1479 1480 rprec[nrules] = bp->prec; 1481 rassoc[nrules] = bp->assoc; 1482 return (0); 1483} 1484 1485 1486read_grammar() 1487{ 1488 register int c; 1489 1490 initialize_grammar(); 1491 advance_to_start(); 1492 1493 for (;;) 1494 { 1495 c = nextc(); 1496 if (c == EOF) break; 1497 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' || 1498 c == '"') 1499 add_symbol(); 1500 else if (c == '{' || c == '=') 1501 copy_action(); 1502 else if (c == '|') 1503 { 1504 end_rule(); 1505 start_rule(plhs[nrules-1], 0); 1506 ++cptr; 1507 } 1508 else if (c == '%') 1509 { 1510 if (mark_symbol()) break; 1511 } 1512 else 1513 syntax_error(lineno, line, cptr); 1514 } 1515 end_rule(); 1516} 1517 1518 1519free_tags() 1520{ 1521 register int i; 1522 1523 if (tag_table == 0) return; 1524 1525 for (i = 0; i < ntags; ++i) 1526 { 1527 assert(tag_table[i]); 1528 FREE(tag_table[i]); 1529 } 1530 FREE(tag_table); 1531} 1532 1533 1534pack_names() 1535{ 1536 register bucket *bp; 1537 register char *p, *s, *t; 1538 1539 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */ 1540 for (bp = first_symbol; bp; bp = bp->next) 1541 name_pool_size += strlen(bp->name) + 1; 1542 name_pool = MALLOC(name_pool_size); 1543 if (name_pool == 0) no_space(); 1544 1545 strcpy(name_pool, "$accept"); 1546 strcpy(name_pool+8, "$end"); 1547 t = name_pool + 13; 1548 for (bp = first_symbol; bp; bp = bp->next) 1549 { 1550 p = t; 1551 s = bp->name; 1552 while (*t++ = *s++) continue; 1553 FREE(bp->name); 1554 bp->name = p; 1555 } 1556} 1557 1558 1559check_symbols() 1560{ 1561 register bucket *bp; 1562 1563 if (goal->class == UNKNOWN) 1564 undefined_goal(goal->name); 1565 1566 for (bp = first_symbol; bp; bp = bp->next) 1567 { 1568 if (bp->class == UNKNOWN) 1569 { 1570 undefined_symbol_warning(bp->name); 1571 bp->class = TERM; 1572 } 1573 } 1574} 1575 1576 1577pack_symbols() 1578{ 1579 register bucket *bp; 1580 register bucket **v; 1581 register int i, j, k, n; 1582 1583 nsyms = 2; 1584 ntokens = 1; 1585 for (bp = first_symbol; bp; bp = bp->next) 1586 { 1587 ++nsyms; 1588 if (bp->class == TERM) ++ntokens; 1589 } 1590 start_symbol = ntokens; 1591 nvars = nsyms - ntokens; 1592 1593 symbol_name = (char **) MALLOC(nsyms*sizeof(char *)); 1594 if (symbol_name == 0) no_space(); 1595 symbol_value = (short *) MALLOC(nsyms*sizeof(short)); 1596 if (symbol_value == 0) no_space(); 1597 symbol_prec = (short *) MALLOC(nsyms*sizeof(short)); 1598 if (symbol_prec == 0) no_space(); 1599 symbol_assoc = MALLOC(nsyms); 1600 if (symbol_assoc == 0) no_space(); 1601 1602 v = (bucket **) MALLOC(nsyms*sizeof(bucket *)); 1603 if (v == 0) no_space(); 1604 1605 v[0] = 0; 1606 v[start_symbol] = 0; 1607 1608 i = 1; 1609 j = start_symbol + 1; 1610 for (bp = first_symbol; bp; bp = bp->next) 1611 { 1612 if (bp->class == TERM) 1613 v[i++] = bp; 1614 else 1615 v[j++] = bp; 1616 } 1617 assert(i == ntokens && j == nsyms); 1618 1619 for (i = 1; i < ntokens; ++i) 1620 v[i]->index = i; 1621 1622 goal->index = start_symbol + 1; 1623 k = start_symbol + 2; 1624 while (++i < nsyms) 1625 if (v[i] != goal) 1626 { 1627 v[i]->index = k; 1628 ++k; 1629 } 1630 1631 goal->value = 0; 1632 k = 1; 1633 for (i = start_symbol + 1; i < nsyms; ++i) 1634 { 1635 if (v[i] != goal) 1636 { 1637 v[i]->value = k; 1638 ++k; 1639 } 1640 } 1641 1642 k = 0; 1643 for (i = 1; i < ntokens; ++i) 1644 { 1645 n = v[i]->value; 1646 if (n > 256) 1647 { 1648 for (j = k++; j > 0 && symbol_value[j-1] > n; --j) 1649 symbol_value[j] = symbol_value[j-1]; 1650 symbol_value[j] = n; 1651 } 1652 } 1653 1654 if (v[1]->value == UNDEFINED) 1655 v[1]->value = 256; 1656 1657 j = 0; 1658 n = 257; 1659 for (i = 2; i < ntokens; ++i) 1660 { 1661 if (v[i]->value == UNDEFINED) 1662 { 1663 while (j < k && n == symbol_value[j]) 1664 { 1665 while (++j < k && n == symbol_value[j]) continue; 1666 ++n; 1667 } 1668 v[i]->value = n; 1669 ++n; 1670 } 1671 } 1672 1673 symbol_name[0] = name_pool + 8; 1674 symbol_value[0] = 0; 1675 symbol_prec[0] = 0; 1676 symbol_assoc[0] = TOKEN; 1677 for (i = 1; i < ntokens; ++i) 1678 { 1679 symbol_name[i] = v[i]->name; 1680 symbol_value[i] = v[i]->value; 1681 symbol_prec[i] = v[i]->prec; 1682 symbol_assoc[i] = v[i]->assoc; 1683 } 1684 symbol_name[start_symbol] = name_pool; 1685 symbol_value[start_symbol] = -1; 1686 symbol_prec[start_symbol] = 0; 1687 symbol_assoc[start_symbol] = TOKEN; 1688 for (++i; i < nsyms; ++i) 1689 { 1690 k = v[i]->index; 1691 symbol_name[k] = v[i]->name; 1692 symbol_value[k] = v[i]->value; 1693 symbol_prec[k] = v[i]->prec; 1694 symbol_assoc[k] = v[i]->assoc; 1695 } 1696 1697 FREE(v); 1698} 1699 1700 1701pack_grammar() 1702{ 1703 register int i, j; 1704 int assoc, prec; 1705 1706 ritem = (short *) MALLOC(nitems*sizeof(short)); 1707 if (ritem == 0) no_space(); 1708 rlhs = (short *) MALLOC(nrules*sizeof(short)); 1709 if (rlhs == 0) no_space(); 1710 rrhs = (short *) MALLOC((nrules+1)*sizeof(short)); 1711 if (rrhs == 0) no_space(); 1712 rprec = (short *) REALLOC(rprec, nrules*sizeof(short)); 1713 if (rprec == 0) no_space(); 1714 rassoc = REALLOC(rassoc, nrules); 1715 if (rassoc == 0) no_space(); 1716 1717 ritem[0] = -1; 1718 ritem[1] = goal->index; 1719 ritem[2] = 0; 1720 ritem[3] = -2; 1721 rlhs[0] = 0; 1722 rlhs[1] = 0; 1723 rlhs[2] = start_symbol; 1724 rrhs[0] = 0; 1725 rrhs[1] = 0; 1726 rrhs[2] = 1; 1727 1728 j = 4; 1729 for (i = 3; i < nrules; ++i) 1730 { 1731 rlhs[i] = plhs[i]->index; 1732 rrhs[i] = j; 1733 assoc = TOKEN; 1734 prec = 0; 1735 while (pitem[j]) 1736 { 1737 ritem[j] = pitem[j]->index; 1738 if (pitem[j]->class == TERM) 1739 { 1740 prec = pitem[j]->prec; 1741 assoc = pitem[j]->assoc; 1742 } 1743 ++j; 1744 } 1745 ritem[j] = -i; 1746 ++j; 1747 if (rprec[i] == UNDEFINED) 1748 { 1749 rprec[i] = prec; 1750 rassoc[i] = assoc; 1751 } 1752 } 1753 rrhs[i] = j; 1754 1755 FREE(plhs); 1756 FREE(pitem); 1757} 1758 1759 1760print_grammar() 1761{ 1762 register int i, j, k; 1763 int spacing; 1764 register FILE *f = verbose_file; 1765 1766 if (!vflag) return; 1767 1768 k = 1; 1769 for (i = 2; i < nrules; ++i) 1770 { 1771 if (rlhs[i] != rlhs[i-1]) 1772 { 1773 if (i != 2) fprintf(f, "\n"); 1774 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]); 1775 spacing = strlen(symbol_name[rlhs[i]]) + 1; 1776 } 1777 else 1778 { 1779 fprintf(f, "%4d ", i - 2); 1780 j = spacing; 1781 while (--j >= 0) putc(' ', f); 1782 putc('|', f); 1783 } 1784 1785 while (ritem[k] >= 0) 1786 { 1787 fprintf(f, " %s", symbol_name[ritem[k]]); 1788 ++k; 1789 } 1790 ++k; 1791 putc('\n', f); 1792 } 1793} 1794 1795 1796reader() 1797{ 1798 write_section(banner); 1799 create_symbol_table(); 1800 read_declarations(); 1801 read_grammar(); 1802 free_symbol_table(); 1803 free_tags(); 1804 pack_names(); 1805 check_symbols(); 1806 pack_symbols(); 1807 pack_grammar(); 1808 free_symbols(); 1809 print_grammar(); 1810} 1811