reader.c revision 1591
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Robert Paul Corbett.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#ifndef lint
38static char sccsid[] = "@(#)reader.c	5.7 (Berkeley) 1/20/91";
39#endif /* not lint */
40
41#include "defs.h"
42
43/*  The line size must be a positive integer.  One hundred was chosen	*/
44/*  because few lines in Yacc input grammars exceed 100 characters.	*/
45/*  Note that if a line exceeds LINESIZE characters, the line buffer	*/
/*  will be expanded to accommodate it.					*/
47
48#define LINESIZE 100
49
50char *cache;
51int cinc, cache_size;
52
53int ntags, tagmax;
54char **tag_table;
55
56char saw_eof, unionized;
57char *cptr, *line;
58int linesize;
59
60bucket *goal;
61int prec;
62int gensym;
63char last_was_action;
64
65int maxitems;
66bucket **pitem;
67
68int maxrules;
69bucket **plhs;
70
71int name_pool_size;
72char *name_pool;
73
74char line_format[] = "#line %d \"%s\"\n";
75
76
77cachec(c)
78int c;
79{
80    assert(cinc >= 0);
81    if (cinc >= cache_size)
82    {
83	cache_size += 256;
84	cache = REALLOC(cache, cache_size);
85	if (cache == 0) no_space();
86    }
87    cache[cinc] = c;
88    ++cinc;
89}
90
91
92get_line()
93{
94    register FILE *f = input_file;
95    register int c;
96    register int i;
97
98    if (saw_eof || (c = getc(f)) == EOF)
99    {
100	if (line) { FREE(line); line = 0; }
101	cptr = 0;
102	saw_eof = 1;
103	return;
104    }
105
106    if (line == 0 || linesize != (LINESIZE + 1))
107    {
108	if (line) FREE(line);
109	linesize = LINESIZE + 1;
110	line = MALLOC(linesize);
111	if (line == 0) no_space();
112    }
113
114    i = 0;
115    ++lineno;
116    for (;;)
117    {
118	line[i]  =  c;
119	if (c == '\n') { cptr = line; return; }
120	if (++i >= linesize)
121	{
122	    linesize += LINESIZE;
123	    line = REALLOC(line, linesize);
124	    if (line ==  0) no_space();
125	}
126	c = getc(f);
127	if (c ==  EOF)
128	{
129	    line[i] = '\n';
130	    saw_eof = 1;
131	    cptr = line;
132	    return;
133	}
134    }
135}
136
137
138char *
139dup_line()
140{
141    register char *p, *s, *t;
142
143    if (line == 0) return (0);
144    s = line;
145    while (*s != '\n') ++s;
146    p = MALLOC(s - line + 1);
147    if (p == 0) no_space();
148
149    s = line;
150    t = p;
151    while ((*t++ = *s++) != '\n') continue;
152    return (p);
153}
154
155
156skip_comment()
157{
158    register char *s;
159
160    int st_lineno = lineno;
161    char *st_line = dup_line();
162    char *st_cptr = st_line + (cptr - line);
163
164    s = cptr + 2;
165    for (;;)
166    {
167	if (*s == '*' && s[1] == '/')
168	{
169	    cptr = s + 2;
170	    FREE(st_line);
171	    return;
172	}
173	if (*s == '\n')
174	{
175	    get_line();
176	    if (line == 0)
177		unterminated_comment(st_lineno, st_line, st_cptr);
178	    s = cptr;
179	}
180	else
181	    ++s;
182    }
183}
184
185
186int
187nextc()
188{
189    register char *s;
190
191    if (line == 0)
192    {
193	get_line();
194	if (line == 0)
195	    return (EOF);
196    }
197
198    s = cptr;
199    for (;;)
200    {
201	switch (*s)
202	{
203	case '\n':
204	    get_line();
205	    if (line == 0) return (EOF);
206	    s = cptr;
207	    break;
208
209	case ' ':
210	case '\t':
211	case '\f':
212	case '\r':
213	case '\v':
214	case ',':
215	case ';':
216	    ++s;
217	    break;
218
219	case '\\':
220	    cptr = s;
221	    return ('%');
222
223	case '/':
224	    if (s[1] == '*')
225	    {
226		cptr = s;
227		skip_comment();
228		s = cptr;
229		break;
230	    }
231	    else if (s[1] == '/')
232	    {
233		get_line();
234		if (line == 0) return (EOF);
235		s = cptr;
236		break;
237	    }
238	    /* fall through */
239
240	default:
241	    cptr = s;
242	    return (*s);
243	}
244    }
245}
246
247
248int
249keyword()
250{
251    register int c;
252    char *t_cptr = cptr;
253
254    c = *++cptr;
255    if (isalpha(c))
256    {
257	cinc = 0;
258	for (;;)
259	{
260	    if (isalpha(c))
261	    {
262		if (isupper(c)) c = tolower(c);
263		cachec(c);
264	    }
265	    else if (isdigit(c) || c == '_' || c == '.' || c == '$')
266		cachec(c);
267	    else
268		break;
269	    c = *++cptr;
270	}
271	cachec(NUL);
272
273	if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
274	    return (TOKEN);
275	if (strcmp(cache, "type") == 0)
276	    return (TYPE);
277	if (strcmp(cache, "left") == 0)
278	    return (LEFT);
279	if (strcmp(cache, "right") == 0)
280	    return (RIGHT);
281	if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
282	    return (NONASSOC);
283	if (strcmp(cache, "start") == 0)
284	    return (START);
285	if (strcmp(cache, "union") == 0)
286	    return (UNION);
287	if (strcmp(cache, "ident") == 0)
288	    return (IDENT);
289    }
290    else
291    {
292	++cptr;
293	if (c == '{')
294	    return (TEXT);
295	if (c == '%' || c == '\\')
296	    return (MARK);
297	if (c == '<')
298	    return (LEFT);
299	if (c == '>')
300	    return (RIGHT);
301	if (c == '0')
302	    return (TOKEN);
303	if (c == '2')
304	    return (NONASSOC);
305    }
306    syntax_error(lineno, line, t_cptr);
307    /*NOTREACHED*/
308}
309
310
311copy_ident()
312{
313    register int c;
314    register FILE *f = output_file;
315
316    c = nextc();
317    if (c == EOF) unexpected_EOF();
318    if (c != '"') syntax_error(lineno, line, cptr);
319    ++outline;
320    fprintf(f, "#ident \"");
321    for (;;)
322    {
323	c = *++cptr;
324	if (c == '\n')
325	{
326	    fprintf(f, "\"\n");
327	    return;
328	}
329	putc(c, f);
330	if (c == '"')
331	{
332	    putc('\n', f);
333	    ++cptr;
334	    return;
335	}
336    }
337}
338
339
340copy_text()
341{
342    register int c;
343    int quote;
344    register FILE *f = text_file;
345    int need_newline = 0;
346    int t_lineno = lineno;
347    char *t_line = dup_line();
348    char *t_cptr = t_line + (cptr - line - 2);
349
350    if (*cptr == '\n')
351    {
352	get_line();
353	if (line == 0)
354	    unterminated_text(t_lineno, t_line, t_cptr);
355    }
356    if (!lflag) fprintf(f, line_format, lineno, input_file_name);
357
358loop:
359    c = *cptr++;
360    switch (c)
361    {
362    case '\n':
363    next_line:
364	putc('\n', f);
365	need_newline = 0;
366	get_line();
367	if (line) goto loop;
368	unterminated_text(t_lineno, t_line, t_cptr);
369
370    case '\'':
371    case '"':
372	{
373	    int s_lineno = lineno;
374	    char *s_line = dup_line();
375	    char *s_cptr = s_line + (cptr - line - 1);
376
377	    quote = c;
378	    putc(c, f);
379	    for (;;)
380	    {
381		c = *cptr++;
382		putc(c, f);
383		if (c == quote)
384		{
385		    need_newline = 1;
386		    FREE(s_line);
387		    goto loop;
388		}
389		if (c == '\n')
390		    unterminated_string(s_lineno, s_line, s_cptr);
391		if (c == '\\')
392		{
393		    c = *cptr++;
394		    putc(c, f);
395		    if (c == '\n')
396		    {
397			get_line();
398			if (line == 0)
399			    unterminated_string(s_lineno, s_line, s_cptr);
400		    }
401		}
402	    }
403	}
404
405    case '/':
406	putc(c, f);
407	need_newline = 1;
408	c = *cptr;
409	if (c == '/')
410	{
411	    putc('*', f);
412	    while ((c = *++cptr) != '\n')
413	    {
414		if (c == '*' && cptr[1] == '/')
415		    fprintf(f, "* ");
416		else
417		    putc(c, f);
418	    }
419	    fprintf(f, "*/");
420	    goto next_line;
421	}
422	if (c == '*')
423	{
424	    int c_lineno = lineno;
425	    char *c_line = dup_line();
426	    char *c_cptr = c_line + (cptr - line - 1);
427
428	    putc('*', f);
429	    ++cptr;
430	    for (;;)
431	    {
432		c = *cptr++;
433		putc(c, f);
434		if (c == '*' && *cptr == '/')
435		{
436		    putc('/', f);
437		    ++cptr;
438		    FREE(c_line);
439		    goto loop;
440		}
441		if (c == '\n')
442		{
443		    get_line();
444		    if (line == 0)
445			unterminated_comment(c_lineno, c_line, c_cptr);
446		}
447	    }
448	}
449	need_newline = 1;
450	goto loop;
451
452    case '%':
453    case '\\':
454	if (*cptr == '}')
455	{
456	    if (need_newline) putc('\n', f);
457	    ++cptr;
458	    FREE(t_line);
459	    return;
460	}
461	/* fall through */
462
463    default:
464	putc(c, f);
465	need_newline = 1;
466	goto loop;
467    }
468}
469
470
471copy_union()
472{
473    register int c;
474    int quote;
475    int depth;
476    int u_lineno = lineno;
477    char *u_line = dup_line();
478    char *u_cptr = u_line + (cptr - line - 6);
479
480    if (unionized) over_unionized(cptr - 6);
481    unionized = 1;
482
483    if (!lflag)
484	fprintf(text_file, line_format, lineno, input_file_name);
485
486    fprintf(text_file, "typedef union");
487    if (dflag) fprintf(union_file, "typedef union");
488
489    depth = 0;
490loop:
491    c = *cptr++;
492    putc(c, text_file);
493    if (dflag) putc(c, union_file);
494    switch (c)
495    {
496    case '\n':
497    next_line:
498	get_line();
499	if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
500	goto loop;
501
502    case '{':
503	++depth;
504	goto loop;
505
506    case '}':
507	if (--depth == 0)
508	{
509	    fprintf(text_file, " YYSTYPE;\n");
510	    FREE(u_line);
511	    return;
512	}
513	goto loop;
514
515    case '\'':
516    case '"':
517	{
518	    int s_lineno = lineno;
519	    char *s_line = dup_line();
520	    char *s_cptr = s_line + (cptr - line - 1);
521
522	    quote = c;
523	    for (;;)
524	    {
525		c = *cptr++;
526		putc(c, text_file);
527		if (dflag) putc(c, union_file);
528		if (c == quote)
529		{
530		    FREE(s_line);
531		    goto loop;
532		}
533		if (c == '\n')
534		    unterminated_string(s_lineno, s_line, s_cptr);
535		if (c == '\\')
536		{
537		    c = *cptr++;
538		    putc(c, text_file);
539		    if (dflag) putc(c, union_file);
540		    if (c == '\n')
541		    {
542			get_line();
543			if (line == 0)
544			    unterminated_string(s_lineno, s_line, s_cptr);
545		    }
546		}
547	    }
548	}
549
550    case '/':
551	c = *cptr;
552	if (c == '/')
553	{
554	    putc('*', text_file);
555	    if (dflag) putc('*', union_file);
556	    while ((c = *++cptr) != '\n')
557	    {
558		if (c == '*' && cptr[1] == '/')
559		{
560		    fprintf(text_file, "* ");
561		    if (dflag) fprintf(union_file, "* ");
562		}
563		else
564		{
565		    putc(c, text_file);
566		    if (dflag) putc(c, union_file);
567		}
568	    }
569	    fprintf(text_file, "*/\n");
570	    if (dflag) fprintf(union_file, "*/\n");
571	    goto next_line;
572	}
573	if (c == '*')
574	{
575	    int c_lineno = lineno;
576	    char *c_line = dup_line();
577	    char *c_cptr = c_line + (cptr - line - 1);
578
579	    putc('*', text_file);
580	    if (dflag) putc('*', union_file);
581	    ++cptr;
582	    for (;;)
583	    {
584		c = *cptr++;
585		putc(c, text_file);
586		if (dflag) putc(c, union_file);
587		if (c == '*' && *cptr == '/')
588		{
589		    putc('/', text_file);
590		    if (dflag) putc('/', union_file);
591		    ++cptr;
592		    FREE(c_line);
593		    goto loop;
594		}
595		if (c == '\n')
596		{
597		    get_line();
598		    if (line == 0)
599			unterminated_comment(c_lineno, c_line, c_cptr);
600		}
601	    }
602	}
603	goto loop;
604
605    default:
606	goto loop;
607    }
608}
609
610
/*
 * Return the value (0..15) of the hexadecimal digit c, accepting both
 * upper- and lower-case letters, or -1 if c is not a hexadecimal digit.
 */
int
hexval(c)
int c;
{
    if ('0' <= c && c <= '9')
        return (c - '0');
    if ('a' <= c && c <= 'f')
        return (10 + (c - 'a'));
    if ('A' <= c && c <= 'F')
        return (10 + (c - 'A'));
    return (-1);
}
623
624
625bucket *
626get_literal()
627{
628    register int c, quote;
629    register int i;
630    register int n;
631    register char *s;
632    register bucket *bp;
633    int s_lineno = lineno;
634    char *s_line = dup_line();
635    char *s_cptr = s_line + (cptr - line);
636
637    quote = *cptr++;
638    cinc = 0;
639    for (;;)
640    {
641	c = *cptr++;
642	if (c == quote) break;
643	if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
644	if (c == '\\')
645	{
646	    char *c_cptr = cptr - 1;
647
648	    c = *cptr++;
649	    switch (c)
650	    {
651	    case '\n':
652		get_line();
653		if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
654		continue;
655
656	    case '0': case '1': case '2': case '3':
657	    case '4': case '5': case '6': case '7':
658		n = c - '0';
659		c = *cptr;
660		if (IS_OCTAL(c))
661		{
662		    n = (n << 3) + (c - '0');
663		    c = *++cptr;
664		    if (IS_OCTAL(c))
665		    {
666			n = (n << 3) + (c - '0');
667			++cptr;
668		    }
669		}
670		if (n > MAXCHAR) illegal_character(c_cptr);
671		c = n;
672	    	break;
673
674	    case 'x':
675		c = *cptr++;
676		n = hexval(c);
677		if (n < 0 || n >= 16)
678		    illegal_character(c_cptr);
679		for (;;)
680		{
681		    c = *cptr;
682		    i = hexval(c);
683		    if (i < 0 || i >= 16) break;
684		    ++cptr;
685		    n = (n << 4) + i;
686		    if (n > MAXCHAR) illegal_character(c_cptr);
687		}
688		c = n;
689		break;
690
691	    case 'a': c = 7; break;
692	    case 'b': c = '\b'; break;
693	    case 'f': c = '\f'; break;
694	    case 'n': c = '\n'; break;
695	    case 'r': c = '\r'; break;
696	    case 't': c = '\t'; break;
697	    case 'v': c = '\v'; break;
698	    }
699	}
700	cachec(c);
701    }
702    FREE(s_line);
703
704    n = cinc;
705    s = MALLOC(n);
706    if (s == 0) no_space();
707
708    for (i = 0; i < n; ++i)
709	s[i] = cache[i];
710
711    cinc = 0;
712    if (n == 1)
713	cachec('\'');
714    else
715	cachec('"');
716
717    for (i = 0; i < n; ++i)
718    {
719	c = ((unsigned char *)s)[i];
720	if (c == '\\' || c == cache[0])
721	{
722	    cachec('\\');
723	    cachec(c);
724	}
725	else if (isprint(c))
726	    cachec(c);
727	else
728	{
729	    cachec('\\');
730	    switch (c)
731	    {
732	    case 7: cachec('a'); break;
733	    case '\b': cachec('b'); break;
734	    case '\f': cachec('f'); break;
735	    case '\n': cachec('n'); break;
736	    case '\r': cachec('r'); break;
737	    case '\t': cachec('t'); break;
738	    case '\v': cachec('v'); break;
739	    default:
740		cachec(((c >> 6) & 7) + '0');
741		cachec(((c >> 3) & 7) + '0');
742		cachec((c & 7) + '0');
743		break;
744	    }
745	}
746    }
747
748    if (n == 1)
749	cachec('\'');
750    else
751	cachec('"');
752
753    cachec(NUL);
754    bp = lookup(cache);
755    bp->class = TERM;
756    if (n == 1 && bp->value == UNDEFINED)
757	bp->value = *(unsigned char *)s;
758    FREE(s);
759
760    return (bp);
761}
762
763
/*
 * Return 1 if name is one of the symbol names reserved for the
 * generator's own use, namely ".", "$accept", "$end", or "$$" followed
 * only by one or more decimal digits; return 0 otherwise.
 */
int
is_reserved(name)
char *name;
{
    char *s;

    if (strcmp(name, ".") == 0 ||
	    strcmp(name, "$accept") == 0 ||
	    strcmp(name, "$end") == 0)
	return (1);

    /*
     * The casts keep isdigit() defined when plain char is signed and the
     * name contains a byte with the high bit set.
     */
    if (name[0] == '$' && name[1] == '$' && isdigit((unsigned char)name[2]))
    {
	s = name + 3;
	while (isdigit((unsigned char)*s)) ++s;
	if (*s == '\0') return (1);
    }

    return (0);
}
784
785
786bucket *
787get_name()
788{
789    register int c;
790
791    cinc = 0;
792    for (c = *cptr; IS_IDENT(c); c = *++cptr)
793	cachec(c);
794    cachec(NUL);
795
796    if (is_reserved(cache)) used_reserved(cache);
797
798    return (lookup(cache));
799}
800
801
802int
803get_number()
804{
805    register int c;
806    register int n;
807
808    n = 0;
809    for (c = *cptr; isdigit(c); c = *++cptr)
810	n = 10*n + (c - '0');
811
812    return (n);
813}
814
815
816char *
817get_tag()
818{
819    register int c;
820    register int i;
821    register char *s;
822    int t_lineno = lineno;
823    char *t_line = dup_line();
824    char *t_cptr = t_line + (cptr - line);
825
826    ++cptr;
827    c = nextc();
828    if (c == EOF) unexpected_EOF();
829    if (!isalpha(c) && c != '_' && c != '$')
830	illegal_tag(t_lineno, t_line, t_cptr);
831
832    cinc = 0;
833    do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
834    cachec(NUL);
835
836    c = nextc();
837    if (c == EOF) unexpected_EOF();
838    if (c != '>')
839	illegal_tag(t_lineno, t_line, t_cptr);
840    ++cptr;
841
842    for (i = 0; i < ntags; ++i)
843    {
844	if (strcmp(cache, tag_table[i]) == 0)
845	    return (tag_table[i]);
846    }
847
848    if (ntags >= tagmax)
849    {
850	tagmax += 16;
851	tag_table = (char **)
852			(tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
853				   : MALLOC(tagmax*sizeof(char *)));
854	if (tag_table == 0) no_space();
855    }
856
857    s = MALLOC(cinc);
858    if  (s == 0) no_space();
859    strcpy(s, cache);
860    tag_table[ntags] = s;
861    ++ntags;
862    FREE(t_line);
863    return (s);
864}
865
866
867declare_tokens(assoc)
868int assoc;
869{
870    register int c;
871    register bucket *bp;
872    int value;
873    char *tag = 0;
874
875    if (assoc != TOKEN) ++prec;
876
877    c = nextc();
878    if (c == EOF) unexpected_EOF();
879    if (c == '<')
880    {
881	tag = get_tag();
882	c = nextc();
883	if (c == EOF) unexpected_EOF();
884    }
885
886    for (;;)
887    {
888	if (isalpha(c) || c == '_' || c == '.' || c == '$')
889	    bp = get_name();
890	else if (c == '\'' || c == '"')
891	    bp = get_literal();
892	else
893	    return;
894
895	if (bp == goal) tokenized_start(bp->name);
896	bp->class = TERM;
897
898	if (tag)
899	{
900	    if (bp->tag && tag != bp->tag)
901		retyped_warning(bp->name);
902	    bp->tag = tag;
903	}
904
905	if (assoc != TOKEN)
906	{
907	    if (bp->prec && prec != bp->prec)
908		reprec_warning(bp->name);
909	    bp->assoc = assoc;
910	    bp->prec = prec;
911	}
912
913	c = nextc();
914	if (c == EOF) unexpected_EOF();
915	value = UNDEFINED;
916	if (isdigit(c))
917	{
918	    value = get_number();
919	    if (bp->value != UNDEFINED && value != bp->value)
920		revalued_warning(bp->name);
921	    bp->value = value;
922	    c = nextc();
923	    if (c == EOF) unexpected_EOF();
924	}
925    }
926}
927
928
929declare_types()
930{
931    register int c;
932    register bucket *bp;
933    char *tag;
934
935    c = nextc();
936    if (c == EOF) unexpected_EOF();
937    if (c != '<') syntax_error(lineno, line, cptr);
938    tag = get_tag();
939
940    for (;;)
941    {
942	c = nextc();
943	if (isalpha(c) || c == '_' || c == '.' || c == '$')
944	    bp = get_name();
945	else if (c == '\'' || c == '"')
946	    bp = get_literal();
947	else
948	    return;
949
950	if (bp->tag && tag != bp->tag)
951	    retyped_warning(bp->name);
952	bp->tag = tag;
953    }
954}
955
956
957declare_start()
958{
959    register int c;
960    register bucket *bp;
961
962    c = nextc();
963    if (c == EOF) unexpected_EOF();
964    if (!isalpha(c) && c != '_' && c != '.' && c != '$')
965	syntax_error(lineno, line, cptr);
966    bp = get_name();
967    if (bp->class == TERM)
968	terminal_start(bp->name);
969    if (goal && goal != bp)
970	restarted_warning();
971    goal = bp;
972}
973
974
975read_declarations()
976{
977    register int c, k;
978
979    cache_size = 256;
980    cache = MALLOC(cache_size);
981    if (cache == 0) no_space();
982
983    for (;;)
984    {
985	c = nextc();
986	if (c == EOF) unexpected_EOF();
987	if (c != '%') syntax_error(lineno, line, cptr);
988	switch (k = keyword())
989	{
990	case MARK:
991	    return;
992
993	case IDENT:
994	    copy_ident();
995	    break;
996
997	case TEXT:
998	    copy_text();
999	    break;
1000
1001	case UNION:
1002	    copy_union();
1003	    break;
1004
1005	case TOKEN:
1006	case LEFT:
1007	case RIGHT:
1008	case NONASSOC:
1009	    declare_tokens(k);
1010	    break;
1011
1012	case TYPE:
1013	    declare_types();
1014	    break;
1015
1016	case START:
1017	    declare_start();
1018	    break;
1019	}
1020    }
1021}
1022
1023
1024initialize_grammar()
1025{
1026    nitems = 4;
1027    maxitems = 300;
1028    pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
1029    if (pitem == 0) no_space();
1030    pitem[0] = 0;
1031    pitem[1] = 0;
1032    pitem[2] = 0;
1033    pitem[3] = 0;
1034
1035    nrules = 3;
1036    maxrules = 100;
1037    plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
1038    if (plhs == 0) no_space();
1039    plhs[0] = 0;
1040    plhs[1] = 0;
1041    plhs[2] = 0;
1042    rprec = (short *) MALLOC(maxrules*sizeof(short));
1043    if (rprec == 0) no_space();
1044    rprec[0] = 0;
1045    rprec[1] = 0;
1046    rprec[2] = 0;
1047    rassoc = (char *) MALLOC(maxrules*sizeof(char));
1048    if (rassoc == 0) no_space();
1049    rassoc[0] = TOKEN;
1050    rassoc[1] = TOKEN;
1051    rassoc[2] = TOKEN;
1052}
1053
1054
1055expand_items()
1056{
1057    maxitems += 300;
1058    pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
1059    if (pitem == 0) no_space();
1060}
1061
1062
1063expand_rules()
1064{
1065    maxrules += 100;
1066    plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
1067    if (plhs == 0) no_space();
1068    rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
1069    if (rprec == 0) no_space();
1070    rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
1071    if (rassoc == 0) no_space();
1072}
1073
1074
1075advance_to_start()
1076{
1077    register int c;
1078    register bucket *bp;
1079    char *s_cptr;
1080    int s_lineno;
1081
1082    for (;;)
1083    {
1084	c = nextc();
1085	if (c != '%') break;
1086	s_cptr = cptr;
1087	switch (keyword())
1088	{
1089	case MARK:
1090	    no_grammar();
1091
1092	case TEXT:
1093	    copy_text();
1094	    break;
1095
1096	case START:
1097	    declare_start();
1098	    break;
1099
1100	default:
1101	    syntax_error(lineno, line, s_cptr);
1102	}
1103    }
1104
1105    c = nextc();
1106    if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1107	syntax_error(lineno, line, cptr);
1108    bp = get_name();
1109    if (goal == 0)
1110    {
1111	if (bp->class == TERM)
1112	    terminal_start(bp->name);
1113	goal = bp;
1114    }
1115
1116    s_lineno = lineno;
1117    c = nextc();
1118    if (c == EOF) unexpected_EOF();
1119    if (c != ':') syntax_error(lineno, line, cptr);
1120    start_rule(bp, s_lineno);
1121    ++cptr;
1122}
1123
1124
1125start_rule(bp, s_lineno)
1126register bucket *bp;
1127int s_lineno;
1128{
1129    if (bp->class == TERM)
1130	terminal_lhs(s_lineno);
1131    bp->class = NONTERM;
1132    if (nrules >= maxrules)
1133	expand_rules();
1134    plhs[nrules] = bp;
1135    rprec[nrules] = UNDEFINED;
1136    rassoc[nrules] = TOKEN;
1137}
1138
1139
1140end_rule()
1141{
1142    register int i;
1143
1144    if (!last_was_action && plhs[nrules]->tag)
1145    {
1146	for (i = nitems - 1; pitem[i]; --i) continue;
1147	if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
1148	    default_action_warning();
1149    }
1150
1151    last_was_action = 0;
1152    if (nitems >= maxitems) expand_items();
1153    pitem[nitems] = 0;
1154    ++nitems;
1155    ++nrules;
1156}
1157
1158
1159insert_empty_rule()
1160{
1161    register bucket *bp, **bpp;
1162
1163    assert(cache);
1164    sprintf(cache, "$$%d", ++gensym);
1165    bp = make_bucket(cache);
1166    last_symbol->next = bp;
1167    last_symbol = bp;
1168    bp->tag = plhs[nrules]->tag;
1169    bp->class = NONTERM;
1170
1171    if ((nitems += 2) > maxitems)
1172	expand_items();
1173    bpp = pitem + nitems - 1;
1174    *bpp-- = bp;
1175    while (bpp[0] = bpp[-1]) --bpp;
1176
1177    if (++nrules >= maxrules)
1178	expand_rules();
1179    plhs[nrules] = plhs[nrules-1];
1180    plhs[nrules-1] = bp;
1181    rprec[nrules] = rprec[nrules-1];
1182    rprec[nrules-1] = 0;
1183    rassoc[nrules] = rassoc[nrules-1];
1184    rassoc[nrules-1] = TOKEN;
1185}
1186
1187
1188add_symbol()
1189{
1190    register int c;
1191    register bucket *bp;
1192    int s_lineno = lineno;
1193
1194    c = *cptr;
1195    if (c == '\'' || c == '"')
1196	bp = get_literal();
1197    else
1198	bp = get_name();
1199
1200    c = nextc();
1201    if (c == ':')
1202    {
1203	end_rule();
1204	start_rule(bp, s_lineno);
1205	++cptr;
1206	return;
1207    }
1208
1209    if (last_was_action)
1210	insert_empty_rule();
1211    last_was_action = 0;
1212
1213    if (++nitems > maxitems)
1214	expand_items();
1215    pitem[nitems-1] = bp;
1216}
1217
1218
1219copy_action()
1220{
1221    register int c;
1222    register int i, n;
1223    int depth;
1224    int quote;
1225    char *tag;
1226    register FILE *f = action_file;
1227    int a_lineno = lineno;
1228    char *a_line = dup_line();
1229    char *a_cptr = a_line + (cptr - line);
1230
1231    if (last_was_action)
1232	insert_empty_rule();
1233    last_was_action = 1;
1234
1235    fprintf(f, "case %d:\n", nrules - 2);
1236    if (!lflag)
1237	fprintf(f, line_format, lineno, input_file_name);
1238    if (*cptr == '=') ++cptr;
1239
1240    n = 0;
1241    for (i = nitems - 1; pitem[i]; --i) ++n;
1242
1243    depth = 0;
1244loop:
1245    c = *cptr;
1246    if (c == '$')
1247    {
1248	if (cptr[1] == '<')
1249	{
1250	    int d_lineno = lineno;
1251	    char *d_line = dup_line();
1252	    char *d_cptr = d_line + (cptr - line);
1253
1254	    ++cptr;
1255	    tag = get_tag();
1256	    c = *cptr;
1257	    if (c == '$')
1258	    {
1259		fprintf(f, "yyval.%s", tag);
1260		++cptr;
1261		FREE(d_line);
1262		goto loop;
1263	    }
1264	    else if (isdigit(c))
1265	    {
1266		i = get_number();
1267		if (i > n) dollar_warning(d_lineno, i);
1268		fprintf(f, "yyvsp[%d].%s", i - n, tag);
1269		FREE(d_line);
1270		goto loop;
1271	    }
1272	    else if (c == '-' && isdigit(cptr[1]))
1273	    {
1274		++cptr;
1275		i = -get_number() - n;
1276		fprintf(f, "yyvsp[%d].%s", i, tag);
1277		FREE(d_line);
1278		goto loop;
1279	    }
1280	    else
1281		dollar_error(d_lineno, d_line, d_cptr);
1282	}
1283	else if (cptr[1] == '$')
1284	{
1285	    if (ntags)
1286	    {
1287		tag = plhs[nrules]->tag;
1288		if (tag == 0) untyped_lhs();
1289		fprintf(f, "yyval.%s", tag);
1290	    }
1291	    else
1292		fprintf(f, "yyval");
1293	    cptr += 2;
1294	    goto loop;
1295	}
1296	else if (isdigit(cptr[1]))
1297	{
1298	    ++cptr;
1299	    i = get_number();
1300	    if (ntags)
1301	    {
1302		if (i <= 0 || i > n)
1303		    unknown_rhs(i);
1304		tag = pitem[nitems + i - n - 1]->tag;
1305		if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1306		fprintf(f, "yyvsp[%d].%s", i - n, tag);
1307	    }
1308	    else
1309	    {
1310		if (i > n)
1311		    dollar_warning(lineno, i);
1312		fprintf(f, "yyvsp[%d]", i - n);
1313	    }
1314	    goto loop;
1315	}
1316	else if (cptr[1] == '-')
1317	{
1318	    cptr += 2;
1319	    i = get_number();
1320	    if (ntags)
1321		unknown_rhs(-i);
1322	    fprintf(f, "yyvsp[%d]", -i - n);
1323	    goto loop;
1324	}
1325    }
1326    if (isalpha(c) || c == '_' || c == '$')
1327    {
1328	do
1329	{
1330	    putc(c, f);
1331	    c = *++cptr;
1332	} while (isalnum(c) || c == '_' || c == '$');
1333	goto loop;
1334    }
1335    putc(c, f);
1336    ++cptr;
1337    switch (c)
1338    {
1339    case '\n':
1340    next_line:
1341	get_line();
1342	if (line) goto loop;
1343	unterminated_action(a_lineno, a_line, a_cptr);
1344
1345    case ';':
1346	if (depth > 0) goto loop;
1347	fprintf(f, "\nbreak;\n");
1348	return;
1349
1350    case '{':
1351	++depth;
1352	goto loop;
1353
1354    case '}':
1355	if (--depth > 0) goto loop;
1356	fprintf(f, "\nbreak;\n");
1357	return;
1358
1359    case '\'':
1360    case '"':
1361	{
1362	    int s_lineno = lineno;
1363	    char *s_line = dup_line();
1364	    char *s_cptr = s_line + (cptr - line - 1);
1365
1366	    quote = c;
1367	    for (;;)
1368	    {
1369		c = *cptr++;
1370		putc(c, f);
1371		if (c == quote)
1372		{
1373		    FREE(s_line);
1374		    goto loop;
1375		}
1376		if (c == '\n')
1377		    unterminated_string(s_lineno, s_line, s_cptr);
1378		if (c == '\\')
1379		{
1380		    c = *cptr++;
1381		    putc(c, f);
1382		    if (c == '\n')
1383		    {
1384			get_line();
1385			if (line == 0)
1386			    unterminated_string(s_lineno, s_line, s_cptr);
1387		    }
1388		}
1389	    }
1390	}
1391
1392    case '/':
1393	c = *cptr;
1394	if (c == '/')
1395	{
1396	    putc('*', f);
1397	    while ((c = *++cptr) != '\n')
1398	    {
1399		if (c == '*' && cptr[1] == '/')
1400		    fprintf(f, "* ");
1401		else
1402		    putc(c, f);
1403	    }
1404	    fprintf(f, "*/\n");
1405	    goto next_line;
1406	}
1407	if (c == '*')
1408	{
1409	    int c_lineno = lineno;
1410	    char *c_line = dup_line();
1411	    char *c_cptr = c_line + (cptr - line - 1);
1412
1413	    putc('*', f);
1414	    ++cptr;
1415	    for (;;)
1416	    {
1417		c = *cptr++;
1418		putc(c, f);
1419		if (c == '*' && *cptr == '/')
1420		{
1421		    putc('/', f);
1422		    ++cptr;
1423		    FREE(c_line);
1424		    goto loop;
1425		}
1426		if (c == '\n')
1427		{
1428		    get_line();
1429		    if (line == 0)
1430			unterminated_comment(c_lineno, c_line, c_cptr);
1431		}
1432	    }
1433	}
1434	goto loop;
1435
1436    default:
1437	goto loop;
1438    }
1439}
1440
1441
1442int
1443mark_symbol()
1444{
1445    register int c;
1446    register bucket *bp;
1447
1448    c = cptr[1];
1449    if (c == '%' || c == '\\')
1450    {
1451	cptr += 2;
1452	return (1);
1453    }
1454
1455    if (c == '=')
1456	cptr += 2;
1457    else if ((c == 'p' || c == 'P') &&
1458	     ((c = cptr[2]) == 'r' || c == 'R') &&
1459	     ((c = cptr[3]) == 'e' || c == 'E') &&
1460	     ((c = cptr[4]) == 'c' || c == 'C') &&
1461	     ((c = cptr[5], !IS_IDENT(c))))
1462	cptr += 5;
1463    else
1464	syntax_error(lineno, line, cptr);
1465
1466    c = nextc();
1467    if (isalpha(c) || c == '_' || c == '.' || c == '$')
1468	bp = get_name();
1469    else if (c == '\'' || c == '"')
1470	bp = get_literal();
1471    else
1472    {
1473	syntax_error(lineno, line, cptr);
1474	/*NOTREACHED*/
1475    }
1476
1477    if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1478	prec_redeclared();
1479
1480    rprec[nrules] = bp->prec;
1481    rassoc[nrules] = bp->assoc;
1482    return (0);
1483}
1484
1485
1486read_grammar()
1487{
1488    register int c;
1489
1490    initialize_grammar();
1491    advance_to_start();
1492
1493    for (;;)
1494    {
1495	c = nextc();
1496	if (c == EOF) break;
1497	if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1498		c == '"')
1499	    add_symbol();
1500	else if (c == '{' || c == '=')
1501	    copy_action();
1502	else if (c == '|')
1503	{
1504	    end_rule();
1505	    start_rule(plhs[nrules-1], 0);
1506	    ++cptr;
1507	}
1508	else if (c == '%')
1509	{
1510	    if (mark_symbol()) break;
1511	}
1512	else
1513	    syntax_error(lineno, line, cptr);
1514    }
1515    end_rule();
1516}
1517
1518
1519free_tags()
1520{
1521    register int i;
1522
1523    if (tag_table == 0) return;
1524
1525    for (i = 0; i < ntags; ++i)
1526    {
1527	assert(tag_table[i]);
1528	FREE(tag_table[i]);
1529    }
1530    FREE(tag_table);
1531}
1532
1533
1534pack_names()
1535{
1536    register bucket *bp;
1537    register char *p, *s, *t;
1538
1539    name_pool_size = 13;  /* 13 == sizeof("$end") + sizeof("$accept") */
1540    for (bp = first_symbol; bp; bp = bp->next)
1541	name_pool_size += strlen(bp->name) + 1;
1542    name_pool = MALLOC(name_pool_size);
1543    if (name_pool == 0) no_space();
1544
1545    strcpy(name_pool, "$accept");
1546    strcpy(name_pool+8, "$end");
1547    t = name_pool + 13;
1548    for (bp = first_symbol; bp; bp = bp->next)
1549    {
1550	p = t;
1551	s = bp->name;
1552	while (*t++ = *s++) continue;
1553	FREE(bp->name);
1554	bp->name = p;
1555    }
1556}
1557
1558
1559check_symbols()
1560{
1561    register bucket *bp;
1562
1563    if (goal->class == UNKNOWN)
1564	undefined_goal(goal->name);
1565
1566    for (bp = first_symbol; bp; bp = bp->next)
1567    {
1568	if (bp->class == UNKNOWN)
1569	{
1570	    undefined_symbol_warning(bp->name);
1571	    bp->class = TERM;
1572	}
1573    }
1574}
1575
1576
1577pack_symbols()
1578{
1579    register bucket *bp;
1580    register bucket **v;
1581    register int i, j, k, n;
1582
1583    nsyms = 2;
1584    ntokens = 1;
1585    for (bp = first_symbol; bp; bp = bp->next)
1586    {
1587	++nsyms;
1588	if (bp->class == TERM) ++ntokens;
1589    }
1590    start_symbol = ntokens;
1591    nvars = nsyms - ntokens;
1592
1593    symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1594    if (symbol_name == 0) no_space();
1595    symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1596    if (symbol_value == 0) no_space();
1597    symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1598    if (symbol_prec == 0) no_space();
1599    symbol_assoc = MALLOC(nsyms);
1600    if (symbol_assoc == 0) no_space();
1601
1602    v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1603    if (v == 0) no_space();
1604
1605    v[0] = 0;
1606    v[start_symbol] = 0;
1607
1608    i = 1;
1609    j = start_symbol + 1;
1610    for (bp = first_symbol; bp; bp = bp->next)
1611    {
1612	if (bp->class == TERM)
1613	    v[i++] = bp;
1614	else
1615	    v[j++] = bp;
1616    }
1617    assert(i == ntokens && j == nsyms);
1618
1619    for (i = 1; i < ntokens; ++i)
1620	v[i]->index = i;
1621
1622    goal->index = start_symbol + 1;
1623    k = start_symbol + 2;
1624    while (++i < nsyms)
1625	if (v[i] != goal)
1626	{
1627	    v[i]->index = k;
1628	    ++k;
1629	}
1630
1631    goal->value = 0;
1632    k = 1;
1633    for (i = start_symbol + 1; i < nsyms; ++i)
1634    {
1635	if (v[i] != goal)
1636	{
1637	    v[i]->value = k;
1638	    ++k;
1639	}
1640    }
1641
1642    k = 0;
1643    for (i = 1; i < ntokens; ++i)
1644    {
1645	n = v[i]->value;
1646	if (n > 256)
1647	{
1648	    for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1649		symbol_value[j] = symbol_value[j-1];
1650	    symbol_value[j] = n;
1651	}
1652    }
1653
1654    if (v[1]->value == UNDEFINED)
1655	v[1]->value = 256;
1656
1657    j = 0;
1658    n = 257;
1659    for (i = 2; i < ntokens; ++i)
1660    {
1661	if (v[i]->value == UNDEFINED)
1662	{
1663	    while (j < k && n == symbol_value[j])
1664	    {
1665		while (++j < k && n == symbol_value[j]) continue;
1666		++n;
1667	    }
1668	    v[i]->value = n;
1669	    ++n;
1670	}
1671    }
1672
1673    symbol_name[0] = name_pool + 8;
1674    symbol_value[0] = 0;
1675    symbol_prec[0] = 0;
1676    symbol_assoc[0] = TOKEN;
1677    for (i = 1; i < ntokens; ++i)
1678    {
1679	symbol_name[i] = v[i]->name;
1680	symbol_value[i] = v[i]->value;
1681	symbol_prec[i] = v[i]->prec;
1682	symbol_assoc[i] = v[i]->assoc;
1683    }
1684    symbol_name[start_symbol] = name_pool;
1685    symbol_value[start_symbol] = -1;
1686    symbol_prec[start_symbol] = 0;
1687    symbol_assoc[start_symbol] = TOKEN;
1688    for (++i; i < nsyms; ++i)
1689    {
1690	k = v[i]->index;
1691	symbol_name[k] = v[i]->name;
1692	symbol_value[k] = v[i]->value;
1693	symbol_prec[k] = v[i]->prec;
1694	symbol_assoc[k] = v[i]->assoc;
1695    }
1696
1697    FREE(v);
1698}
1699
1700
1701pack_grammar()
1702{
1703    register int i, j;
1704    int assoc, prec;
1705
1706    ritem = (short *) MALLOC(nitems*sizeof(short));
1707    if (ritem == 0) no_space();
1708    rlhs = (short *) MALLOC(nrules*sizeof(short));
1709    if (rlhs == 0) no_space();
1710    rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1711    if (rrhs == 0) no_space();
1712    rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1713    if (rprec == 0) no_space();
1714    rassoc = REALLOC(rassoc, nrules);
1715    if (rassoc == 0) no_space();
1716
1717    ritem[0] = -1;
1718    ritem[1] = goal->index;
1719    ritem[2] = 0;
1720    ritem[3] = -2;
1721    rlhs[0] = 0;
1722    rlhs[1] = 0;
1723    rlhs[2] = start_symbol;
1724    rrhs[0] = 0;
1725    rrhs[1] = 0;
1726    rrhs[2] = 1;
1727
1728    j = 4;
1729    for (i = 3; i < nrules; ++i)
1730    {
1731	rlhs[i] = plhs[i]->index;
1732	rrhs[i] = j;
1733	assoc = TOKEN;
1734	prec = 0;
1735	while (pitem[j])
1736	{
1737	    ritem[j] = pitem[j]->index;
1738	    if (pitem[j]->class == TERM)
1739	    {
1740		prec = pitem[j]->prec;
1741		assoc = pitem[j]->assoc;
1742	    }
1743	    ++j;
1744	}
1745	ritem[j] = -i;
1746	++j;
1747	if (rprec[i] == UNDEFINED)
1748	{
1749	    rprec[i] = prec;
1750	    rassoc[i] = assoc;
1751	}
1752    }
1753    rrhs[i] = j;
1754
1755    FREE(plhs);
1756    FREE(pitem);
1757}
1758
1759
1760print_grammar()
1761{
1762    register int i, j, k;
1763    int spacing;
1764    register FILE *f = verbose_file;
1765
1766    if (!vflag) return;
1767
1768    k = 1;
1769    for (i = 2; i < nrules; ++i)
1770    {
1771	if (rlhs[i] != rlhs[i-1])
1772	{
1773	    if (i != 2) fprintf(f, "\n");
1774	    fprintf(f, "%4d  %s :", i - 2, symbol_name[rlhs[i]]);
1775	    spacing = strlen(symbol_name[rlhs[i]]) + 1;
1776	}
1777	else
1778	{
1779	    fprintf(f, "%4d  ", i - 2);
1780	    j = spacing;
1781	    while (--j >= 0) putc(' ', f);
1782	    putc('|', f);
1783	}
1784
1785	while (ritem[k] >= 0)
1786	{
1787	    fprintf(f, " %s", symbol_name[ritem[k]]);
1788	    ++k;
1789	}
1790	++k;
1791	putc('\n', f);
1792    }
1793}
1794
1795
1796reader()
1797{
1798    write_section(banner);
1799    create_symbol_table();
1800    read_declarations();
1801    read_grammar();
1802    free_symbol_table();
1803    free_tags();
1804    pack_names();
1805    check_symbols();
1806    pack_symbols();
1807    pack_grammar();
1808    free_symbols();
1809    print_grammar();
1810}
1811