1/*
2  G W A R T  --  GNU version of Wart
3
4  A small subset of "lex" sufficient for converting the Kermit
5  protocol state table from lex notation to C.
6
7  Authors:
8    Jeff Damens, Frank da Cruz
9    The Kermit Project, Columbia University
10    http://www.columbia.edu/kermit/
11    kermit@columbia.edu
12
13  Copyright (C) 1984, 1999,
14  The Trustees of Columbia University in the City of New York.
15
16  This program is free software; you can redistribute it and/or modify
17  it under the terms of the GNU General Public License as published by
18  the Free Software Foundation; either version 2 of the License, or
19  (at your option) any later version.
20
21  This program is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  GNU General Public License for more details.
25
26  You should have received a copy of the GNU General Public License
27  along with this program; if not, write to the Free Software
28  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
29*/
30
31/*
32 * input format is:
 *  lines to be copied | %states <state names...>
34 *  %%
35 * <state> | <state,state,...> CHAR  { actions }
36 * ...
37 *  %%
38 *  more lines to be copied
39 */
40
41#include <stdio.h>
42#include <ctype.h>
43#include "gkermit.h"
44
#define TBL_TYPE "short"		/* C data type of state table */

#define C_L 014				/* Formfeed */

/* Token types returned by gettoken() */

#define SEP 1				/* "%%" separator or end of file */
#define LBRACK 2			/* '<' */
#define RBRACK 3			/* '>' */
#define WORD 4				/* word; its text is left in tokval */
#define COMMA 5				/* ',' */

/* Storage sizes */

#define MAXSTATES 50			/* max number of states */
#define MAXWORD 50			/* max # of chars/word */

/*
 * Number of bytes in a per-transition state bitmask: one bit for each of
 * MAXSTATES state numbers, rounded UP to a whole byte.  (Adding 6 rather
 * than 7 comes out one byte short whenever MAXSTATES % 8 == 1.)
 */
#define SBYTES ((MAXSTATES+7)/8)	/* # of bytes for state bitmask */

/* Name of gwart function in generated program */

#ifndef FNAME
#define FNAME "gwart"
#endif /* FNAME */
66
67/* Structure for state information */
68
69struct transx {
70    CHAR states[SBYTES];		/* included states */
71    int anyst;				/* true if this good from any state */
72    CHAR inchr;				/* input character */
73    int actno;				/* associated action */
74    struct transx *nxt;
75};					/* next transition */
76typedef struct transx *trans;
77
78/* Function prototypes */
79
80_MYPROTOTYPE( VOID fatal, (char *) );
81_MYPROTOTYPE( VOID setwstate, (int, trans) );
82_MYPROTOTYPE( int teststate, (int, trans) );
83_MYPROTOTYPE( trans rdinput, (FILE *, FILE *) );
84_MYPROTOTYPE( VOID initial, (FILE *, FILE *) );
85_MYPROTOTYPE( int isin, (char *, int) );
86_MYPROTOTYPE( int isword, (int) );
87_MYPROTOTYPE( VOID rdword, (FILE *, char *) );
88_MYPROTOTYPE( VOID rdstates, (FILE *, FILE *) );
89_MYPROTOTYPE( trans newtrans, (void) );
90_MYPROTOTYPE( trans rdrules, (FILE *, FILE *) );
91_MYPROTOTYPE( VOID statelist, (FILE *, trans) );
92_MYPROTOTYPE( VOID copyact, (FILE *, FILE *, int) );
93_MYPROTOTYPE( int faction, (trans, int, int) );
94_MYPROTOTYPE( VOID emptytbl, (void) );
95_MYPROTOTYPE( VOID addaction, (int, int, int) );
96_MYPROTOTYPE( VOID writetbl, (FILE *) );
97_MYPROTOTYPE( VOID warray, (FILE *, char *, int [], int, char *) );
98_MYPROTOTYPE( VOID prolog, (FILE *) );
99_MYPROTOTYPE( VOID epilogue, (FILE *) );
100_MYPROTOTYPE( VOID copyrest, (FILE *, FILE *) );
101_MYPROTOTYPE( int gettoken, (FILE *) );
102_MYPROTOTYPE( VOID rdcmnt, (FILE *) );
103_MYPROTOTYPE( VOID clrhash, (void) );
104_MYPROTOTYPE( int hash, (char *) );
105_MYPROTOTYPE( VOID enter, (char *, int) );
106_MYPROTOTYPE( int lkup, (char *) );
107_MYPROTOTYPE( static char* copy, (char *s) );
108
109/* Variables and tables */
110
111int lines, nstates, nacts;
112int tbl[MAXSTATES*96];
113char tokval[MAXWORD];
114char *tbl_type = TBL_TYPE;
115
116char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\nint\n";
117
118char *fname = FNAME;			/* Generated function name goes here */
119
120/* Rest of program... */
121
122char *txt2 = "()\n\
123{\n\
124    int c,actno;\n\
125    extern ";
126
127/* Data type of state table is inserted here (short or int) */
128
129char *txt2a =
130" tbl[];\n\
131    while (1) {\n\
132	c = input() - 32;\n\
133	if (c < 0 || c > 95) c = 0;\n";
134
135char *txt2b = "	if ((actno = tbl[c + state*96]) != -1)\n\
136	    switch(actno) {\n";
137
138/* this program's output goes here, followed by final text... */
139
140char *txt3 = "\n	    }\n    }\n}\n\n";
141
142
143/*
144 * turn on the bit associated with the given state
145 *
146 */
147VOID
148setwstate(state,t) int state; trans t; {
149    int idx,msk;
150    idx = state/8;			/* byte associated with state */
151    msk = 0x80 >> (state % 8);		/* bit mask for state */
152    t->states[idx] |= msk;
153}
154
155/*
156 * see if the state is involved in the transition
157 *
158 */
159int
160teststate(state,t) int state; trans t; {
161    int idx,msk;
162    idx = state/8;
163    msk = 0x80 >> (state % 8);
164    return(t->states[idx] & msk);
165}
166
167
168/*
169 * read input from here...
170 *
171 */
172
173trans
174rdinput(infp,outfp) FILE *infp, *outfp; {
175    trans x;
176    lines = 1;				/* line counter */
177    nstates = 0;			/* no states */
178    nacts = 0;				/* no actions yet */
179    fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
180    fprintf(outfp,"gwart preprocessor. */\n");
181    fprintf(outfp,"%c* Do not edit this file; edit the gwart-format ",'/');
182    fprintf(outfp,"source file instead, */\n");
183    fprintf(outfp,"%c* and then run it through gwart to produce a new ",'/');
184    fprintf(outfp,"C source file.     */\n\n");
185
186    initial(infp,outfp);		/* read state names, initial defs */
187    prolog(outfp);			/* write out our initial code */
188    x = rdrules(infp,outfp);		/* read rules */
189    epilogue(outfp);			/* write out epilogue code */
190    return(x);
191}
192
193
194/*
195 * initial - read initial definitions and state names.  Returns
196 * on EOF or %%.
197 *
198 */
199VOID
200initial(infp,outfp) FILE *infp, *outfp; {
201    int c;
202    char wordbuf[MAXWORD];
203    while ((c = getc(infp)) != EOF) {
204	if (c == '%') {
205	    rdword(infp,wordbuf);
206	    if (strcmp(wordbuf,"states") == 0)
207	      rdstates(infp,outfp);
208	    else if (strcmp(wordbuf,"%") == 0) return;
209	    else fprintf(outfp,"%%%s",wordbuf);
210	}
211	else putc(c,outfp);
212	if (c == '\n') lines++;
213    }
214}
215
/*
 * isin - tell whether character c occurs in the string s.
 * Returns 1 if it does, 0 if not.
 */
int
isin(s,c) char *s; int c; {
    char wanted = (char) c;
    while (*s != '\0') {
        if (*s == wanted) return(1);
        s++;
    }
    return(0);
}

/*
 * isword - tell whether character c can be part of a word: a letter,
 * a digit, or one of the characters . % _ - $ @
 * Returns 1 if it can, 0 if not.
 */
int
isword(c) int c; {
    static char special[] = ".%_-$@";	/* these are allowable */
    if (isalnum(c)) return(1);
    return(isin(special,c));
}
232
233/*
234 * read the next word into the given buffer.
235 *
236 */
237VOID
238rdword(fp,buf) FILE *fp; char *buf; {
239    int len = 0,c;
240    while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = (char) c;
241    *buf++ = '\0';			/* tie off word */
242    ungetc(c,fp);			/* put break char back */
243}
244
245/*
246 * read state names, up to a newline.
247 *
248 */
249VOID
250rdstates(fp,ofp) FILE *fp,*ofp; {
251    int c;
252    char wordbuf[MAXWORD];
253    while ((c = getc(fp)) != EOF && c != '\n') {
254	if (isspace(c) || c == C_L) continue;	/* skip whitespace */
255	ungetc(c,fp);			/* put char back */
256	rdword(fp,wordbuf);		/* read the whole word */
257	enter(wordbuf,++nstates);	/* put into symbol tbl */
258	fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
259    }
260    lines++;
261}
262
263/*
264 * allocate a new, empty transition node
265 *
266 */
267trans
268newtrans() {
269    trans new;
270    int i;
271    new = (trans) malloc(sizeof (struct transx));
272    for (i=0; i<SBYTES; i++) new->states[i] = 0;
273    new->anyst = 0;
274    new->nxt = NULL;
275    return(new);
276}
277
278
279/*
280 * read all the rules.
281 *
282 */
283
284trans
285rdrules(fp,out) FILE *fp,*out; {
286    trans head,cur,prev;
287    int curtok;
288    head = cur = prev = NULL;
289    while ((curtok = gettoken(fp)) != SEP)
290
291      switch(curtok) {
292	case LBRACK:
293	  if (cur == NULL)
294	    cur = newtrans();
295	  else
296	    fatal("duplicate state list");
297	  statelist(fp,cur);		/* set states */
298	  continue;			/* prepare to read char */
299
300	case WORD:
301	  if ((int)strlen(tokval) != 1)
302	    fatal("multiple chars in state");
303	  if (cur == NULL) {
304	      cur = newtrans();
305	      cur->anyst = 1;
306	  }
307	  cur->actno = ++nacts;
308	  cur->inchr = (char) (tokval[0] - 32);
309	  if (head == NULL)
310	    head = cur;
311	  else
312	    prev->nxt = cur;
313	  prev = cur;
314	  cur = NULL;
315	  copyact(fp,out,nacts);
316	  break;
317	default: fatal("bad input format");
318      }
319    return(head);
320}
321
322/*
323 * read a list of (comma-separated) states, set them in the
324 * given transition.
325 *
326 */
327VOID
328statelist(fp,t) FILE *fp; trans t; {
329    int curtok,sval;
330    curtok = COMMA;
331    while (curtok != RBRACK) {
332	if (curtok != COMMA) fatal("missing comma");
333	if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
334	if ((sval = lkup(tokval)) == -1) {
335	    fprintf(stderr,"state %s undefined\n",tokval);
336	    fatal("undefined state");
337	}
338	setwstate(sval,t);
339	curtok = gettoken(fp);
340    }
341}
342
343/*
344 * copy an action from the input to the output file
345 *
346 */
347VOID
348copyact(inp,outp,actno) FILE *inp,*outp; int actno; {
349    int c,bcnt;
350    fprintf(outp,"case %d:\n",actno);
351    while (c = getc(inp), (isspace(c) || c == C_L))
352      if (c == '\n') lines++;
353    if (c == '{') {
354	bcnt = 1;
355	fputs("    {",outp);
356	while (bcnt > 0 && (c = getc(inp)) != EOF) {
357	    if (c == '{') bcnt++;
358	    else if (c == '}') bcnt--;
359	    else if (c == '\n') lines++;
360	    putc(c,outp);
361	}
362	if (bcnt > 0) fatal("action doesn't end");
363    } else {
364	while (c != '\n' && c != EOF) {
365	    putc(c,outp);
366	    c = getc(inp);
367	}
368	lines++;
369    }
370    fprintf(outp,"\n    break;\n");
371}
372
373/*
374 * find the action associated with a given character and state.
375 * returns -1 if one can't be found.
376 *
377 */
378int
379faction(hd,state,chr) trans hd; int state,chr; {
380    while (hd != NULL) {
381	if (hd->anyst || teststate(state,hd))
382	  if (hd->inchr == ('.' - 32) || hd->inchr == (char) chr)
383	    return(hd->actno);
384	hd = hd->nxt;
385    }
386    return(-1);
387}
388
389/*
390 * empty the table...
391 *
392 */
393VOID
394emptytbl() {
395    int i;
396    for (i=0; i<nstates*96; i++) tbl[i] = -1;
397}
398
399/*
400 * add the specified action to the output for the given state and chr.
401 *
402 */
403VOID
404addaction(act,state,chr) int act,state,chr; {
405    tbl[state*96 + chr] = act;
406}
407
408VOID
409writetbl(fp) FILE *fp; {
410    warray(fp,"tbl",tbl,96*(nstates+1),TBL_TYPE);
411}
412
413
414/*
415 * write an array to the output file, given its name and size.
416 *
417 */
418VOID
419warray(fp,nam,cont,siz,typ) FILE *fp; char *nam; int cont[],siz; char *typ; {
420    int i;
421    fprintf(fp,"%s %s[] = {\n",typ,nam);
422    for (i = 0; i < siz - 1; ) {
423	fprintf(fp," %2d,",cont[i]);
424	if ((++i % 16) == 0) putc('\n',fp);
425    }
426    fprintf(fp,"%2d\n};\n",cont[siz-1]);
427}
428
429int
430main(argc,argv) int argc; char **argv; {
431    trans head;
432    int state,c;
433    FILE *infile,*outfile;
434
435    if (argc > 1) {
436	if ((infile = fopen(argv[1],"r")) == NULL) {
437	    fprintf(stderr,"Can't open %s\n",argv[1]);
438	    fatal("unreadable input file");
439	}
440    } else infile = stdin;
441
442    if (argc > 2) {
443	if ((outfile = fopen(argv[2],"w")) == NULL) {
444	    fprintf(stderr,"Can't write to %s\n",argv[2]);
445	    fatal("bad output file");
446	}
447    } else outfile = stdout;
448
449    clrhash();				/* empty hash table */
450    head = rdinput(infile,outfile);	/* read input file */
451    emptytbl();				/* empty our tables */
452    for (state = 0; state <= nstates; state++)
453      for (c = 1; c < 96; c++)		/* find actions, */
454	addaction(faction(head,state,c),state,c); /* add to tbl */
455    writetbl(outfile);
456    copyrest(infile,outfile);
457    printf("%d states, %d actions\n",nstates,nacts);
458    exit(0);
459}
460
461
462/*
463 * fatal error handler
464 *
465 */
466
467VOID
468fatal(msg) char *msg; {
469    fprintf(stderr,"error in line %d: %s\n",lines,msg);
470    exit(1);
471}
472
473VOID
474prolog(outfp) FILE *outfp; {
475    int c;
476    while ((c = *txt1++)     != '\0') putc(c,outfp);
477    while ((c = *fname++)    != '\0') putc(c,outfp);
478    while ((c = *txt2++)     != '\0') putc(c,outfp);
479    while ((c = *tbl_type++) != '\0') putc(c,outfp);
480    while ((c = *txt2a++)    != '\0') putc(c,outfp);
481    while ((c = *txt2b++)    != '\0') putc(c,outfp);
482}
483
484VOID
485epilogue(outfp) FILE *outfp; {
486    int c;
487    while ((c = *txt3++) != '\0') putc(c,outfp);
488}
489
490VOID
491copyrest(in,out) FILE *in,*out; {
492    int c;
493    while ((c = getc(in)) != EOF) putc(c,out);
494}
495
496/*
497 * gettoken - returns token type of next token, sets tokval
498 * to the string value of the token if appropriate.
499 *
500 */
501
502int
503gettoken(fp) FILE *fp; {
504    int c;
505    while (1) {				/* loop if reading comments... */
506	do {
507	    c = getc(fp);
508	    if (c == '\n') lines++;
509	} while ((isspace(c) || c == C_L)); /* skip whitespace */
510	switch(c) {
511	  case EOF:
512	    return(SEP);
513	  case '%':
514	    if ((c = getc(fp)) == '%') return(SEP);
515	    tokval[0] = '%';
516	    tokval[1] = (char) c;
517	    rdword(fp,tokval+2);
518	    return(WORD);
519	  case '<':
520	    return(LBRACK);
521	  case '>':
522	    return(RBRACK);
523	  case ',':
524	    return(COMMA);
525	  case '/':
526	    if ((c = getc(fp)) == '*') {
527		rdcmnt(fp);		/* skip over the comment */
528		continue;
529	    } else {			/* and keep looping */
530		ungetc(c,fp);		/* put this back into input */
531		c = '/';		/* put character back, fall thru */
532	    }
533
534	  default:
535	    if (isword(c)) {
536		ungetc(c,fp);
537		rdword(fp,tokval);
538		return(WORD);
539	    } else fatal("Invalid character in input");
540	}
541    }
542}
543
544/*
545 * skip over a comment
546 *
547 */
548
549VOID
550rdcmnt(fp) FILE *fp; {
551    int c,star,prcnt;
552    prcnt = star = 0;			/* no star seen yet */
553    while (!((c = getc(fp)) == '/' && star)) {
554	if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
555	prcnt = (c == '%');
556	star = (c == '*');
557	if (c == '\n') lines++;
558    }
559}
560
/*
 * Symbol table for gwart: a hash table of state names, with colliding
 * names chained together.
 *
 * Entry points:
 *   clrhash - empty the hash table.
 *   enter   - enter a name and its value into the symbol table.
 *   lkup    - find a name's value in the symbol table.
 */
#define HASHSIZE 101			/* # of entries in hash table */

/* One symbol */
struct sym {
    char *name;				/* symbol name */
    int val;				/* value */
    struct sym *hnxt;			/* next on collision chain */
};

/* The hash table: one collision chain per hash value */
struct sym *htab[HASHSIZE];
576
577/*
578 * empty the hash table before using it...
579 *
580 */
581VOID
582clrhash() {
583    int i;
584    for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
585}
586
587/*
588 * compute the value of the hash for a symbol
589 *
590 */
591int
592hash(name) char *name; {
593    int sum;
594    for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
595    sum %= HASHSIZE;			/* take sum mod hashsize */
596    if (sum < 0) sum += HASHSIZE;	/* disallow negative hash value */
597    return(sum);
598}
599
/*
 * copy - make a private copy of a string.
 *
 * Returns a newly allocated copy of s.  If no memory is available a
 * message is printed and the program exits with status 1, so the
 * return value is never NULL.
 */
static char*
copy(s) char *s; {
    char *new;
    new = (char *) malloc((int)strlen(s) + 1);
    if (new == NULL) {			/* don't strcpy into a null pointer */
        fprintf(stderr,"gwart: out of memory\n");
        exit(1);
    }
    strcpy(new,s);
    return(new);
}
611
612/*
613 * enter state name into the hash table
614 *
615 */
616VOID
617enter(name,svalue) char *name; int svalue; {
618    int h;
619    struct sym *cur;
620    if (lkup(name) != -1) {
621	fprintf(stderr,"state \"%s\" appears twice...\n", name);
622	exit(1);
623    }
624    h = hash(name);
625    cur = (struct sym *)malloc(sizeof (struct sym));
626    cur->name = copy(name);
627    cur->val = svalue;
628    cur->hnxt = htab[h];
629    htab[h] = cur;
630}
631
632/*
633 * find name in the symbol table, return its value.  Returns -1
634 * if not found.
635 *
636 */
637int
638lkup(name) char *name; {
639    struct sym *cur;
640    for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
641      if (strcmp(cur->name,name) == 0) return(cur->val);
642    return(-1);
643}
644