1/* 2 G W A R T -- GNU version of Wart 3 4 A small subset of "lex" sufficient for converting the Kermit 5 protocol state table from lex notation to C. 6 7 Authors: 8 Jeff Damens, Frank da Cruz 9 The Kermit Project, Columbia University 10 http://www.columbia.edu/kermit/ 11 kermit@columbia.edu 12 13 Copyright (C) 1984, 1999, 14 The Trustees of Columbia University in the City of New York. 15 16 This program is free software; you can redistribute it and/or modify 17 it under the terms of the GNU General Public License as published by 18 the Free Software Foundation; either version 2 of the License, or 19 (at your option) any later version. 20 21 This program is distributed in the hope that it will be useful, 22 but WITHOUT ANY WARRANTY; without even the implied warranty of 23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 24 GNU General Public License for more details. 25 26 You should have received a copy of the GNU General Public License 27 along with this program; if not, write to the Free Software 28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 29*/ 30 31/* 32 * input format is: 33 * lines to be copied | %state <state names...> 34 * %% 35 * <state> | <state,state,...> CHAR { actions } 36 * ... 37 * %% 38 * more lines to be copied 39 */ 40 41#include <stdio.h> 42#include <ctype.h> 43#include "gkermit.h" 44 45#define TBL_TYPE "short" /* C data type of state table */ 46 47#define C_L 014 /* Formfeed */ 48 49#define SEP 1 /* Token types */ 50#define LBRACK 2 51#define RBRACK 3 52#define WORD 4 53#define COMMA 5 54 55/* Storage sizes */ 56 57#define MAXSTATES 50 /* max number of states */ 58#define MAXWORD 50 /* max # of chars/word */ 59#define SBYTES ((MAXSTATES+6)/8) /* # of bytes for state bitmask */ 60 61/* Name of gwart function in generated program */ 62 63#ifndef FNAME 64#define FNAME "gwart" 65#endif /* FNAME */ 66 67/* Structure for state information */ 68 69struct transx { 70 CHAR states[SBYTES]; /* included states */ 71 int anyst; /* true if this good from any state */ 72 CHAR inchr; /* input character */ 73 int actno; /* associated action */ 74 struct transx *nxt; 75}; /* next transition */ 76typedef struct transx *trans; 77 78/* Function prototypes */ 79 80_MYPROTOTYPE( VOID fatal, (char *) ); 81_MYPROTOTYPE( VOID setwstate, (int, trans) ); 82_MYPROTOTYPE( int teststate, (int, trans) ); 83_MYPROTOTYPE( trans rdinput, (FILE *, FILE *) ); 84_MYPROTOTYPE( VOID initial, (FILE *, FILE *) ); 85_MYPROTOTYPE( int isin, (char *, int) ); 86_MYPROTOTYPE( int isword, (int) ); 87_MYPROTOTYPE( VOID rdword, (FILE *, char *) ); 88_MYPROTOTYPE( VOID rdstates, (FILE *, FILE *) ); 89_MYPROTOTYPE( trans newtrans, (void) ); 90_MYPROTOTYPE( trans rdrules, (FILE *, FILE *) ); 91_MYPROTOTYPE( VOID statelist, (FILE *, trans) ); 92_MYPROTOTYPE( VOID copyact, (FILE *, FILE *, int) ); 93_MYPROTOTYPE( int faction, (trans, int, int) ); 94_MYPROTOTYPE( VOID emptytbl, (void) ); 95_MYPROTOTYPE( VOID addaction, (int, int, int) ); 96_MYPROTOTYPE( VOID writetbl, (FILE *) ); 97_MYPROTOTYPE( VOID warray, (FILE *, char *, int [], int, char *) ); 98_MYPROTOTYPE( VOID prolog, (FILE *) ); 99_MYPROTOTYPE( VOID epilogue, (FILE *) ); 100_MYPROTOTYPE( VOID copyrest, (FILE *, FILE *) ); 101_MYPROTOTYPE( int gettoken, (FILE *) ); 102_MYPROTOTYPE( VOID rdcmnt, (FILE *) ); 103_MYPROTOTYPE( VOID clrhash, (void) ); 104_MYPROTOTYPE( int hash, (char *) ); 105_MYPROTOTYPE( VOID enter, (char *, int) ); 106_MYPROTOTYPE( int lkup, (char *) ); 107_MYPROTOTYPE( static char* copy, (char *s) ); 108 109/* Variables and tables */ 110 111int lines, nstates, nacts; 112int tbl[MAXSTATES*96]; 113char tokval[MAXWORD]; 114char *tbl_type = TBL_TYPE; 115 116char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\nint\n"; 117 118char *fname = FNAME; /* Generated function name goes here */ 119 120/* Rest of program... */ 121 122char *txt2 = "()\n\ 123{\n\ 124 int c,actno;\n\ 125 extern "; 126 127/* Data type of state table is inserted here (short or int) */ 128 129char *txt2a = 130" tbl[];\n\ 131 while (1) {\n\ 132 c = input() - 32;\n\ 133 if (c < 0 || c > 95) c = 0;\n"; 134 135char *txt2b = " if ((actno = tbl[c + state*96]) != -1)\n\ 136 switch(actno) {\n"; 137 138/* this program's output goes here, followed by final text... */ 139 140char *txt3 = "\n }\n }\n}\n\n"; 141 142 143/* 144 * turn on the bit associated with the given state 145 * 146 */ 147VOID 148setwstate(state,t) int state; trans t; { 149 int idx,msk; 150 idx = state/8; /* byte associated with state */ 151 msk = 0x80 >> (state % 8); /* bit mask for state */ 152 t->states[idx] |= msk; 153} 154 155/* 156 * see if the state is involved in the transition 157 * 158 */ 159int 160teststate(state,t) int state; trans t; { 161 int idx,msk; 162 idx = state/8; 163 msk = 0x80 >> (state % 8); 164 return(t->states[idx] & msk); 165} 166 167 168/* 169 * read input from here... 170 * 171 */ 172 173trans 174rdinput(infp,outfp) FILE *infp, *outfp; { 175 trans x; 176 lines = 1; /* line counter */ 177 nstates = 0; /* no states */ 178 nacts = 0; /* no actions yet */ 179 fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/'); 180 fprintf(outfp,"gwart preprocessor. */\n"); 181 fprintf(outfp,"%c* Do not edit this file; edit the gwart-format ",'/'); 182 fprintf(outfp,"source file instead, */\n"); 183 fprintf(outfp,"%c* and then run it through gwart to produce a new ",'/'); 184 fprintf(outfp,"C source file. */\n\n"); 185 186 initial(infp,outfp); /* read state names, initial defs */ 187 prolog(outfp); /* write out our initial code */ 188 x = rdrules(infp,outfp); /* read rules */ 189 epilogue(outfp); /* write out epilogue code */ 190 return(x); 191} 192 193 194/* 195 * initial - read initial definitions and state names. Returns 196 * on EOF or %%. 197 * 198 */ 199VOID 200initial(infp,outfp) FILE *infp, *outfp; { 201 int c; 202 char wordbuf[MAXWORD]; 203 while ((c = getc(infp)) != EOF) { 204 if (c == '%') { 205 rdword(infp,wordbuf); 206 if (strcmp(wordbuf,"states") == 0) 207 rdstates(infp,outfp); 208 else if (strcmp(wordbuf,"%") == 0) return; 209 else fprintf(outfp,"%%%s",wordbuf); 210 } 211 else putc(c,outfp); 212 if (c == '\n') lines++; 213 } 214} 215 216/* 217 * boolean function to tell if the given character can be part of 218 * a word. 219 * 220 */ 221int 222isin(s,c) char *s; int c; { 223 for (; *s != '\0'; s++) 224 if (*s == (char) c) return(1); 225 return(0); 226} 227int 228isword(c) int c; { 229 static char special[] = ".%_-$@"; /* these are allowable */ 230 return(isalnum(c) || isin(special,c)); 231} 232 233/* 234 * read the next word into the given buffer. 235 * 236 */ 237VOID 238rdword(fp,buf) FILE *fp; char *buf; { 239 int len = 0,c; 240 while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = (char) c; 241 *buf++ = '\0'; /* tie off word */ 242 ungetc(c,fp); /* put break char back */ 243} 244 245/* 246 * read state names, up to a newline. 247 * 248 */ 249VOID 250rdstates(fp,ofp) FILE *fp,*ofp; { 251 int c; 252 char wordbuf[MAXWORD]; 253 while ((c = getc(fp)) != EOF && c != '\n') { 254 if (isspace(c) || c == C_L) continue; /* skip whitespace */ 255 ungetc(c,fp); /* put char back */ 256 rdword(fp,wordbuf); /* read the whole word */ 257 enter(wordbuf,++nstates); /* put into symbol tbl */ 258 fprintf(ofp,"#define %s %d\n",wordbuf,nstates); 259 } 260 lines++; 261} 262 263/* 264 * allocate a new, empty transition node 265 * 266 */ 267trans 268newtrans() { 269 trans new; 270 int i; 271 new = (trans) malloc(sizeof (struct transx)); 272 for (i=0; i<SBYTES; i++) new->states[i] = 0; 273 new->anyst = 0; 274 new->nxt = NULL; 275 return(new); 276} 277 278 279/* 280 * read all the rules. 281 * 282 */ 283 284trans 285rdrules(fp,out) FILE *fp,*out; { 286 trans head,cur,prev; 287 int curtok; 288 head = cur = prev = NULL; 289 while ((curtok = gettoken(fp)) != SEP) 290 291 switch(curtok) { 292 case LBRACK: 293 if (cur == NULL) 294 cur = newtrans(); 295 else 296 fatal("duplicate state list"); 297 statelist(fp,cur); /* set states */ 298 continue; /* prepare to read char */ 299 300 case WORD: 301 if ((int)strlen(tokval) != 1) 302 fatal("multiple chars in state"); 303 if (cur == NULL) { 304 cur = newtrans(); 305 cur->anyst = 1; 306 } 307 cur->actno = ++nacts; 308 cur->inchr = (char) (tokval[0] - 32); 309 if (head == NULL) 310 head = cur; 311 else 312 prev->nxt = cur; 313 prev = cur; 314 cur = NULL; 315 copyact(fp,out,nacts); 316 break; 317 default: fatal("bad input format"); 318 } 319 return(head); 320} 321 322/* 323 * read a list of (comma-separated) states, set them in the 324 * given transition. 325 * 326 */ 327VOID 328statelist(fp,t) FILE *fp; trans t; { 329 int curtok,sval; 330 curtok = COMMA; 331 while (curtok != RBRACK) { 332 if (curtok != COMMA) fatal("missing comma"); 333 if ((curtok = gettoken(fp)) != WORD) fatal("missing state name"); 334 if ((sval = lkup(tokval)) == -1) { 335 fprintf(stderr,"state %s undefined\n",tokval); 336 fatal("undefined state"); 337 } 338 setwstate(sval,t); 339 curtok = gettoken(fp); 340 } 341} 342 343/* 344 * copy an action from the input to the output file 345 * 346 */ 347VOID 348copyact(inp,outp,actno) FILE *inp,*outp; int actno; { 349 int c,bcnt; 350 fprintf(outp,"case %d:\n",actno); 351 while (c = getc(inp), (isspace(c) || c == C_L)) 352 if (c == '\n') lines++; 353 if (c == '{') { 354 bcnt = 1; 355 fputs(" {",outp); 356 while (bcnt > 0 && (c = getc(inp)) != EOF) { 357 if (c == '{') bcnt++; 358 else if (c == '}') bcnt--; 359 else if (c == '\n') lines++; 360 putc(c,outp); 361 } 362 if (bcnt > 0) fatal("action doesn't end"); 363 } else { 364 while (c != '\n' && c != EOF) { 365 putc(c,outp); 366 c = getc(inp); 367 } 368 lines++; 369 } 370 fprintf(outp,"\n break;\n"); 371} 372 373/* 374 * find the action associated with a given character and state. 375 * returns -1 if one can't be found. 376 * 377 */ 378int 379faction(hd,state,chr) trans hd; int state,chr; { 380 while (hd != NULL) { 381 if (hd->anyst || teststate(state,hd)) 382 if (hd->inchr == ('.' - 32) || hd->inchr == (char) chr) 383 return(hd->actno); 384 hd = hd->nxt; 385 } 386 return(-1); 387} 388 389/* 390 * empty the table... 391 * 392 */ 393VOID 394emptytbl() { 395 int i; 396 for (i=0; i<nstates*96; i++) tbl[i] = -1; 397} 398 399/* 400 * add the specified action to the output for the given state and chr. 401 * 402 */ 403VOID 404addaction(act,state,chr) int act,state,chr; { 405 tbl[state*96 + chr] = act; 406} 407 408VOID 409writetbl(fp) FILE *fp; { 410 warray(fp,"tbl",tbl,96*(nstates+1),TBL_TYPE); 411} 412 413 414/* 415 * write an array to the output file, given its name and size. 416 * 417 */ 418VOID 419warray(fp,nam,cont,siz,typ) FILE *fp; char *nam; int cont[],siz; char *typ; { 420 int i; 421 fprintf(fp,"%s %s[] = {\n",typ,nam); 422 for (i = 0; i < siz - 1; ) { 423 fprintf(fp," %2d,",cont[i]); 424 if ((++i % 16) == 0) putc('\n',fp); 425 } 426 fprintf(fp,"%2d\n};\n",cont[siz-1]); 427} 428 429int 430main(argc,argv) int argc; char **argv; { 431 trans head; 432 int state,c; 433 FILE *infile,*outfile; 434 435 if (argc > 1) { 436 if ((infile = fopen(argv[1],"r")) == NULL) { 437 fprintf(stderr,"Can't open %s\n",argv[1]); 438 fatal("unreadable input file"); 439 } 440 } else infile = stdin; 441 442 if (argc > 2) { 443 if ((outfile = fopen(argv[2],"w")) == NULL) { 444 fprintf(stderr,"Can't write to %s\n",argv[2]); 445 fatal("bad output file"); 446 } 447 } else outfile = stdout; 448 449 clrhash(); /* empty hash table */ 450 head = rdinput(infile,outfile); /* read input file */ 451 emptytbl(); /* empty our tables */ 452 for (state = 0; state <= nstates; state++) 453 for (c = 1; c < 96; c++) /* find actions, */ 454 addaction(faction(head,state,c),state,c); /* add to tbl */ 455 writetbl(outfile); 456 copyrest(infile,outfile); 457 printf("%d states, %d actions\n",nstates,nacts); 458 exit(0); 459} 460 461 462/* 463 * fatal error handler 464 * 465 */ 466 467VOID 468fatal(msg) char *msg; { 469 fprintf(stderr,"error in line %d: %s\n",lines,msg); 470 exit(1); 471} 472 473VOID 474prolog(outfp) FILE *outfp; { 475 int c; 476 while ((c = *txt1++) != '\0') putc(c,outfp); 477 while ((c = *fname++) != '\0') putc(c,outfp); 478 while ((c = *txt2++) != '\0') putc(c,outfp); 479 while ((c = *tbl_type++) != '\0') putc(c,outfp); 480 while ((c = *txt2a++) != '\0') putc(c,outfp); 481 while ((c = *txt2b++) != '\0') putc(c,outfp); 482} 483 484VOID 485epilogue(outfp) FILE *outfp; { 486 int c; 487 while ((c = *txt3++) != '\0') putc(c,outfp); 488} 489 490VOID 491copyrest(in,out) FILE *in,*out; { 492 int c; 493 while ((c = getc(in)) != EOF) putc(c,out); 494} 495 496/* 497 * gettoken - returns token type of next token, sets tokval 498 * to the string value of the token if appropriate. 499 * 500 */ 501 502int 503gettoken(fp) FILE *fp; { 504 int c; 505 while (1) { /* loop if reading comments... */ 506 do { 507 c = getc(fp); 508 if (c == '\n') lines++; 509 } while ((isspace(c) || c == C_L)); /* skip whitespace */ 510 switch(c) { 511 case EOF: 512 return(SEP); 513 case '%': 514 if ((c = getc(fp)) == '%') return(SEP); 515 tokval[0] = '%'; 516 tokval[1] = (char) c; 517 rdword(fp,tokval+2); 518 return(WORD); 519 case '<': 520 return(LBRACK); 521 case '>': 522 return(RBRACK); 523 case ',': 524 return(COMMA); 525 case '/': 526 if ((c = getc(fp)) == '*') { 527 rdcmnt(fp); /* skip over the comment */ 528 continue; 529 } else { /* and keep looping */ 530 ungetc(c,fp); /* put this back into input */ 531 c = '/'; /* put character back, fall thru */ 532 } 533 534 default: 535 if (isword(c)) { 536 ungetc(c,fp); 537 rdword(fp,tokval); 538 return(WORD); 539 } else fatal("Invalid character in input"); 540 } 541 } 542} 543 544/* 545 * skip over a comment 546 * 547 */ 548 549VOID 550rdcmnt(fp) FILE *fp; { 551 int c,star,prcnt; 552 prcnt = star = 0; /* no star seen yet */ 553 while (!((c = getc(fp)) == '/' && star)) { 554 if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment"); 555 prcnt = (c == '%'); 556 star = (c == '*'); 557 if (c == '\n') lines++; 558 } 559} 560 561/* 562 * symbol table management for gwart 563 * 564 * entry points: 565 * clrhash - empty hash table. 566 * enter - enter a name into the symbol table 567 * lkup - find a name's value in the symbol table. 568 */ 569#define HASHSIZE 101 /* # of entries in hash table */ 570 571struct sym { 572 char *name; /* symbol name */ 573 int val; /* value */ 574 struct sym *hnxt; /* next on collision chain */ 575} *htab[HASHSIZE]; /* the hash table */ 576 577/* 578 * empty the hash table before using it... 579 * 580 */ 581VOID 582clrhash() { 583 int i; 584 for (i=0; i<HASHSIZE; i++) htab[i] = NULL; 585} 586 587/* 588 * compute the value of the hash for a symbol 589 * 590 */ 591int 592hash(name) char *name; { 593 int sum; 594 for (sum = 0; *name != '\0'; name++) sum += (sum + *name); 595 sum %= HASHSIZE; /* take sum mod hashsize */ 596 if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */ 597 return(sum); 598} 599 600/* 601 * make a private copy of a string... 602 * 603 */ 604static char* 605copy(s) char *s; { 606 char *new; 607 new = (char *) malloc((int)strlen(s) + 1); 608 strcpy(new,s); 609 return(new); 610} 611 612/* 613 * enter state name into the hash table 614 * 615 */ 616VOID 617enter(name,svalue) char *name; int svalue; { 618 int h; 619 struct sym *cur; 620 if (lkup(name) != -1) { 621 fprintf(stderr,"state \"%s\" appears twice...\n", name); 622 exit(1); 623 } 624 h = hash(name); 625 cur = (struct sym *)malloc(sizeof (struct sym)); 626 cur->name = copy(name); 627 cur->val = svalue; 628 cur->hnxt = htab[h]; 629 htab[h] = cur; 630} 631 632/* 633 * find name in the symbol table, return its value. Returns -1 634 * if not found. 635 * 636 */ 637int 638lkup(name) char *name; { 639 struct sym *cur; 640 for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt) 641 if (strcmp(cur->name,name) == 0) return(cur->val); 642 return(-1); 643} 644