1(* Modified by Michael.Norrish@nicta.com.au on 2005-04-13 so that it compiles
2   with both mlton and mosml. *)
3(* Modified by sweeks@acm.org on 2000-8-24.
4 * Ported to MLton.
5 *)
6(*  Lexical analyzer generator for Standard ML.
7        Version 1.7.0, June 1998
8
9Copyright (c) 1989-1992 by Andrew W. Appel,
10   David R. Tarditi, James S. Mattson
11
12This software comes with ABSOLUTELY NO WARRANTY.
13This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
14COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT",
15distributed with this software). You may copy and distribute this software;
16see the COPYRIGHT NOTICE for details and restrictions.
17
18    Changes:
19	07/25/89 (drt): added %header declaration, code to place
20		user declarations at same level as makeLexer, etc.
21		This is needed for the parser generator.
22	  /10/89 (appel): added %arg declaration (see lexgen.doc).
23	  /04/90 (drt): fixed following bug: couldn't use the lexer after an
24		error occurred -- NextTok and inquote weren't being reset
25	10/22/91 (drt): disabled use of lookahead
26	10/23/92 (drt): disabled use of $ operator (which involves lookahead),
27		added handlers for dictionary lookup routine
28	11/02/92 (drt): changed handler for exception Reject in generated lexer
29		to Internal.Reject
30        02/01/94 (appel): Moved the exception handler for Reject in such
31		a way as to allow tail-recursion (improves performance
32		wonderfully!).
33	02/01/94 (appel): Fixed a bug in parsing of state names.
34	05/19/94 (Mikael Pettersson, mpe@ida.liu.se):
35		Transition tables are usually represented as strings, but
36		when the range is too large, int vectors constructed by
37		code like "Vector.vector[1,2,3,...]" are used instead.
38		The problem with this isn't that the vector itself takes
39		a lot of space, but that the code generated by SML/NJ to
40		construct the intermediate list at run-time is *HUGE*. My
41		fix is to encode an int vector as a string literal (using
42		two bytes per int) and emit code to decode the string to
43		a vector at run-time. SML/NJ compiles string literals into
44		substrings in the code, so this uses much less space.
45	06/02/94 (jhr): Modified export-lex.sml to conform to new installation
46		scheme.  Also removed tab characters from string literals.
47	10/05/94 (jhr): Changed generator to produce code that uses the new
48		basis style strings and characters.
49	10/06/94 (jhr) Modified code to compile under new basis style strings
50		and characters.
51	02/08/95 (jhr) Modified to use new List module interface.
52	05/18/95 (jhr) changed Vector.vector to Vector.fromList
53*
54 * $Log$
55 * Revision 1.3  2005/07/21 07:01:27  michaeln
56 * Get mllex to cope with actions that include strings with unbalanced
57 * parentheses.  (Code taken from SML/NJ's mllex.)
58 *
59 * Revision 1.2  2005/04/14 05:42:08  michaeln
60 * Slight change to allow the product of mllex foo to be compiled by mosml
61 * without having to use the -toplevel option.  Also a "fix" for an off-by-one
62 * issue that I think is a bug.
63 *
64 * Revision 1.1  2005/04/13 05:31:30  michaeln
65 * A MoscowML compilable version of the "standard" mllex tool, as used by
66 * both SML/NJ and MLton.  The source code is also compilable by mlton,
67 * though this is cute more than useful as mlton comes with a version of mllex
68 * anyway.
69 *
70 * Revision 1.1.1.1  1998/04/08 18:40:10  george
71 * Version 110.5
72 *
73 * Revision 1.9  1998/01/06 19:23:53  appel
74 *   added %posarg feature to permit position-within-file to be passed
75 *   as a parameter to makeLexer
76 *
77# Revision 1.8  1998/01/06  19:01:48  appel
78#   repaired error messages like "cannot have both %structure and %header"
79#
80# Revision 1.7  1998/01/06  18:55:49  appel
81#   permit %% to be unescaped within regular expressions
82#
83# Revision 1.6  1998/01/06  18:46:13  appel
84#   removed undocumented feature that permitted extra %% at end of rules
85#
86# Revision 1.5  1998/01/06  18:29:23  appel
87#   put yylineno variable inside makeLexer function
88#
89# Revision 1.4  1998/01/06  18:19:59  appel
90#   check for newline inside quoted string
91#
92# Revision 1.3  1997/10/04  03:52:13  dbm
93#   Fix to remove output file if ml-lex fails.
94#
95# Revision 1.2  1997/05/06  01:12:38  george
96# *** empty log message ***
97#
98 * Revision 1.2  1996/02/26  15:02:27  george
99 *    print no longer overloaded.
100 *    use of makestring has been removed and replaced with Int.toString ..
101 *    use of IO replaced with TextIO
102 *
103 * Revision 1.1.1.1  1996/01/31  16:01:15  george
104 * Version 109
105 *
106 *)
107
108(* Subject: lookahead in sml-lex
109   Reply-to: david.tarditi@CS.CMU.EDU
110   Date: Mon, 21 Oct 91 14:13:26 -0400
111
112There is a serious bug in the implementation of lookahead,
113as done in sml-lex, and described in Aho, Sethi, and Ullman,
114p. 134 "Implementing the Lookahead Operator"
115
116We have disallowed the use of lookahead for now because
117of this bug.
118
119As a counter-example to the implementation described in
120ASU, consider the following specification with the
121input string "aba" (this example is taken from
122a comp.compilers message from Dec. 1989, I think):
123
124type lexresult=unit
125val linenum = ref 1
126fun error x = TextIO.output(TextIO.stdErr, x ^ "\n")
127val eof = fn () => ()
128%%
129%structure Lex
130%%
131(a|ab)/ba => (print yytext; print "\n"; ());
132
133The ASU proposal works as follows. Suppose that we are
134using NFA's to represent our regular expressions.  Then to
135build an NFA for e1 / e2, we build an NFA n1 for e1
136and an NFA n2 for e2, and add an epsilon transition
137from e1 to e2.
138
139When lexing, when we encounter the end state of e1e2,
140we take as the end of the string the position in
141the string that was the last occurrence of the state of
142the NFA having a transition on the epsilon introduced
143for /.
144
145Using the example we have above, we'll have an NFA
146with the following states:
147
148
149   1 -- a --> 2 -- b --> 3
150              |          |
151              | epsilon  | epsilon
152              |          |
153              |------------> 4 -- b --> 5 -- a --> 6
154
155On our example, we get the following list of transitions:
156
157a   :   2, 4      (make an epsilon transition from 2 to 4)
158ab  :   3, 4, 5   (make an epsilon transition from 3 to 4)
159aba :   6
160
161If we chose the last state in which we made an epsilon transition,
162we'll chose the transition from 3 to 4, and end up with "ab"
163as our token, when we should have "a" as our token.
164
165*)
166
167functor RedBlack(B : sig type key
168			 val > : key*key->bool
169		     end):
170	    sig type tree
171		type key
172		val empty : tree
173		val insert : key * tree -> tree
174		val lookup : key * tree -> key
175	 	exception notfound of key
176	    end =
177struct
178 open B
179 datatype color = RED | BLACK
180 datatype tree = empty | tree of key * color * tree * tree
181 exception notfound of key
182
183 fun insert (key,t) =
184  let fun f empty = tree(key,RED,empty,empty)
185        | f (tree(k,BLACK,l,r)) =
186	    if key>k
187	    then case f r
188		 of r as tree(rk,RED, rl as tree(rlk,RED,rll,rlr),rr) =>
189			(case l
190			 of tree(lk,RED,ll,lr) =>
191				tree(k,RED,tree(lk,BLACK,ll,lr),
192					   tree(rk,BLACK,rl,rr))
193			  | _ => tree(rlk,BLACK,tree(k,RED,l,rll),
194						tree(rk,RED,rlr,rr)))
195		  | r as tree(rk,RED,rl, rr as tree(rrk,RED,rrl,rrr)) =>
196			(case l
197			 of tree(lk,RED,ll,lr) =>
198				tree(k,RED,tree(lk,BLACK,ll,lr),
199					   tree(rk,BLACK,rl,rr))
200			  | _ => tree(rk,BLACK,tree(k,RED,l,rl),rr))
201	          | r => tree(k,BLACK,l,r)
202	    else if k>key
203	    then case f l
204	         of l as tree(lk,RED,ll, lr as tree(lrk,RED,lrl,lrr)) =>
205			(case r
206			 of tree(rk,RED,rl,rr) =>
207				tree(k,RED,tree(lk,BLACK,ll,lr),
208					   tree(rk,BLACK,rl,rr))
209			  | _ => tree(lrk,BLACK,tree(lk,RED,ll,lrl),
210						tree(k,RED,lrr,r)))
211		  | l as tree(lk,RED, ll as tree(llk,RED,lll,llr), lr) =>
212			(case r
213			 of tree(rk,RED,rl,rr) =>
214				tree(k,RED,tree(lk,BLACK,ll,lr),
215					   tree(rk,BLACK,rl,rr))
216			  | _ => tree(lk,BLACK,ll,tree(k,RED,lr,r)))
217	          | l => tree(k,BLACK,l,r)
218	    else tree(key,BLACK,l,r)
219        | f (tree(k,RED,l,r)) =
220	    if key>k then tree(k,RED,l, f r)
221	    else if k>key then tree(k,RED, f l, r)
222	    else tree(key,RED,l,r)
223   in case f t
224      of tree(k,RED, l as tree(_,RED,_,_), r) => tree(k,BLACK,l,r)
225       | tree(k,RED, l, r as tree(_,RED,_,_)) => tree(k,BLACK,l,r)
226       | t => t
227  end
228
229
230 fun lookup (key,t) =
231  let fun look empty = raise (notfound key)
232	| look (tree(k,_,l,r)) =
233		if k>key then look l
234		else if key>k then look r
235		else k
236   in look t
237  end
238
239end
240
241signature LEXGEN =
242  sig
243     val lexGen: string -> unit
244  end
245
246structure LexGen: LEXGEN =
247   struct
248   open Array List
249   infix 9 sub
250
251   datatype token = CHARS of bool array | QMARK | STAR | PLUS | BAR
252	  | LP | RP | CARAT | DOLLAR | SLASH | STATE of string list
253	  | REPS of int * int | ID of string | ACTION of string
254	  | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES
255	  | COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG
256
257   datatype exp = EPS | CLASS of bool array * int | CLOSURE of exp
258		| ALT of exp * exp | CAT of exp * exp | TRAIL of int
259		| END of int
260
261   (* flags describing input Lex spec. - unnecessary code is omitted *)
262   (* if possible *)
263
264   val CharFormat = ref false;
265   val UsesTrailingContext = ref false;
266   val UsesPrevNewLine = ref false;
267
268   (* flags for various bells & whistles that Lex has.  These slow the
269      lexer down and should be omitted from production lexers (if you
270      really want speed) *)
271
272   val CountNewLines = ref false;
273   val PosArg = ref false;
274   val HaveReject = ref false;
275
276   (* Can increase size of character set *)
277
278   val CharSetSize: int ref = ref 129;
279
280   (* Can name structure or declare header code *)
281
282   val StrName = ref "Mlex"
283   val HeaderCode = ref ""
284   val HeaderDecl = ref false
285   val ArgCode = ref (NONE: string option)
286   val StrDecl = ref false
287
288   val ResetFlags = fn () => (CountNewLines := false; HaveReject := false;
289			      PosArg := false;
290			      UsesTrailingContext := false;
291			       CharSetSize := 129; StrName := "Mlex";
292				HeaderCode := ""; HeaderDecl:= false;
293				ArgCode := NONE;
294				StrDecl := false)
295
296   val LexOut = ref(TextIO.stdOut)
297   val removeTABs = String.translate (fn #"\t" => "    " | c => str c)
298   fun say x = TextIO.output(!LexOut, removeTABs x)
299
300(* Union: merge two sorted lists of integers *)
301
302fun union(a,b) = let val rec merge = fn
303	  (nil,nil,z) => z
304	| (nil,el::more,z) => merge(nil,more,el::z)
305	| (el::more,nil,z) => merge(more,nil,el::z)
306	| (x::morex,y::morey,z) => if (x:int)=(y:int)
307		then merge(morex,morey,x::z)
308		else if x>y then merge(morex,y::morey,x::z)
309		else merge(x::morex,morey,y::z)
310	in merge(rev a,rev b,nil)
311end
312
313(* Nullable: compute if a important expression parse tree node is nullable *)
314
315val rec nullable = fn
316	  EPS => true
317	| CLASS(_) => false
318	| CLOSURE(_) => true
319	| ALT(n1,n2) => nullable(n1) orelse nullable(n2)
320	| CAT(n1,n2) => nullable(n1) andalso nullable(n2)
321	| TRAIL(_) => true
322	| END(_) => false
323
324(* FIRSTPOS: firstpos function for parse tree expressions *)
325
326and firstpos = fn
327	  EPS => nil
328	| CLASS(_,i) => [i]
329	| CLOSURE(n) => firstpos(n)
330	| ALT(n1,n2) => union(firstpos(n1),firstpos(n2))
331	| CAT(n1,n2) => if nullable(n1) then union(firstpos(n1),firstpos(n2))
332		else firstpos(n1)
333	| TRAIL(i) => [i]
334	| END(i) => [i]
335
336(* LASTPOS: Lastpos function for parse tree expressions *)
337
338and lastpos = fn
339	  EPS => nil
340	| CLASS(_,i) => [i]
341	| CLOSURE(n) => lastpos(n)
342	| ALT(n1,n2) => union(lastpos(n1),lastpos(n2))
343	| CAT(n1,n2) => if nullable(n2) then union(lastpos(n1),lastpos(n2))
344		else lastpos(n2)
345	| TRAIL(i) => [i]
346	| END(i) => [i]
347	;
348
349(* ++: Increment an integer reference *)
350
351fun ++(x) : int = (x := !x + 1; !x);
352
353structure dict =
354    struct
355	type 'a relation = 'a * 'a -> bool
356        abstype ('b,'a) dictionary = DATA of { Table : ('b * 'a) list,
357				          Leq : 'b * 'b -> bool }
358	with
359    	    exception LOOKUP
360	    fun create Leqfunc = DATA { Table = nil, Leq = Leqfunc }
361	    fun lookup (DATA { Table = entrylist, Leq = leq }) key =
362		let fun search [] = raise LOOKUP
363		      | search((k,item)::entries) =
364			if leq(key,k)
365			then if leq(k,key) then item else raise LOOKUP
366			else search entries
367		in search entrylist
368	        end
369	     fun enter (DATA { Table = entrylist, Leq = leq })
370		(newentry as (key : 'b,item :'a)) : ('b,'a) dictionary =
371		   let val gt = fn a => fn b => not (leq(a,b))
372		       val eq = fn k => fn k' => (leq(k,k')) andalso (leq(k',k))
373		       fun update nil = [ newentry ]
374			 | update ((entry as (k,_))::entries) =
375			      if (eq  key k) then newentry::entries
376			      else if gt k key then newentry::(entry::entries)
377			      else entry::(update entries)
378		   in DATA { Table = update entrylist, Leq = leq }
379	           end
380	     fun listofdict (DATA { Table = entrylist,Leq = leq}) =
381		let fun f (nil,r) = rev r
382		      | f (a::b,r) = f (b,a::r)
383	   	in f(entrylist,nil)
384		end
385      end
386end
387
388open dict;
389
390(* INPUT.ML : Input w/ one character push back capability *)
391
392val LineNum: int ref = ref 1;
393
394abstype ibuf =
395	BUF of TextIO.instream * {b : string ref, p : int ref}
396with
397	fun make_ibuf(s) = BUF (s, {b=ref"", p = ref 0})
398	fun close_ibuf (BUF (s,_)) = TextIO.closeIn(s)
399	exception eof
400	fun getch (a as (BUF(s,{b,p}))) =
401		 if (!p = (size (!b)))
402		   then (b := TextIO.inputN(s, 1024);
403			 p := 0;
404			 if (size (!b))=0
405			    then raise eof
406			    else getch a)
407		   else (let val ch = String.sub(!b,!p)
408			 in (if ch = #"\n"
409				 then LineNum := !LineNum + 1
410				 else ();
411			     p := !p + 1;
412			     ch)
413			 end)
414	fun ungetch(BUF(s,{b,p})) = (
415	   p := !p - 1;
416	   if String.sub(!b,!p) = #"\n"
417	      then LineNum := !LineNum - 1
418	      else ())
419end;
420
421exception Error
422
423fun prErr x = (
424      TextIO.output (TextIO.stdErr, String.concat [
425	  "ml-lex: error, line ", (Int.toString (!LineNum)), ": ", x, "\n"
426	]);
427      raise Error)
428fun prSynErr x = (
429      TextIO.output (TextIO.stdErr, String.concat [
430	  "ml-lex: syntax error, line ", (Int.toString (!LineNum)), ": ", x, "\n"
431	]);
432      raise Error)
433
434exception SyntaxError; (* error in user's input file *)
435
436exception LexError; (* unexpected error in lexer *)
437
438val LexBuf = ref(make_ibuf(TextIO.stdIn));
439val LexState = ref 0;
440val NextTok = ref BOF;
441val inquote = ref false;
442
443fun AdvanceTok () : unit = let
444      fun isLetter c =
445	    ((c >= #"a") andalso (c <= #"z")) orelse
446	    ((c >= #"A") andalso (c <= #"Z"))
447      fun isDigit c = (c >= #"0") andalso (c <= #"9")
448    (* check for valid (non-leading) identifier character (added by JHR) *)
449      fun isIdentChr c =
450	    ((isLetter c) orelse (isDigit c) orelse (c = #"_") orelse (c = #"'"))
451      fun atoi s = let
452	    fun num (c::r, n) = if isDigit c
453		  then num (r, 10*n + (Char.ord c - Char.ord #"0"))
454		  else n
455	      | num ([], n) = n
456	    in
457	      num (explode s, 0)
458	    end
459
460      fun skipws () = (case nextch()
461	     of #" " => skipws()
462	      | #"\t" => skipws()
463	      | #"\n" => skipws()
464              | #"\r" => skipws()
465	      | x => x
466	    (* end case *))
467
468      and nextch () = getch(!LexBuf)
469
470      and escaped () = (case nextch()
471	     of #"b" => #"\008"
472	      | #"n" => #"\n"
473	      | #"t" => #"\t"
474	      | #"h" => #"\128"
475	      | x => let
476		  fun err t = prErr("illegal ascii escape '"^(implode(rev t))^"'")
477		  fun cvt c = (Char.ord c - Char.ord #"0")
478		  fun f (n: int, c, t) = if c=3
479			then if n >= (!CharSetSize)
480			  then err t
481			  else Char.chr n
482		        else let val ch=nextch()
483			  in
484			    if isDigit ch
485			      then f(n*10+(cvt ch), c+1, ch::t)
486		  	      else err t
487			  end
488		  in
489		    if isDigit x then f(cvt x, 1, [x]) else x
490		  end
491	    (* end case *))
492
493      and onechar x = let val c = array(!CharSetSize, false)
494	      in
495		update(c, Char.ord(x), true); CHARS(c)
496	      end
497
498      in case !LexState of 0 => let val makeTok = fn () =>
499		case skipws()
500			(* Lex % operators *)
501		 of #"%" => (case nextch() of
502		  	  #"%" => LEXMARK
503			| a => let fun f s =
504				    let val a = nextch()
505				    in if isLetter a then f(a::s)
506					else (ungetch(!LexBuf);
507					      implode(rev s))
508				    end
509			        in case f [a]
510				 of "reject" => REJECT
511				  | "count"  => COUNT
512				  | "full"   => FULLCHARSET
513				  | "s"      => LEXSTATES
514				  | "S"      => LEXSTATES
515				  | "structure" => STRUCT
516				  | "header" => HEADER
517				  | "arg"    => ARG
518				  | "posarg" => POSARG
519			          | _ => prErr "unknown % operator "
520			       end
521			     )
522			(* semicolon (for end of LEXSTATES) *)
523		| #";" => SEMI
524			(* anything else *)
525		| ch => if isLetter(ch) then
526			 let fun getID matched =
527			     let val x = nextch()
528(**** fix by JHR
529			     in if isLetter(x) orelse isDigit(x) orelse
530                                   x = "_" orelse x = "'"
531****)
532			     in if (isIdentChr x)
533				 then getID (x::matched)
534				 else (ungetch(!LexBuf); implode(rev matched))
535			     end
536			in ID(getID [ch])
537			end
538		      else (prSynErr ("bad character: " ^
539                                      String.toString (String.str ch)))
540	in NextTok := makeTok()
541	end
542	| 1 => let val rec makeTok = fn () =>
543		if !inquote then case nextch() of
544			(* inside quoted string *)
545		  #"\\" => onechar(escaped())
546		| #"\"" => (inquote := false; makeTok())
547		| #"\n" => (prSynErr "end-of-line inside quoted string";
548			    inquote := false; makeTok())
549		| x => onechar(x)
550		else case skipws() of
551			(* single character operators *)
552		  #"?" => QMARK
553		| #"*" => STAR
554		| #"+" => PLUS
555		| #"|" => BAR
556		| #"(" => LP
557		| #")" => RP
558		| #"^" => CARAT
559		| #"$" => DOLLAR
560		| #"/" => SLASH
561		| #";" => SEMI
562		| #"." => let val c = array(!CharSetSize,true) in
563				update(c,10,false); CHARS(c)
564			end
565			(* assign and arrow *)
566		| #"=" => let val c = nextch() in
567			if c = #">" then ARROW else (ungetch(!LexBuf); ASSIGN)
568		end
569			(* character set *)
570		| #"[" => let val classch = fn () => let val x = skipws()
571				in if x = #"\\" then (true,escaped()) else (false,x)
572				end;
573			val (_,first) = classch();
574			val flag = (first <> #"^");
575			val c = array(!CharSetSize,not flag);
576			fun add NONE = ()
577			  | add (SOME x) = update(c, Char.ord(x), flag)
578			and range (x, y) = if x>y
579			      then (prErr "bad char. range")
580			      else let
581				val i = ref(Char.ord(x)) and j = Char.ord(y)
582				in while !i<=j do (
583				  add (SOME(Char.chr(!i)));
584				  i := !i + 1)
585				end
586			and getClass last = (case classch()
587			     of (false,#"]") => (add(last); c)
588			      | (_,#"-") => (case last
589				   of NONE => getClass(SOME #"-")
590				    | (SOME last') => let val (esc,x) = classch()
591					in
592					  if not esc andalso x = #"]"
593					    then (add(last); add(SOME #"-"); c)
594					    else (range(last',x); getClass(NONE))
595					end
596				  (* end case *))
597			      | (_,x) => (add(last); getClass(SOME x))
598			    (* end case *))
599		in CHARS(getClass(if first = #"^" then NONE else SOME first))
600		end
601			(* Start States specification *)
602		| #"<" => let val rec get_state = fn (prev,matched) =>
603			case nextch() of
604			  #">" => matched::prev
605			| #"," => get_state(matched::prev,"")
606			| x => if isIdentChr(x)
607				then get_state(prev,matched ^ String.str x)
608				else (prSynErr "bad start state list")
609		in STATE(get_state(nil,""))
610		end
611			(* {id} or repititions *)
612		| #"{" => let val ch = nextch() in if isLetter(ch) then
613			let fun getID matched = (case nextch()
614			  of #"}" => matched
615			   | x => if (isIdentChr x) then
616				    getID(matched ^ String.str x)
617				  else (prErr "invalid char. class name")
618			 (* end case *))
619			in ID(getID(String.str ch))
620			end
621			else if isDigit(ch) then
622			 let fun get_r (matched, r1) = (case nextch()
623				 of #"}" => let val n = atoi(matched) in
624					if r1 = ~1 then (n,n) else (r1,n)
625					end
626				  | #"," => if r1 = ~1 then get_r("",atoi(matched))
627				       else (prErr "invalid repetitions spec.")
628				  | x => if isDigit(x)
629				    then get_r(matched ^ String.str x,r1)
630			            else (prErr "invalid char in repetitions spec")
631				(* end case *))
632			 in REPS(get_r(String.str ch,~1))
633			 end
634			else (prErr "bad repetitions spec")
635		end
636			(* Lex % operators *)
637		| #"\\" => onechar(escaped())
638			(* start quoted string *)
639		| #"\"" => (inquote := true; makeTok())
640			(* anything else *)
641		| ch => onechar(ch)
642	in NextTok := makeTok()
643	end
644	| 2 => NextTok :=
645               (case skipws() of
646                  #"(" =>
647                  let
648                    fun loop_to_end (backslash, x) =
649                      let
650                        val c    = getch (! LexBuf)
651                        val notb = not backslash
652                        val nstr = c :: x
653                      in
654                        case c of
655                          #"\"" => if notb then nstr
656                                   else loop_to_end (false, nstr)
657                        | _ => loop_to_end (c = #"\\" andalso notb, nstr)
658                      end
659                    fun GetAct (lpct, x) =
660                      let
661                        val c    = getch (! LexBuf)
662                        val nstr = c :: x
663                      in
664                        case c of
665                          #"\"" => GetAct (lpct, loop_to_end (false, nstr))
666                        | #"(" => GetAct (lpct + 1, nstr)
667                        | #")" => if lpct = 0 then implode (rev x)
668                                  else GetAct(lpct - 1, nstr)
669                        | _ => GetAct(lpct, nstr)
670                      end
671                  in
672                    ACTION (GetAct (0,nil))
673                  end
674                | #";" => SEMI
675                | c => (prSynErr ("invalid character " ^
676                                  String.toString (String.str c))))
677	| _ => raise LexError
678end
679handle eof => NextTok := EOF ;
680
681fun GetTok (_:unit) : token =
682	let val t = !NextTok in AdvanceTok(); t
683	end;
684val SymTab = ref (create String.<=) : (string,exp) dictionary ref
685
686fun GetExp () : exp =
687
688	let val rec optional = fn e => ALT(EPS,e)
689
690	    and lookup' = fn name =>
691		lookup(!SymTab) name
692		handle LOOKUP => prErr ("bad regular expression name: "^
693					name)
694
695	and newline = fn () => let val c = array(!CharSetSize,false) in
696		update(c,10,true); c
697		end
698
699	and endline = fn e => trail(e,CLASS(newline(),0))
700
701	and trail = fn (e1,e2) => CAT(CAT(e1,TRAIL(0)),e2)
702
703	and closure1 = fn e => CAT(e,CLOSURE(e))
704
705	and repeat = fn (min,max,e) => let val rec rep = fn
706		  (0,0) => EPS
707		| (0,1) => ALT(e,EPS)
708		| (0,i) => CAT(rep(0,1),rep(0,i-1))
709		| (i,j) => CAT(e,rep(i-1,j-1))
710	in rep(min,max)
711	end
712
713	and exp0 = fn () => case GetTok() of
714		  CHARS(c) => exp1(CLASS(c,0))
715		| LP => let val e = exp0() in
716		 if !NextTok = RP then
717		  (AdvanceTok(); exp1(e))
718		 else (prSynErr "missing ')'") end
719		| ID(name) => exp1(lookup' name)
720		| _ => raise SyntaxError
721
722	and exp1 = fn (e) => case !NextTok of
723		  SEMI => e
724		| ARROW => e
725		| EOF => e
726		| LP => exp2(e,exp0())
727		| RP => e
728		| t => (AdvanceTok(); case t of
729			  QMARK => exp1(optional(e))
730			| STAR => exp1(CLOSURE(e))
731			| PLUS => exp1(closure1(e))
732			| CHARS(c) => exp2(e,CLASS(c,0))
733			| BAR => ALT(e,exp0())
734			| DOLLAR => (UsesTrailingContext := true; endline(e))
735			| SLASH => (UsesTrailingContext := true;
736				    trail(e,exp0()))
737			| REPS(i,j) => exp1(repeat(i,j,e))
738			| ID(name) => exp2(e,lookup' name)
739			| _ => raise SyntaxError)
740
741	and exp2 = fn (e1,e2) => case !NextTok of
742		  SEMI => CAT(e1,e2)
743		| ARROW => CAT(e1,e2)
744		| EOF => CAT(e1,e2)
745		| LP => exp2(CAT(e1,e2),exp0())
746		| RP => CAT(e1,e2)
747		| t => (AdvanceTok(); case t of
748		  	  QMARK => exp1(CAT(e1,optional(e2)))
749			| STAR => exp1(CAT(e1,CLOSURE(e2)))
750			| PLUS => exp1(CAT(e1,closure1(e2)))
751			| CHARS(c) => exp2(CAT(e1,e2),CLASS(c,0))
752			| BAR => ALT(CAT(e1,e2),exp0())
753			| DOLLAR => (UsesTrailingContext := true;
754				     endline(CAT(e1,e2)))
755			| SLASH => (UsesTrailingContext := true;
756				    trail(CAT(e1,e2),exp0()))
757			| REPS(i,j) => exp1(CAT(e1,repeat(i,j,e2)))
758			| ID(name) => exp2(CAT(e1,e2),lookup' name)
759			| _ => raise SyntaxError)
760in exp0()
761end;
762val StateTab = ref(create(String.<=)) : (string,int) dictionary ref
763
764val StateNum: int ref = ref 0;
765
766fun GetStates () : int list =
767
768   let fun add nil sl = sl
769  	  | add (x::y) sl = add y (union ([lookup (!StateTab)(x)
770					   handle LOOKUP =>
771					      prErr ("bad state name: "^x)
772					  ],sl))
773
774	fun addall i sl =
775	    if i <= !StateNum then addall (i+2) (union ([i],sl))
776	    else sl
777
778	fun incall (x::y) = (x+1)::incall y
779	  | incall nil = nil
780
781	fun addincs nil = nil
782  	  | addincs (x::y) = x::(x+1)::addincs y
783
784	val state_list =
785	   case !NextTok of
786	     STATE s => (AdvanceTok(); LexState := 1; add s nil)
787	     | _ => addall 1 nil
788
789      in case !NextTok
790	   of CARAT => (LexState := 1; AdvanceTok(); UsesPrevNewLine := true;
791			incall state_list)
792	    | _ => addincs state_list
793      end
794
795val LeafNum: int ref = ref ~1;
796
797fun renum(e : exp) : exp =
798	let val rec label = fn
799	  EPS => EPS
800	| CLASS(x,_) => CLASS(x,++LeafNum)
801	| CLOSURE(e) => CLOSURE(label(e))
802	| ALT(e1,e2) => ALT(label(e1),label(e2))
803	| CAT(e1,e2) => CAT(label(e1),label(e2))
804	| TRAIL(i) => TRAIL(++LeafNum)
805	| END(i) => END(++LeafNum)
806in label(e)
807end;
808
809exception ParseError;
810
811fun parse() : (string * (int list * exp) list * ((string,string) dictionary)) =
812	let val Accept = ref (create String.<=) : (string,string) dictionary ref
813	val rec ParseRtns = fn l => case getch(!LexBuf) of
814		  #"%" => let val c = getch(!LexBuf) in
815		    	   if c = #"%" then (implode (rev l))
816			   else ParseRtns(c :: #"%" :: l)
817			end
818		| c => ParseRtns(c::l)
819	and ParseDefs = fn () =>
820		(LexState:=0; AdvanceTok(); case !NextTok of
821		  LEXMARK => ()
822		| LEXSTATES =>
823		   let fun f () = (case !NextTok of (ID i) =>
824				    (StateTab := enter(!StateTab)(i,++StateNum);
825				     ++StateNum; AdvanceTok(); f())
826					| _ => ())
827		   in AdvanceTok(); f ();
828		      if !NextTok=SEMI then ParseDefs() else
829			(prSynErr "expected ';'")
830		   end
831		| ID x => (LexState:=1; AdvanceTok(); if GetTok() = ASSIGN
832			  then (SymTab := enter(!SymTab)(x,GetExp());
833			       if !NextTok = SEMI then ParseDefs()
834			       else (prSynErr "expected ';'"))
835			else raise SyntaxError)
836		| REJECT => (HaveReject := true; ParseDefs())
837		| COUNT => (CountNewLines := true; ParseDefs())
838		| FULLCHARSET => (CharSetSize := 256; ParseDefs())
839		| HEADER => (LexState := 2; AdvanceTok();
840			     case GetTok()
841			     of ACTION s =>
842				if (!StrDecl) then
843				   (prErr "cannot have both %structure and %header \
844				    \declarations")
845				else if (!HeaderDecl) then
846				   (prErr "duplicate %header declarations")
847				else
848				    (HeaderCode := s; LexState := 0;
849				     HeaderDecl := true; ParseDefs())
850				| _ => raise SyntaxError)
851	        | POSARG => (PosArg := true; ParseDefs())
852                | ARG => (LexState := 2; AdvanceTok();
853			     case GetTok()
854			     of ACTION s =>
855				(case !ArgCode
856				   of SOME _ => prErr "duplicate %arg declarations"
857				    | NONE => ArgCode := SOME s;
858				 LexState := 0;
859				 ParseDefs())
860				| _ => raise SyntaxError)
861		| STRUCT => (AdvanceTok();
862			    case !NextTok of
863			       (ID i) =>
864			        if (!HeaderDecl) then
865				   (prErr "cannot have both %structure and %header \
866				    \declarations")
867				else if (!StrDecl) then
868				   (prErr "duplicate %structure declarations")
869				else (StrName := i; StrDecl := true)
870			         | _  => (prErr "expected ID");
871				ParseDefs())
872		| _ => raise SyntaxError)
873	and ParseRules =
874		fn rules => (LexState:=1; AdvanceTok(); case !NextTok of
875		  EOF => rules
876		| _ =>
877		 let val s = GetStates()
878		     val e = renum(CAT(GetExp(),END(0)))
879		 in
880		 if !NextTok = ARROW then
881		   (LexState:=2; AdvanceTok();
882		    case GetTok() of ACTION(act) =>
883		      if !NextTok=SEMI then
884		        (Accept:=enter(!Accept) (Int.toString (!LeafNum),act);
885		         ParseRules((s,e)::rules))
886		      else (prSynErr "expected ';'")
887		    | _ => raise SyntaxError)
888		  else (prSynErr "expected '=>'")
889		end)
890in let val usercode = ParseRtns nil
891   in (ParseDefs(); (usercode,ParseRules(nil),!Accept))
892   end
893end handle SyntaxError => (prSynErr "")
894
895fun makebegin () : unit =
896   let fun make nil = ()
897	 | make ((x,n:int)::y)=(say "val "; say x; say " = " ;
898				say "STARTSTATE ";
899				say (Int.toString n); say ";\n"; make y)
900   in say "\n(* start state definitions *)\n\n"; make(listofdict(!StateTab))
901   end
902
903structure L =
904	struct
905	  nonfix >
906	  type key = int list * string
907	  fun > ((key,item:string),(key',item')) =
908	    let fun f ((a:int)::a') (b::b') = if Int.> (a,b) then true
909					   else if a=b then f a' b'
910					   else false
911		  | f _ _ = false
912	    in f key key'
913	    end
914	end
915
916structure RB = RedBlack(L)
917
918fun maketable (fins:(int * (int list)) list,
919	     tcs :(int * (int list)) list,
920	     tcpairs: (int * int) list,
921	     trans : (int*(int list)) list) : unit =
922
923(* Fins = (state #, list of final leaves for the state) list
924   tcs = (state #, list of trailing context leaves which begin in this state)
925	 list
926   tcpairs = (trailing context leaf, end leaf) list
927   trans = (state #,list of transitions for state) list *)
928
929   let datatype elem = N of int | T of int | D of int
930       val count = ref 0
931       val _ = (if length(trans)<256 then CharFormat := true
932		 else CharFormat := false;
933		 if !UsesTrailingContext then
934    		     (say "\ndatatype yyfinstate = N of int | \
935			   \ T of int | D of int\n")
936		 else say "\ndatatype yyfinstate = N of int";
937		 say "\ntype statedata = {fin : yyfinstate list, trans: ";
938		 case !CharFormat of
939		       true => say "string}"
940		     | false => say "int Vector.vector}";
941	         say "\n(* transition & final state table *)\nval tab = let\n";
942		 case !CharFormat of
943		       true => ()
944		     | false =>
945		       (say "fun decode s k =\n";
946			say "  let val k' = k + k\n";
947			say "      val hi = Char.ord(String.sub(s, k'))\n";
948			say "      val lo = Char.ord(String.sub(s, k' + 1))\n";
949			say "  in hi * 256 + lo end\n"))
950
951      val newfins =
952	let fun IsEndLeaf t =
953	     let fun f ((l,e)::r) = if (e=t) then true else f r
954		   | f nil = false in f tcpairs end
955
956	 fun GetEndLeaf t =
957	   let fun f ((tl,el)::r) = if (tl=t) then el else f r
958		 | f [] = raise Fail "GetEndLeaf"
959	   in f tcpairs
960	   end
961	 fun GetTrConLeaves s =
962	   let fun f ((s',l)::r) = if (s = s') then l else f r
963	         | f nil = nil
964	   in f tcs
965	   end
966	 fun sort_leaves s =
967	   let fun insert (x:int) (a::b) =
968		 if (x <= a) then x::(a::b)
969		 else a::(insert x b)
970		 | insert x nil = [x]
971	   in List.foldr (fn (x,r) => insert x r) [] s
972	   end
973	 fun conv a = if (IsEndLeaf a) then (D a) else (N a)
974	 fun merge (a::a',b::b') =
975	   if (a <= b) then (conv a)::merge(a',b::b')
976	   else (T b)::(merge(a::a',b'))
977	   | merge (a::a',nil) = (conv a)::(merge (a',nil))
978	   | merge (nil,b::b') = (T b)::(merge (b',nil))
979	   | merge (nil,nil) = nil
980
981	in map (fn (x,l) =>
982	  rev (merge (l,
983		sort_leaves (map (fn x => GetEndLeaf x) (GetTrConLeaves x)))))
984		    fins
985	end
986
987	val rs =
988	 let open RB
989	     fun makeItems x =
990	       let fun emit8(x, pos) =
991		     let val s = StringCvt.padLeft #"0" 3 (Int.toString x)
992		     in
993		       case pos
994			 of 16	=> (say "\\\n\\\\"; say s; 1)
995			  | _	=> (say "\\"; say s; pos+1)
996		     end
997		   fun emit16(x, pos) =
998		     let val hi8 = x div 256
999			 val lo8 = x - hi8 * 256	(* x rem 256 *)
1000		     in
1001		       emit8(lo8, emit8(hi8, pos))
1002		     end
1003		   fun MakeString([], _, _) = ()
1004		     | MakeString(x::xs, emitter, pos) =
1005			MakeString(xs, emitter, emitter(x, pos))
1006	        in case !CharFormat of
1007		    true => (say "\n\""; MakeString(x,emit8,0); say "\"\n")
1008		  | false => (say (Int.toString(length x));
1009		     say ",\n\""; MakeString(x,emit16,0); say "\"\n")
1010	        end
1011
1012	    fun makeEntry(nil,rs,t) = rev rs
1013	      | makeEntry(((l:int,x)::y),rs,t) =
1014	          let val name = (Int.toString l)
1015		  in let val (r,n) = lookup ((x,name),t)
1016		      in makeEntry(y,(n::rs),t)
1017		      end handle notfound _ =>
1018                        (count := !count+1;
1019                          say " ("; say name; say ",";
1020		          makeItems x; say "),\n";
1021		         makeEntry(y,(name::rs),(insert ((x,name),t))))
1022	   	  end
1023
1024            val _ = say "val s = [\n"
1025            val res =  makeEntry(trans,nil,empty)
1026            val _ =
1027              case !CharFormat
1028               of true => (say "(0, \"\")]\n"; say "fun f x = x\n")
1029                | false => (say "(0, 0, \"\")]\n";
1030                    say "fun f(n, i, x) = (n, Vector.tabulate(i, decode x))\n")
1031
1032            val _ = say "val s = map f (rev (tl (rev s)))\n"
1033            val _ = say "exception LexHackingError\n"
1034            val _ = say "fun look ((j,x)::r, i) = if i = j then x else look(r, i)\n"
1035            val _ = say "  | look ([], i) = raise LexHackingError\n"
1036
1037        val _ = say "fun g {fin=x, trans=i} = {fin=x, trans=look(s,i)}\n"
1038 	 in res
1039	end
1040
1041	fun makeTable(nil,nil) = ()
1042	  | makeTable(a::a',b::b') =
1043	     let fun makeItems nil = ()
1044		   | makeItems (hd::tl) =
1045		     let val (t,n) =
1046			 case hd of
1047			   (N i) => ("(N ",i)
1048			 | (T i) => ("(T ",i)
1049			 | (D i) => ("(D ",i)
1050		     in (say t; say (Int.toString n); say ")";
1051			 if null tl
1052			 then ()
1053			 else (say ","; makeItems tl))
1054		     end
1055	      in (say "{fin = ["; makeItems b;
1056		  say "], trans = "; say a; say "}";
1057		  if null a'
1058		  then ()
1059		  else (say ",\n"; makeTable(a',b')))
1060	      end
1061	  | makeTable _ = raise Fail "makeTable"
1062
1063	fun msg x = TextIO.output(TextIO.stdOut, x)
1064
1065  in (say "in Vector.fromList(map g\n["; makeTable(rs,newfins);
1066      say "])\nend\n";
1067    msg ("\nNumber of states = " ^ (Int.toString (length trans)));
1068    msg ("\nNumber of distinct rows = " ^ (Int.toString (!count)));
1069    msg ("\nApprox. memory size of trans. table = " ^
1070	  (Int.toString (!count*(!CharSetSize)*(if !CharFormat then 1 else 8))));
1071    msg " bytes\n")
1072end
1073
1074(* makeaccept: Takes a (string,string) dictionary, prints case statement for
1075   accepting leaf actions.  The key strings are the leaf #'s, the data strings
1076   are the actions *)
1077
1078fun makeaccept ends =
1079    let fun startline f = if f then say "  " else say "| "
1080        fun stripLWS s =
1081            Substring.string (Substring.dropl Char.isSpace (Substring.full s))
1082	 fun make(nil,f) = (startline f; say "_ => raise Internal.LexerError\n")
1083	  | make((x,a)::y,f) = (startline f; say x; say " => ";
1084				if Substring.size(#2 (Substring.position "yytext" (Substring.full a))) = 0
1085 then
1086                                     (say "("; say a; say ")")
1087                                else (say "let val yytext=yymktext() in ";
1088                                      say (stripLWS a); say " end");
1089                                say "\n"; make(y,false))
1090    in make (listofdict(ends),true)
1091    end
1092
1093fun leafdata(e:(int list * exp) list) =
1094	let val fp = array(!LeafNum + 1,nil)
1095	and leaf = array(!LeafNum + 1,EPS)
1096	and tcpairs = ref nil
1097	and trailmark = ref ~1;
1098	val rec add = fn
1099		  (nil,x) => ()
1100		| (hd::tl,x) => (update(fp,hd,union(fp sub hd,x));
1101			add(tl,x))
1102	and moredata = fn
1103		  CLOSURE(e1) =>
1104			(moredata(e1); add(lastpos(e1),firstpos(e1)))
1105		| ALT(e1,e2) => (moredata(e1); moredata(e2))
1106		| CAT(e1,e2) => (moredata(e1); moredata(e2);
1107			add(lastpos(e1),firstpos(e2)))
1108		| CLASS(x,i) => update(leaf,i,CLASS(x,i))
1109		| TRAIL(i) => (update(leaf,i,TRAIL(i)); if !trailmark = ~1
1110			then trailmark := i else ())
1111		| END(i) => (update(leaf,i,END(i)); if !trailmark <> ~1
1112			then (tcpairs := (!trailmark,i)::(!tcpairs);
1113			trailmark := ~1) else ())
1114		| _ => ()
1115	and makedata = fn
1116		  nil => ()
1117		| (_,x)::tl => (moredata(x);makedata(tl))
1118	in trailmark := ~1; makedata(e); (fp,leaf,!tcpairs)
1119	end;
1120
1121fun makedfa(rules) =
1122let val StateTab = ref (create(String.<=)) : (string,int) dictionary ref
1123    val fintab = ref (create(Int.<=)) : (int,(int list)) dictionary ref
1124    val transtab = ref (create(Int.<=)) : (int,int list) dictionary ref
1125    val tctab = ref (create(Int.<=)) : (int,(int list)) dictionary ref
1126    val (fp, leaf, tcpairs) = leafdata(rules);
1127
1128fun visit (state,statenum) =
1129	let val transitions = gettrans(state) in
1130	   fintab := enter(!fintab)(statenum,getfin(state));
1131	   tctab := enter(!tctab)(statenum,gettc(state));
1132	   transtab := enter(!transtab)(statenum,transitions)
1133	end
1134
1135and visitstarts (states) =
1136	let fun vs nil i = ()
1137	      | vs (hd::tl) i = (visit (hd,i); vs tl (i+1))
1138	in vs states 0
1139	end
1140
1141and hashstate(s: int list) =
1142	let val rec hs =
1143	        fn (nil,z) => z
1144		 | ((x:int)::y,z) => hs(y,z ^ " " ^ (Int.toString x))
1145	in hs(s,"")
1146	end
1147
1148and find(s) = lookup(!StateTab)(hashstate(s))
1149
1150and add(s,n) = StateTab := enter(!StateTab)(hashstate(s),n)
1151
1152and getstate (state) =
1153	find(state)
1154	handle LOOKUP => let val n = ++StateNum in
1155		add(state,n); visit(state,n); n
1156		end
1157
1158and getfin state =
1159	let fun f nil fins = fins
1160	      | f (hd::tl) fins =
1161	         case (leaf sub hd)
1162	            of END _ => f tl (hd::fins)
1163	             | _ => f tl fins
1164	in f state nil
1165	end
1166
1167and gettc state =
1168	let fun f nil fins = fins
1169	      | f (hd::tl) fins =
1170	         case (leaf sub hd)
1171	            of TRAIL _ => f tl (hd::fins)
1172	             | _ => f tl fins
1173	in f state nil
1174	end
1175
1176and gettrans (state) =
1177      let fun loop c tlist =
1178	 let fun cktrans nil r = r
1179	       | cktrans (hd::tl) r =
1180		  case (leaf sub hd) of
1181	           CLASS(i,_)=>
1182			(if (i sub c) then cktrans tl (union(r,fp sub hd))
1183		         else cktrans tl r handle Subscript =>
1184						cktrans tl r
1185			)
1186		   | _ => cktrans tl r
1187	 in if c >= 0 then
1188	      let val v=cktrans state nil
1189	      in loop (c-1) (if v=nil then 0::tlist else (getstate v)::tlist)
1190	      end
1191	    else tlist
1192	 end
1193     in loop ((!CharSetSize) - 1) nil
1194     end
1195
1196and startstates() =
1197	let val startarray = array(!StateNum + 1, nil);
1198            fun listofarray(a,n) =
1199  		let fun f i l = if i >= 0 then  f (i-1) ((a sub i)::l) else l
1200 		in f (n-1) nil end
1201	val rec makess = fn
1202		  nil => ()
1203		| (startlist,e)::tl => (fix(startlist,firstpos(e));makess(tl))
1204	and fix = fn
1205		  (nil,_) => ()
1206		| (s::tl,firsts) => (update(startarray,s,
1207			union(firsts,startarray sub s));
1208			fix(tl,firsts))
1209	in makess(rules);listofarray(startarray, !StateNum + 1)
1210	end
1211
1212in visitstarts(startstates());
1213(listofdict(!fintab),listofdict(!transtab),listofdict(!tctab),tcpairs)
1214end
1215
1216val skel_hd =
1217"   struct\n\
1218\    type int = Int.int\n\
1219\    structure UserDeclarations =\n\
1220\      struct\n\
1221\"
1222
1223val skel_mid2 =
1224"                       | Internal.D k => action (i,(acts::l),k::rs)\n\
1225\                       | Internal.T k =>\n\
1226\                         let fun f (a::b,r) =\n\
1227\                              if a=k\n\
1228\                                then action(i,(((Internal.N a)::acts)::l),(b@r))\n\
1229\                                else f (b,a::r)\n\
1230\                               | f (nil,r) = action(i,(acts::l),rs)\n\
1231\                          in f (rs,nil)\n\
1232\                          end\n\
1233\"
1234
1235fun lexGen infile =
1236    let val outfile = infile ^ ".sml"
1237      fun PrintLexer (ends) =
1238    let val sayln = fn x => (say x; say "\n")
1239     in case !ArgCode
1240	 of NONE => (sayln "fun lex () : Internal.result =";
1241		     sayln "let fun continue() = lex() in")
1242	  | SOME s => (say "fun lex "; say "(yyarg as ("; say s; sayln ")) =";
1243		       sayln "let fun continue() : Internal.result =");
1244	 say "  let fun scan (s,AcceptingLeaves : Internal.yyfinstate";
1245	 sayln " list list,l,i0: int) =";
1246	 if !UsesTrailingContext
1247	     then say "\tlet fun action (i: int,nil,rs)"
1248	     else say "\tlet fun action (i: int,nil)";
1249	 sayln " = raise LexError";
1250	 if !UsesTrailingContext
1251	     then sayln "\t| action (i,nil::l,rs) = action(i-1,l,rs)"
1252	     else sayln "\t| action (i,nil::l) = action (i-1,l)";
1253	 if !UsesTrailingContext
1254	     then sayln "\t| action (i,(node::acts)::l,rs) ="
1255	     else sayln "\t| action (i,(node::acts)::l) =";
1256	 sayln "\t\tcase node of";
1257	 sayln "\t\t    Internal.N yyk =>";
1258	 sayln "\t\t\t(let fun yymktext() = String.substring(!yyb,i0,i-i0)\n\
1259	       \\t\t\t     val yypos: int = i0+ !yygone";
1260	 if !CountNewLines
1261	    then (sayln "\t\t\tval _ = yylineno := CharVectorSlice.foldli";
1262	  	  sayln "\t\t\t\t(fn (_,#\"\\n\", n) => n+1 | (_,_, n) => n) (!yylineno) (CharVectorSlice.slice(!yyb,i0,SOME(i-i0)))")
1263	    else ();
1264	 if !HaveReject
1265	     then (say "\t\t\tfun REJECT() = action(i,acts::l";
1266		   if !UsesTrailingContext
1267		       then sayln ",rs)" else sayln ")")
1268	     else ();
1269	 sayln "\t\t\topen UserDeclarations Internal.StartStates";
1270	 sayln " in (yybufpos := i; case yyk of";
1271	 sayln "";
1272	 sayln "\t\t\t(* Application actions *)\n";
1273	 makeaccept(ends);
1274	 say "\n\t\t) end ";
1275	 say ")\n\n";
1276	 if (!UsesTrailingContext) then say skel_mid2 else ();
1277	 sayln "\tval {fin,trans} = Vector.sub (Internal.tab, s)";
1278	 sayln "\tval NewAcceptingLeaves = fin::AcceptingLeaves";
1279	 sayln "\tin if l = !yybl then";
1280	 sayln "\t     if trans = #trans(Vector.sub(Internal.tab,0))";
1281	 sayln "\t       then action(l,NewAcceptingLeaves";
1282	 if !UsesTrailingContext then say ",nil" else ();
1283         say ") else";
1284	 sayln "\t    let val newchars= if !yydone then \"\" else yyinput 1024";
1285	 sayln "\t    in if (String.size newchars)=0";
1286	 sayln "\t\t  then (yydone := true;";
1287	 say "\t\t        if (l=i0) then UserDeclarations.eof ";
1288	 sayln (case !ArgCode of NONE => "()" | SOME _ => "yyarg");
1289	 say   "\t\t                  else action(l,NewAcceptingLeaves";
1290	 if !UsesTrailingContext then
1291	    sayln ",nil))" else sayln "))";
1292	 sayln "\t\t  else (if i0=l then yyb := newchars";
1293	 sayln "\t\t     else yyb := String.substring(!yyb,i0,l-i0)^newchars;";
1294	 sayln "\t\t     yygone := !yygone+i0;";
1295	 sayln "\t\t     yybl := String.size (!yyb);";
1296	 sayln "\t\t     scan (s,AcceptingLeaves,l-i0,0))";
1297	 sayln "\t    end";
1298	 sayln "\t  else let val NewChar = Char.ord (CharVector.sub (!yyb,l))";
1299         if !CharSetSize=129
1300           then sayln "\t\tval NewChar = if NewChar<128 then NewChar else 128"
1301           else ();
1302	 say "\t\tval NewState = ";
1303	 sayln (if !CharFormat
1304                then "Char.ord (CharVector.sub (trans,NewChar))"
1305                else "Vector.sub (trans, NewChar)");
1306	 say "\t\tin if NewState=0 then action(l,NewAcceptingLeaves";
1307	 if !UsesTrailingContext then sayln ",nil)" else sayln ")";
1308	 sayln "\t\telse scan(NewState,NewAcceptingLeaves,l+1,i0)";
1309	 sayln "\tend";
1310	 sayln "\tend";
1311	 if !UsesPrevNewLine then () else sayln "(*";
1312	 sayln "\tval start= if String.substring(!yyb,!yybufpos-1,1)=\"\\n\"";
1313	 sayln "then !yybegin+1 else !yybegin";
1314	 if !UsesPrevNewLine then () else sayln "*)";
1315	 say "\tin scan(";
1316	 if !UsesPrevNewLine then say "start"
1317	 else say "!yybegin (* start *)";
1318	 sayln ",nil,!yybufpos,!yybufpos)";
1319	 sayln "    end";
1320	 sayln (case !ArgCode of NONE => "end" | SOME _ => "in continue end");
1321	 sayln "  in lex";
1322	 sayln "  end";
1323	 sayln "end"
1324	end
1325
1326    in (UsesPrevNewLine := false;
1327	ResetFlags();
1328        LexBuf := make_ibuf(TextIO.openIn infile);
1329        NextTok := BOF;
1330        inquote := false;
1331	LexOut := TextIO.openOut(outfile);
1332	StateNum := 2;
1333	LineNum := 1;
1334	StateTab := enter(create(String.<=))("INITIAL",1);
1335	LeafNum := ~1;
1336	let
1337	   val (user_code,rules,ends) =
1338	       parse() handle x =>
1339               	  (close_ibuf(!LexBuf);
1340		   TextIO.closeOut(!LexOut);
1341		   OS.FileSys.remove outfile;
1342		   raise x)
1343	   val (fins,trans,tctab,tcpairs) = makedfa(rules)
1344	   val _ = if !UsesTrailingContext then
1345	              (close_ibuf(!LexBuf);
1346		       TextIO.closeOut(!LexOut);
1347		       OS.FileSys.remove outfile;
1348		       prErr "lookahead is unimplemented")
1349		   else ()
1350	in
1351	  if (!HeaderDecl)
1352	      then say (!HeaderCode)
1353	      else say ("structure " ^ (!StrName));
1354	  say "=\n";
1355	  say skel_hd;
1356	  say user_code;
1357	  say "end (* end of user routines *)\n";
1358	  say "exception LexError (* raised if illegal leaf ";
1359	  say "action tried *)\n";
1360	  say "structure Internal =\n\tstruct\n";
1361	  maketable(fins,tctab,tcpairs,trans);
1362	  say "structure StartStates =\n\tstruct\n";
1363	  say "\tdatatype yystartstate = STARTSTATE of int\n";
1364	  makebegin();
1365	  say "\nend\n";
1366	  say "type result = UserDeclarations.lexresult\n";
1367	  say "\texception LexerError (* raised if illegal leaf ";
1368	  say "action tried *)\n";
1369	  say "end\n\n";
1370	  say "type int = Int.int\n";
1371	  say (if (!PosArg) then "fun makeLexer (yyinput: int -> string,yygone0:int) =\nlet\n"
1372		else "fun makeLexer (yyinput: int -> string) =\nlet\tval yygone0:int=0\n");
1373	  if !CountNewLines then say "\tval yylineno: int ref = ref 0\n\n" else ();
1374	  say "\tval yyb = ref \"\\n\" \t\t(* buffer *)\n\
1375	  \\tval yybl: int ref = ref 1\t\t(*buffer length *)\n\
1376	  \\tval yybufpos: int ref = ref 1\t\t(* location of next character to use *)\n\
1377	  \\tval yygone: int ref = ref yygone0\t(* position in file of beginning of buffer *)\n\
1378	  \\tval yydone = ref false\t\t(* eof found yet? *)\n\
1379	  \\tval yybegin: int ref = ref 1\t\t(*Current 'start state' for lexer *)\n\
1380  	  \\n\tval YYBEGIN = fn (Internal.StartStates.STARTSTATE x) =>\n\
1381	  \\t\t yybegin := x\n\n";
1382	  PrintLexer(ends);
1383	  close_ibuf(!LexBuf);
1384	   TextIO.closeOut(!LexOut)
1385	 end)
1386    end
1387end
1388