1(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi 2 * 3 * $Log$ 4 * Revision 1.1 2006/06/23 03:21:27 michaeln 5 * Changed the names of the files in mlyacclib because I want these files 6 * to move into sigobj, and I don't want name-clashes, particularly with 7 * names like stream.sml. (If you use a parser generated by mlyacc, then 8 * you need to have the files in mlyacclib available too.) 9 * 10 * Revision 1.1 2006/06/22 07:40:27 michaeln 11 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources 12 * as the base. 13 * 14 * Revision 1.2 1997/08/26 19:18:54 jhr 15 * Replaced used of "abstraction" with ":>". 16 * 17# Revision 1.1.1.1 1997/01/14 01:38:04 george 18# Version 109.24 19# 20 * Revision 1.3 1996/10/03 03:36:58 jhr 21 * Qualified identifiers that are no-longer top-level (quot, rem, min, max). 22 * 23 * Revision 1.2 1996/02/26 15:02:29 george 24 * print no longer overloaded. 25 * use of makestring has been removed and replaced with Int.toString .. 26 * use of IO replaced with TextIO 27 * 28 * Revision 1.1.1.1 1996/01/31 16:01:42 george 29 * Version 109 30 * 31 *) 32 33(* parser.sml: This is a parser driver for LR tables with an error-recovery 34 routine added to it. The routine used is described in detail in this 35 article: 36 37 'A Practical Method for LR and LL Syntactic Error Diagnosis and 38 Recovery', by M. Burke and G. Fisher, ACM Transactions on 39 Programming Langauges and Systems, Vol. 9, No. 2, April 1987, 40 pp. 164-197. 41 42 This program is an implementation is the partial, deferred method discussed 43 in the article. The algorithm and data structures used in the program 44 are described below. 45 46 This program assumes that all semantic actions are delayed. A semantic 47 action should produce a function from unit -> value instead of producing the 48 normal value. The parser returns the semantic value on the top of the 49 stack when accept is encountered. The user can deconstruct this value 50 and apply the unit -> value function in it to get the answer. 51 52 It also assumes that the lexer is a lazy stream. 53 54 Data Structures: 55 ---------------- 56 57 * The parser: 58 59 The state stack has the type 60 61 (state * (semantic value * line # * line #)) list 62 63 The parser keeps a queue of (state stack * lexer pair). A lexer pair 64 consists of a terminal * value pair and a lexer. This allows the 65 parser to reconstruct the states for terminals to the left of a 66 syntax error, and attempt to make error corrections there. 67 68 The queue consists of a pair of lists (x,y). New additions to 69 the queue are cons'ed onto y. The first element of x is the top 70 of the queue. If x is nil, then y is reversed and used 71 in place of x. 72 73 Algorithm: 74 ---------- 75 76 * The steady-state parser: 77 78 This parser keeps the length of the queue of state stacks at 79 a steady state by always removing an element from the front when 80 another element is placed on the end. 81 82 It has these arguments: 83 84 stack: current stack 85 queue: value of the queue 86 lexPair ((terminal,value),lex stream) 87 88 When SHIFT is encountered, the state to shift to and the value are 89 are pushed onto the state stack. The state stack and lexPair are 90 placed on the queue. The front element of the queue is removed. 91 92 When REDUCTION is encountered, the rule is applied to the current 93 stack to yield a triple (nonterm,value,new stack). A new 94 stack is formed by adding (goto(top state of stack,nonterm),value) 95 to the stack. 96 97 When ACCEPT is encountered, the top value from the stack and the 98 lexer are returned. 99 100 When an ERROR is encountered, fixError is called. FixError 101 takes the arguments to the parser, fixes the error if possible and 102 returns a new set of arguments. 103 104 * The distance-parser: 105 106 This parser includes an additional argument distance. It pushes 107 elements on the queue until it has parsed distance tokens, or an 108 ACCEPT or ERROR occurs. It returns a stack, lexer, the number of 109 tokens left unparsed, a queue, and an action option. 110*) 111 112signature FIFO = 113 sig type 'a queue 114 val empty : 'a queue 115 exception Empty 116 val get : 'a queue -> 'a * 'a queue 117 val put : 'a * 'a queue -> 'a queue 118 end 119 120(* drt (12/15/89) -- the functor should be used in development work, but 121 it wastes space in the release version. 122 123functor ParserGen(structure LrTable : LR_TABLE 124 structure Stream : STREAM) : LR_PARSER = 125*) 126 127structure LrParser :> LR_PARSER = 128 struct 129 structure LrTable = LrTable 130 structure Stream = Stream 131 132 structure Token : TOKEN = 133 struct 134 structure LrTable = LrTable 135 datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b) 136 val sameToken = fn (TOKEN(t,_),TOKEN(t',_)) => t=t' 137 end 138 139 open LrTable 140 open Token 141 142 val DEBUG1 = false 143 val DEBUG2 = false 144 exception ParseError 145 exception ParseImpossible of int 146 147 structure Fifo :> FIFO = 148 struct 149 type 'a queue = ('a list * 'a list) 150 val empty = (nil,nil) 151 exception Empty 152 fun get(a::x, y) = (a, (x,y)) 153 | get(nil, nil) = raise Empty 154 | get(nil, y) = get(rev y, nil) 155 fun put(a,(x,y)) = (x,a::y) 156 end 157 158 type ('a,'b) elem = (state * ('a * 'b * 'b)) 159 type ('a,'b) stack = ('a,'b) elem list 160 type ('a,'b) lexv = ('a,'b) token 161 type ('a,'b) lexpair = ('a,'b) lexv * (('a,'b) lexv Stream.stream) 162 type ('a,'b) distanceParse = 163 ('a,'b) lexpair * 164 ('a,'b) stack * 165 (('a,'b) stack * ('a,'b) lexpair) Fifo.queue * 166 int -> 167 ('a,'b) lexpair * 168 ('a,'b) stack * 169 (('a,'b) stack * ('a,'b) lexpair) Fifo.queue * 170 int * 171 action option 172 173 type ('a,'b) ecRecord = 174 {is_keyword : term -> bool, 175 preferred_change : (term list * term list) list, 176 error : string * 'b * 'b -> unit, 177 errtermvalue : term -> 'a, 178 terms : term list, 179 showTerminal : term -> string, 180 noShift : term -> bool} 181 182 local 183 val print = fn s => TextIO.output(TextIO.stdOut,s) 184 val println = fn s => (print s; print "\n") 185 val showState = fn (STATE s) => "STATE " ^ (Int.toString s) 186 in 187 fun printStack(stack: ('a,'b) stack, n: int) = 188 case stack 189 of (state,_) :: rest => 190 (print("\t" ^ Int.toString n ^ ": "); 191 println(showState state); 192 printStack(rest, n+1)) 193 | nil => () 194 195 fun prAction showTerminal 196 (stack as (state,_) :: _, (TOKEN (term,_), _), action) = 197 (println "Parse: state stack:"; 198 printStack(stack, 0); 199 print(" state=" 200 ^ showState state 201 ^ " next=" 202 ^ showTerminal term 203 ^ " action=" 204 ); 205 case action 206 of SHIFT state => println ("SHIFT " ^ (showState state)) 207 | REDUCE i => println ("REDUCE " ^ (Int.toString i)) 208 | ERROR => println "ERROR" 209 | ACCEPT => println "ACCEPT") 210 | prAction _ _ = () 211 end 212 213 (* ssParse: parser which maintains the queue of (state * lexvalues) in a 214 steady-state. It takes a table, showTerminal function, saction 215 function, and fixError function. It parses until an ACCEPT is 216 encountered, or an exception is raised. When an error is encountered, 217 fixError is called with the arguments of parseStep (lexv,stack,and 218 queue). It returns the lexv, and a new stack and queue adjusted so 219 that the lexv can be parsed *) 220 221 val ssParse = 222 fn (table,showTerminal,saction,fixError,arg) => 223 let val prAction = prAction showTerminal 224 val action = LrTable.action table 225 val goto = LrTable.goto table 226 fun parseStep(args as 227 (lexPair as (TOKEN (terminal, value as (_,leftPos,_)), 228 lexer 229 ), 230 stack as (state,_) :: _, 231 queue)) = 232 let val nextAction = action (state,terminal) 233 val _ = if DEBUG1 then prAction(stack,lexPair,nextAction) 234 else () 235 in case nextAction 236 of SHIFT s => 237 let val newStack = (s,value) :: stack 238 val newLexPair = Stream.get lexer 239 val (_,newQueue) =Fifo.get(Fifo.put((newStack,newLexPair), 240 queue)) 241 in parseStep(newLexPair,(s,value)::stack,newQueue) 242 end 243 | REDUCE i => 244 (case saction(i,leftPos,stack,arg) 245 of (nonterm,value,stack as (state,_) :: _) => 246 parseStep(lexPair,(goto(state,nonterm),value)::stack, 247 queue) 248 | _ => raise (ParseImpossible 197)) 249 | ERROR => parseStep(fixError args) 250 | ACCEPT => 251 (case stack 252 of (_,(topvalue,_,_)) :: _ => 253 let val (token,restLexer) = lexPair 254 in (topvalue,Stream.cons(token,restLexer)) 255 end 256 | _ => raise (ParseImpossible 202)) 257 end 258 | parseStep _ = raise (ParseImpossible 204) 259 in parseStep 260 end 261 262 (* distanceParse: parse until n tokens are shifted, or accept or 263 error are encountered. Takes a table, showTerminal function, and 264 semantic action function. Returns a parser which takes a lexPair 265 (lex result * lexer), a state stack, a queue, and a distance 266 (must be > 0) to parse. The parser returns a new lex-value, a stack 267 with the nth token shifted on top, a queue, a distance, and action 268 option. *) 269 270 val distanceParse = 271 fn (table,showTerminal,saction,arg) => 272 let val prAction = prAction showTerminal 273 val action = LrTable.action table 274 val goto = LrTable.goto table 275 fun parseStep(lexPair,stack,queue,0) = (lexPair,stack,queue,0,NONE) 276 | parseStep(lexPair as (TOKEN (terminal, value as (_,leftPos,_)), 277 lexer 278 ), 279 stack as (state,_) :: _, 280 queue,distance) = 281 let val nextAction = action(state,terminal) 282 val _ = if DEBUG1 then prAction(stack,lexPair,nextAction) 283 else () 284 in case nextAction 285 of SHIFT s => 286 let val newStack = (s,value) :: stack 287 val newLexPair = Stream.get lexer 288 in parseStep(newLexPair,(s,value)::stack, 289 Fifo.put((newStack,newLexPair),queue),distance-1) 290 end 291 | REDUCE i => 292 (case saction(i,leftPos,stack,arg) 293 of (nonterm,value,stack as (state,_) :: _) => 294 parseStep(lexPair,(goto(state,nonterm),value)::stack, 295 queue,distance) 296 | _ => raise (ParseImpossible 240)) 297 | ERROR => (lexPair,stack,queue,distance,SOME nextAction) 298 | ACCEPT => (lexPair,stack,queue,distance,SOME nextAction) 299 end 300 | parseStep _ = raise (ParseImpossible 242) 301 in parseStep : ('_a,'_b) distanceParse 302 end 303 304(* mkFixError: function to create fixError function which adjusts parser state 305 so that parse may continue in the presence of an error *) 306 307fun mkFixError({is_keyword,terms,errtermvalue, 308 preferred_change,noShift, 309 showTerminal,error,...} : ('_a,'_b) ecRecord, 310 distanceParse : ('_a,'_b) distanceParse, 311 minAdvance,maxAdvance) 312 313 ((TOKEN (term, (_, leftPos, _)), _), _, queue) = 314 let val _ = if DEBUG2 then 315 error("syntax error found at " ^ (showTerminal term), 316 leftPos,leftPos) 317 else () 318 319 fun tokAt(t,p) = TOKEN(t,(errtermvalue t,p,p)) 320 321 val minDelta = 3 322 323 (* pull all the state * lexv elements from the queue *) 324 325 val stateList = 326 let fun f q = let val (elem,newQueue) = Fifo.get q 327 in elem :: (f newQueue) 328 end handle Fifo.Empty => nil 329 in f queue 330 end 331 332 (* now number elements of stateList, giving distance from 333 error token *) 334 335 val (_, numStateList) = 336 List.foldr (fn (a,(num,r)) => (num+1,(a,num)::r)) (0, []) stateList 337 338 (* Represent the set of potential changes as a linked list. 339 340 Values of datatype Change hold information about a potential change. 341 342 oper = oper to be applied 343 pos = the # of the element in stateList that would be altered. 344 distance = the number of tokens beyond the error token which the 345 change allows us to parse. 346 new = new terminal * value pair at that point 347 orig = original terminal * value pair at the point being changed. 348 *) 349 350 datatype ('a,'b) change = CHANGE of 351 {pos : int, distance : int, leftPos: 'b, rightPos: 'b, 352 new : ('a,'b) lexv list, orig : ('a,'b) lexv list} 353 354 355 val showTerms = concat o map (fn TOKEN(t,_) => " " ^ showTerminal t) 356 357(* parse: given a lexPair, a stack, and the distance from the error 358 token, return the distance past the error token that we are able to parse.*) 359 360 fun parse (lexPair,stack,queuePos : int) = 361 case distanceParse(lexPair,stack,Fifo.empty,queuePos+maxAdvance+1) 362 of (_,_,_,distance,SOME ACCEPT) => 363 if maxAdvance-distance-1 >= 0 364 then maxAdvance 365 else maxAdvance-distance-1 366 | (_,_,_,distance,_) => maxAdvance - distance - 1 367 368(* catList: concatenate results of scanning list *) 369 370 fun catList l f = List.foldr (fn(a,r)=> f a @ r) [] l 371 372 fun keywordsDelta new = if List.exists (fn(TOKEN(t,_))=>is_keyword t) new 373 then minDelta else 0 374 375 fun tryChange{lex,stack,pos,leftPos,rightPos,orig,new} = 376 let val lex' = List.foldr (fn (t',p)=>(t',Stream.cons p)) lex new 377 val distance = parse(lex',stack, 378 pos + List.length new - List.length orig) 379 in if distance >= minAdvance + keywordsDelta new 380 then [CHANGE{pos=pos,leftPos=leftPos,rightPos=rightPos, 381 distance=distance,orig=orig,new=new}] 382 else [] 383 end 384 385 386(* tryDelete: Try to delete n terminals. 387 Return single-element [success] or nil. 388 Do not delete unshiftable terminals. *) 389 390 391 fun tryDelete n ((stack, lexPair as (TOKEN (_, (_, l, r)), _)), qPos) = 392 let fun del(0,accum,left,right,lexPair) = 393 tryChange{lex=lexPair,stack=stack, 394 pos=qPos,leftPos=left,rightPos=right, 395 orig=rev accum, new=[]} 396 | del(n,accum,left,_,(tok as TOKEN(term,(_,_,r)),lexer)) = 397 if noShift term then [] 398 else del(n-1,tok::accum,left,r,Stream.get lexer) 399 in del(n,[],l,r,lexPair) 400 end 401 402(* tryInsert: try to insert tokens before the current terminal; 403 return a list of the successes *) 404 405 fun tryInsert((stack,lexPair as (TOKEN(_,(_,l,_)),_)),queuePos) = 406 catList terms (fn t => 407 tryChange{lex=lexPair,stack=stack, 408 pos=queuePos,orig=[],new=[tokAt(t,l)], 409 leftPos=l,rightPos=l}) 410 411(* trySubst: try to substitute tokens for the current terminal; 412 return a list of the successes *) 413 414 fun trySubst ((stack, (orig as TOKEN (term,(_,l,r)),lexer)), 415 queuePos) = 416 if noShift term then [] 417 else 418 catList terms (fn t => 419 tryChange{lex=Stream.get lexer,stack=stack, 420 pos=queuePos, 421 leftPos=l,rightPos=r,orig=[orig], 422 new=[tokAt(t,r)]}) 423 424 (* do_delete(toks,lexPair) tries to delete tokens "toks" from "lexPair". 425 If it succeeds, returns SOME(toks',l,r,lp), where 426 toks' is the actual tokens (with positions and values) deleted, 427 (l,r) are the (leftmost,rightmost) position of toks', 428 lp is what remains of the stream after deletion 429 *) 430 fun do_delete(nil,lp as (TOKEN(_,(_,l,_)),_)) = SOME(nil,l,l,lp) 431 | do_delete([t],(tok as TOKEN(t',(_,l,r)),lp')) = 432 if t=t' 433 then SOME([tok],l,r,Stream.get lp') 434 else NONE 435 | do_delete(t::rest,(tok as TOKEN(t',(_,l,_)),lp')) = 436 if t=t' 437 then case do_delete(rest,Stream.get lp') 438 of SOME(deleted,_,r',lp'') => 439 SOME(tok::deleted,l,r',lp'') 440 | NONE => NONE 441 else NONE 442 443 fun tryPreferred((stack,lexPair),queuePos) = 444 catList preferred_change (fn (delete,insert) => 445 if List.exists noShift delete then [] (* should give warning at 446 parser-generation time *) 447 else case do_delete(delete,lexPair) 448 of SOME(deleted,l,r,lp) => 449 tryChange{lex=lp,stack=stack,pos=queuePos, 450 leftPos=l,rightPos=r,orig=deleted, 451 new=map (fn t=>(tokAt(t,r))) insert} 452 | NONE => []) 453 454 val changes = catList numStateList tryPreferred @ 455 catList numStateList tryInsert @ 456 catList numStateList trySubst @ 457 catList numStateList (tryDelete 1) @ 458 catList numStateList (tryDelete 2) @ 459 catList numStateList (tryDelete 3) 460 461 val findMaxDist = fn l => 462 foldr (fn (CHANGE {distance,...},high) => Int.max(distance,high)) 0 l 463 464(* maxDist: max distance past error taken that we could parse *) 465 466 val maxDist = findMaxDist changes 467 468(* remove changes which did not parse maxDist tokens past the error token *) 469 470 val changes = catList changes 471 (fn(c as CHANGE{distance,...}) => 472 if distance=maxDist then [c] else []) 473 474 in case changes 475 of (l as change :: _) => 476 let fun print_msg (CHANGE {new,orig,leftPos,rightPos,...}) = 477 let val s = 478 case (orig,new) 479 of (_::_,[]) => "deleting " ^ (showTerms orig) 480 | ([],_::_) => "inserting " ^ (showTerms new) 481 | _ => "replacing " ^ (showTerms orig) ^ 482 " with " ^ (showTerms new) 483 in error ("syntax error: " ^ s,leftPos,rightPos) 484 end 485 486 val _ = 487 (if length l > 1 andalso DEBUG2 then 488 (print "multiple fixes possible; could fix it by:\n"; 489 app print_msg l; 490 print "chosen correction:\n") 491 else (); 492 print_msg change) 493 494 (* findNth: find nth queue entry from the error 495 entry. Returns the Nth queue entry and the portion of 496 the queue from the beginning to the nth-1 entry. The 497 error entry is at the end of the queue. 498 499 Examples: 500 501 queue = a b c d e 502 findNth 0 = (e,a b c d) 503 findNth 1 = (d,a b c) 504 *) 505 506 val findNth = fn n => 507 let fun f (h::t,0) = (h,rev t) 508 | f (_::t,n) = f(t,n-1) 509 | f (nil,_) = let exception FindNth 510 in raise FindNth 511 end 512 in f (rev stateList,n) 513 end 514 515 val CHANGE {pos,orig,new,...} = change 516 val (last,queueFront) = findNth pos 517 val (stack,lexPair) = last 518 519 val lp1 = foldl(fn (_,(_,r)) => Stream.get r) lexPair orig 520 val lp2 = foldr(fn(t,r)=>(t,Stream.cons r)) lp1 new 521 522 val restQueue = 523 Fifo.put((stack,lp2), 524 foldl Fifo.put Fifo.empty queueFront) 525 526 val (lexPair,stack,queue,_,_) = 527 distanceParse(lp2,stack,restQueue,pos) 528 529 in (lexPair,stack,queue) 530 end 531 | nil => (error("syntax error found at " ^ (showTerminal term), 532 leftPos,leftPos); raise ParseError) 533 end 534 535 val parse = fn {arg,table,lexer,saction,void,lookahead, 536 ec=ec as {showTerminal,...} : ('_a,'_b) ecRecord} => 537 let val distance = 15 (* defer distance tokens *) 538 val minAdvance = 1 (* must parse at least 1 token past error *) 539 val maxAdvance = Int.max(lookahead,0)(* max distance for parse check *) 540 val lexPair = Stream.get lexer 541 val (TOKEN (_,(_,leftPos,_)),_) = lexPair 542 val startStack = [(initialState table,(void,leftPos,leftPos))] 543 val startQueue = Fifo.put((startStack,lexPair),Fifo.empty) 544 val distanceParse = distanceParse(table,showTerminal,saction,arg) 545 val fixError = mkFixError(ec,distanceParse,minAdvance,maxAdvance) 546 val ssParse = ssParse(table,showTerminal,saction,fixError,arg) 547 fun loop (lexPair,stack,queue,_,SOME ACCEPT) = 548 ssParse(lexPair,stack,queue) 549 | loop (lexPair,stack,queue,0,_) = ssParse(lexPair,stack,queue) 550 | loop (lexPair,stack,queue,distance,SOME ERROR) = 551 let val (lexPair,stack,queue) = fixError(lexPair,stack,queue) 552 in loop (distanceParse(lexPair,stack,queue,distance)) 553 end 554 | loop _ = let exception ParseInternal 555 in raise ParseInternal 556 end 557 in loop (distanceParse(lexPair,startStack,startQueue,distance)) 558 end 559 end; 560 561(* drt (12/15/89) -- needed only when the code above is functorized 562 563structure LrParser = ParserGen(structure LrTable=LrTable 564 structure Stream=Stream); 565*) 566