1(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) 2 3(* parser.sml: This is a parser driver for LR tables with an error-recovery 4 routine added to it. The routine used is described in detail in this 5 article: 6 7 'A Practical Method for LR and LL Syntactic Error Diagnosis and 8 Recovery', by M. Burke and G. Fisher, ACM Transactions on 9 Programming Langauges and Systems, Vol. 9, No. 2, April 1987, 10 pp. 164-197. 11 12 This program is an implementation is the partial, deferred method discussed 13 in the article. The algorithm and data structures used in the program 14 are described below. 15 16 This program assumes that all semantic actions are delayed. A semantic 17 action should produce a function from unit -> value instead of producing the 18 normal value. The parser returns the semantic value on the top of the 19 stack when accept is encountered. The user can deconstruct this value 20 and apply the unit -> value function in it to get the answer. 21 22 It also assumes that the lexer is a lazy stream. 23 24 Data Structures: 25 ---------------- 26 27 * The parser: 28 29 The state stack has the type 30 31 (state * (semantic value * line # * line #)) list 32 33 The parser keeps a queue of (state stack * lexer pair). A lexer pair 34 consists of a terminal * value pair and a lexer. This allows the 35 parser to reconstruct the states for terminals to the left of a 36 syntax error, and attempt to make error corrections there. 37 38 The queue consists of a pair of lists (x,y). New additions to 39 the queue are cons'ed onto y. The first element of x is the top 40 of the queue. If x is nil, then y is reversed and used 41 in place of x. 42 43 Algorithm: 44 ---------- 45 46 * The steady-state parser: 47 48 This parser keeps the length of the queue of state stacks at 49 a steady state by always removing an element from the front when 50 another element is placed on the end. 51 52 It has these arguments: 53 54 stack: current stack 55 queue: value of the queue 56 lexPair ((terminal,value),lex stream) 57 58 When SHIFT is encountered, the state to shift to and the value are 59 are pushed onto the state stack. The state stack and lexPair are 60 placed on the queue. The front element of the queue is removed. 61 62 When REDUCTION is encountered, the rule is applied to the current 63 stack to yield a triple (nonterm,value,new stack). A new 64 stack is formed by adding (goto(top state of stack,nonterm),value) 65 to the stack. 66 67 When ACCEPT is encountered, the top value from the stack and the 68 lexer are returned. 69 70 When an ERROR is encountered, fixError is called. FixError 71 takes the arguments to the parser, fixes the error if possible and 72 returns a new set of arguments. 73 74 * The distance-parser: 75 76 This parser includes an additional argument distance. It pushes 77 elements on the queue until it has parsed distance tokens, or an 78 ACCEPT or ERROR occurs. It returns a stack, lexer, the number of 79 tokens left unparsed, a queue, and an action option. 80*) 81 82signature FIFO = 83 sig type 'a queue 84 val empty : 'a queue 85 exception Empty 86 val get : 'a queue -> 'a * 'a queue 87 val put : 'a * 'a queue -> 'a queue 88 end 89 90(* drt (12/15/89) -- the functor should be used in development work, but 91 it wastes space in the release version. 92 93functor ParserGen(structure LrTable : LR_TABLE 94 structure Stream : STREAM) : LR_PARSER = 95*) 96 97structure LrParser :> LR_PARSER = 98 struct 99 structure LrTable = LrTable 100 structure Stream = Stream 101 102 fun eqT (LrTable.T i, LrTable.T i') = i = i' 103 104 structure Token : TOKEN = 105 struct 106 structure LrTable = LrTable 107 datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b) 108 val sameToken = fn (TOKEN(t,_),TOKEN(t',_)) => eqT (t,t') 109 end 110 111 open LrTable 112 open Token 113 114 val DEBUG1 = false 115 val DEBUG2 = false 116 exception ParseError 117 exception ParseImpossible of int 118 119 structure Fifo :> FIFO = 120 struct 121 type 'a queue = ('a list * 'a list) 122 val empty = (nil,nil) 123 exception Empty 124 fun get(a::x, y) = (a, (x,y)) 125 | get(nil, nil) = raise Empty 126 | get(nil, y) = get(rev y, nil) 127 fun put(a,(x,y)) = (x,a::y) 128 end 129 130 type ('a,'b) elem = (state * ('a * 'b * 'b)) 131 type ('a,'b) stack = ('a,'b) elem list 132 type ('a,'b) lexv = ('a,'b) token 133 type ('a,'b) lexpair = ('a,'b) lexv * (('a,'b) lexv Stream.stream) 134 type ('a,'b) distanceParse = 135 ('a,'b) lexpair * 136 ('a,'b) stack * 137 (('a,'b) stack * ('a,'b) lexpair) Fifo.queue * 138 int -> 139 ('a,'b) lexpair * 140 ('a,'b) stack * 141 (('a,'b) stack * ('a,'b) lexpair) Fifo.queue * 142 int * 143 action option 144 145 type ('a,'b) ecRecord = 146 {is_keyword : term -> bool, 147 preferred_change : (term list * term list) list, 148 error : string * 'b * 'b -> unit, 149 errtermvalue : term -> 'a, 150 terms : term list, 151 showTerminal : term -> string, 152 noShift : term -> bool} 153 154 local 155 val print = fn s => TextIO.output(TextIO.stdOut,s) 156 val println = fn s => (print s; print "\n") 157 val showState = fn (STATE s) => "STATE " ^ (Int.toString s) 158 in 159 fun printStack(stack: ('a,'b) stack, n: int) = 160 case stack 161 of (state,_) :: rest => 162 (print("\t" ^ Int.toString n ^ ": "); 163 println(showState state); 164 printStack(rest, n+1)) 165 | nil => () 166 167 fun prAction showTerminal 168 (stack as (state,_) :: _, next as (TOKEN (term,_),_), action) = 169 (println "Parse: state stack:"; 170 printStack(stack, 0); 171 print(" state=" 172 ^ showState state 173 ^ " next=" 174 ^ showTerminal term 175 ^ " action=" 176 ); 177 case action 178 of SHIFT state => println ("SHIFT " ^ (showState state)) 179 | REDUCE i => println ("REDUCE " ^ (Int.toString i)) 180 | ERROR => println "ERROR" 181 | ACCEPT => println "ACCEPT") 182 | prAction _ (_,_,action) = () 183 end 184 185 (* ssParse: parser which maintains the queue of (state * lexvalues) in a 186 steady-state. It takes a table, showTerminal function, saction 187 function, and fixError function. It parses until an ACCEPT is 188 encountered, or an exception is raised. When an error is encountered, 189 fixError is called with the arguments of parseStep (lexv,stack,and 190 queue). It returns the lexv, and a new stack and queue adjusted so 191 that the lexv can be parsed *) 192 193 val ssParse = 194 fn (table,showTerminal,saction,fixError,arg) => 195 let val prAction = prAction showTerminal 196 val action = LrTable.action table 197 val goto = LrTable.goto table 198 fun parseStep(args as 199 (lexPair as (TOKEN (terminal, value as (_,leftPos,_)), 200 lexer 201 ), 202 stack as (state,_) :: _, 203 queue)) = 204 let val nextAction = action (state,terminal) 205 val _ = if DEBUG1 then prAction(stack,lexPair,nextAction) 206 else () 207 in case nextAction 208 of SHIFT s => 209 let val newStack = (s,value) :: stack 210 val newLexPair = Stream.get lexer 211 val (_,newQueue) =Fifo.get(Fifo.put((newStack,newLexPair), 212 queue)) 213 in parseStep(newLexPair,(s,value)::stack,newQueue) 214 end 215 | REDUCE i => 216 (case saction(i,leftPos,stack,arg) 217 of (nonterm,value,stack as (state,_) :: _) => 218 parseStep(lexPair,(goto(state,nonterm),value)::stack, 219 queue) 220 | _ => raise (ParseImpossible 197)) 221 | ERROR => parseStep(fixError args) 222 | ACCEPT => 223 (case stack 224 of (_,(topvalue,_,_)) :: _ => 225 let val (token,restLexer) = lexPair 226 in (topvalue,Stream.cons(token,restLexer)) 227 end 228 | _ => raise (ParseImpossible 202)) 229 end 230 | parseStep _ = raise (ParseImpossible 204) 231 in parseStep 232 end 233 234 (* distanceParse: parse until n tokens are shifted, or accept or 235 error are encountered. Takes a table, showTerminal function, and 236 semantic action function. Returns a parser which takes a lexPair 237 (lex result * lexer), a state stack, a queue, and a distance 238 (must be > 0) to parse. The parser returns a new lex-value, a stack 239 with the nth token shifted on top, a queue, a distance, and action 240 option. *) 241 242 val distanceParse = 243 fn (table,showTerminal,saction,arg) => 244 let val prAction = prAction showTerminal 245 val action = LrTable.action table 246 val goto = LrTable.goto table 247 fun parseStep(lexPair,stack,queue,0) = (lexPair,stack,queue,0,NONE) 248 | parseStep(lexPair as (TOKEN (terminal, value as (_,leftPos,_)), 249 lexer 250 ), 251 stack as (state,_) :: _, 252 queue,distance) = 253 let val nextAction = action(state,terminal) 254 val _ = if DEBUG1 then prAction(stack,lexPair,nextAction) 255 else () 256 in case nextAction 257 of SHIFT s => 258 let val newStack = (s,value) :: stack 259 val newLexPair = Stream.get lexer 260 in parseStep(newLexPair,(s,value)::stack, 261 Fifo.put((newStack,newLexPair),queue),distance-1) 262 end 263 | REDUCE i => 264 (case saction(i,leftPos,stack,arg) 265 of (nonterm,value,stack as (state,_) :: _) => 266 parseStep(lexPair,(goto(state,nonterm),value)::stack, 267 queue,distance) 268 | _ => raise (ParseImpossible 240)) 269 | ERROR => (lexPair,stack,queue,distance,SOME nextAction) 270 | ACCEPT => (lexPair,stack,queue,distance,SOME nextAction) 271 end 272 | parseStep _ = raise (ParseImpossible 242) 273 in parseStep : ('_a,'_b) distanceParse 274 end 275 276(* mkFixError: function to create fixError function which adjusts parser state 277 so that parse may continue in the presence of an error *) 278 279fun mkFixError({is_keyword,terms,errtermvalue, 280 preferred_change,noShift, 281 showTerminal,error,...} : ('_a,'_b) ecRecord, 282 distanceParse : ('_a,'_b) distanceParse, 283 minAdvance,maxAdvance) 284 285 (lexv as (TOKEN (term,value as (_,leftPos,_)),_),stack,queue) = 286 let val _ = if DEBUG2 then 287 error("syntax error found at " ^ (showTerminal term), 288 leftPos,leftPos) 289 else () 290 291 fun tokAt(t,p) = TOKEN(t,(errtermvalue t,p,p)) 292 293 val minDelta = 3 294 295 (* pull all the state * lexv elements from the queue *) 296 297 val stateList = 298 let fun f q = let val (elem,newQueue) = Fifo.get q 299 in elem :: (f newQueue) 300 end handle Fifo.Empty => nil 301 in f queue 302 end 303 304 (* now number elements of stateList, giving distance from 305 error token *) 306 307 val (_, numStateList) = 308 List.foldr (fn (a,(num,r)) => (num+1,(a,num)::r)) (0, []) stateList 309 310 (* Represent the set of potential changes as a linked list. 311 312 Values of datatype Change hold information about a potential change. 313 314 oper = oper to be applied 315 pos = the # of the element in stateList that would be altered. 316 distance = the number of tokens beyond the error token which the 317 change allows us to parse. 318 new = new terminal * value pair at that point 319 orig = original terminal * value pair at the point being changed. 320 *) 321 322 datatype ('a,'b) change = CHANGE of 323 {pos : int, distance : int, leftPos: 'b, rightPos: 'b, 324 new : ('a,'b) lexv list, orig : ('a,'b) lexv list} 325 326 327 val showTerms = concat o map (fn TOKEN(t,_) => " " ^ showTerminal t) 328 329 val printChange = fn c => 330 let val CHANGE {distance,new,orig,pos,...} = c 331 in (print ("{distance= " ^ (Int.toString distance)); 332 print (",orig ="); print(showTerms orig); 333 print (",new ="); print(showTerms new); 334 print (",pos= " ^ (Int.toString pos)); 335 print "}\n") 336 end 337 338 val printChangeList = app printChange 339 340(* parse: given a lexPair, a stack, and the distance from the error 341 token, return the distance past the error token that we are able to parse.*) 342 343 fun parse (lexPair,stack,queuePos : int) = 344 case distanceParse(lexPair,stack,Fifo.empty,queuePos+maxAdvance+1) 345 of (_,_,_,distance,SOME ACCEPT) => 346 if maxAdvance-distance-1 >= 0 347 then maxAdvance 348 else maxAdvance-distance-1 349 | (_,_,_,distance,_) => maxAdvance - distance - 1 350 351(* catList: concatenate results of scanning list *) 352 353 fun catList l f = List.foldr (fn(a,r)=> f a @ r) [] l 354 355 fun keywordsDelta new = if List.exists (fn(TOKEN(t,_))=>is_keyword t) new 356 then minDelta else 0 357 358 fun tryChange{lex,stack,pos,leftPos,rightPos,orig,new} = 359 let val lex' = List.foldr (fn (t',p)=>(t',Stream.cons p)) lex new 360 val distance = parse(lex',stack,pos+length new-length orig) 361 in if distance >= minAdvance + keywordsDelta new 362 then [CHANGE{pos=pos,leftPos=leftPos,rightPos=rightPos, 363 distance=distance,orig=orig,new=new}] 364 else [] 365 end 366 367 368(* tryDelete: Try to delete n terminals. 369 Return single-element [success] or nil. 370 Do not delete unshiftable terminals. *) 371 372 373 fun tryDelete n ((stack,lexPair as (TOKEN(term,(_,l,r)),_)),qPos) = 374 let fun del(0,accum,left,right,lexPair) = 375 tryChange{lex=lexPair,stack=stack, 376 pos=qPos,leftPos=left,rightPos=right, 377 orig=rev accum, new=[]} 378 | del(n,accum,left,right,(tok as TOKEN(term,(_,_,r)),lexer)) = 379 if noShift term then [] 380 else del(n-1,tok::accum,left,r,Stream.get lexer) 381 in del(n,[],l,r,lexPair) 382 end 383 384(* tryInsert: try to insert tokens before the current terminal; 385 return a list of the successes *) 386 387 fun tryInsert((stack,lexPair as (TOKEN(_,(_,l,_)),_)),queuePos) = 388 catList terms (fn t => 389 tryChange{lex=lexPair,stack=stack, 390 pos=queuePos,orig=[],new=[tokAt(t,l)], 391 leftPos=l,rightPos=l}) 392 393(* trySubst: try to substitute tokens for the current terminal; 394 return a list of the successes *) 395 396 fun trySubst ((stack,lexPair as (orig as TOKEN (term,(_,l,r)),lexer)), 397 queuePos) = 398 if noShift term then [] 399 else 400 catList terms (fn t => 401 tryChange{lex=Stream.get lexer,stack=stack, 402 pos=queuePos, 403 leftPos=l,rightPos=r,orig=[orig], 404 new=[tokAt(t,r)]}) 405 406 (* do_delete(toks,lexPair) tries to delete tokens "toks" from "lexPair". 407 If it succeeds, returns SOME(toks',l,r,lp), where 408 toks' is the actual tokens (with positions and values) deleted, 409 (l,r) are the (leftmost,rightmost) position of toks', 410 lp is what remains of the stream after deletion 411 *) 412 fun do_delete(nil,lp as (TOKEN(_,(_,l,_)),_)) = SOME(nil,l,l,lp) 413 | do_delete([t],(tok as TOKEN(t',(_,l,r)),lp')) = 414 if eqT (t, t') 415 then SOME([tok],l,r,Stream.get lp') 416 else NONE 417 | do_delete(t::rest,(tok as TOKEN(t',(_,l,r)),lp')) = 418 if eqT (t,t') 419 then case do_delete(rest,Stream.get lp') 420 of SOME(deleted,l',r',lp'') => 421 SOME(tok::deleted,l,r',lp'') 422 | NONE => NONE 423 else NONE 424 425 fun tryPreferred((stack,lexPair),queuePos) = 426 catList preferred_change (fn (delete,insert) => 427 if List.exists noShift delete then [] (* should give warning at 428 parser-generation time *) 429 else case do_delete(delete,lexPair) 430 of SOME(deleted,l,r,lp) => 431 tryChange{lex=lp,stack=stack,pos=queuePos, 432 leftPos=l,rightPos=r,orig=deleted, 433 new=map (fn t=>(tokAt(t,r))) insert} 434 | NONE => []) 435 436 val changes = catList numStateList tryPreferred @ 437 catList numStateList tryInsert @ 438 catList numStateList trySubst @ 439 catList numStateList (tryDelete 1) @ 440 catList numStateList (tryDelete 2) @ 441 catList numStateList (tryDelete 3) 442 443 val findMaxDist = fn l => 444 foldr (fn (CHANGE {distance,...},high) => Int.max(distance,high)) 0 l 445 446(* maxDist: max distance past error taken that we could parse *) 447 448 val maxDist = findMaxDist changes 449 450(* remove changes which did not parse maxDist tokens past the error token *) 451 452 val changes = catList changes 453 (fn(c as CHANGE{distance,...}) => 454 if distance=maxDist then [c] else []) 455 456 in case changes 457 of (l as change :: _) => 458 let fun print_msg (CHANGE {new,orig,leftPos,rightPos,...}) = 459 let val s = 460 case (orig,new) 461 of (_::_,[]) => "deleting " ^ (showTerms orig) 462 | ([],_::_) => "inserting " ^ (showTerms new) 463 | _ => "replacing " ^ (showTerms orig) ^ 464 " with " ^ (showTerms new) 465 in error ("syntax error: " ^ s,leftPos,rightPos) 466 end 467 468 val _ = 469 (if length l > 1 andalso DEBUG2 then 470 (print "multiple fixes possible; could fix it by:\n"; 471 app print_msg l; 472 print "chosen correction:\n") 473 else (); 474 print_msg change) 475 476 (* findNth: find nth queue entry from the error 477 entry. Returns the Nth queue entry and the portion of 478 the queue from the beginning to the nth-1 entry. The 479 error entry is at the end of the queue. 480 481 Examples: 482 483 queue = a b c d e 484 findNth 0 = (e,a b c d) 485 findNth 1 = (d,a b c) 486 *) 487 488 val findNth = fn n => 489 let fun f (h::t,0) = (h,rev t) 490 | f (h::t,n) = f(t,n-1) 491 | f (nil,_) = let exception FindNth 492 in raise FindNth 493 end 494 in f (rev stateList,n) 495 end 496 497 val CHANGE {pos,orig,new,...} = change 498 val (last,queueFront) = findNth pos 499 val (stack,lexPair) = last 500 501 val lp1 = foldl(fn (_,(_,r)) => Stream.get r) lexPair orig 502 val lp2 = foldr(fn(t,r)=>(t,Stream.cons r)) lp1 new 503 504 val restQueue = 505 Fifo.put((stack,lp2), 506 foldl Fifo.put Fifo.empty queueFront) 507 508 val (lexPair,stack,queue,_,_) = 509 distanceParse(lp2,stack,restQueue,pos) 510 511 in (lexPair,stack,queue) 512 end 513 | nil => (error("syntax error found at " ^ (showTerminal term), 514 leftPos,leftPos); raise ParseError) 515 end 516 517 val parse = fn {arg,table,lexer,saction,void,lookahead, 518 ec=ec as {showTerminal,...} : ('_a,'_b) ecRecord} => 519 let val distance = 15 (* defer distance tokens *) 520 val minAdvance = 1 (* must parse at least 1 token past error *) 521 val maxAdvance = Int.max(lookahead,0)(* max distance for parse check *) 522 val lexPair = Stream.get lexer 523 val (TOKEN (_,(_,leftPos,_)),_) = lexPair 524 val startStack = [(initialState table,(void,leftPos,leftPos))] 525 val startQueue = Fifo.put((startStack,lexPair),Fifo.empty) 526 val distanceParse = distanceParse(table,showTerminal,saction,arg) 527 val fixError = mkFixError(ec,distanceParse,minAdvance,maxAdvance) 528 val ssParse = ssParse(table,showTerminal,saction,fixError,arg) 529 fun loop (lexPair,stack,queue,_,SOME ACCEPT) = 530 ssParse(lexPair,stack,queue) 531 | loop (lexPair,stack,queue,0,_) = ssParse(lexPair,stack,queue) 532 | loop (lexPair,stack,queue,distance,SOME ERROR) = 533 let val (lexPair,stack,queue) = fixError(lexPair,stack,queue) 534 in loop (distanceParse(lexPair,stack,queue,distance)) 535 end 536 | loop _ = let exception ParseInternal 537 in raise ParseInternal 538 end 539 in loop (distanceParse(lexPair,startStack,startQueue,distance)) 540 end 541 end; 542 543