1# -*- tcl -*- 2# 3# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 4 5# # ## ### ##### ######## ############# ##################### 6## Package description 7 8## Implementation of the PackRat Machine (PARAM), a virtual machine on 9## top of which parsers for Parsing Expression Grammars (PEGs) can be 10## realized. This implementation is tied to Tcl for control flow. We 11## (will) have alternate implementations written in TclOO, and critcl, 12## all exporting the same API. 13# 14## RD stands for Recursive Descent. 15 16# # ## ### ##### ######## ############# ##################### 17## Requisites 18 19package require Tcl 8.5 20package require TclOO 21package require struct::stack 1.4 ; # Requiring get, trim methods 22package require pt::ast 23package require pt::pe 24 25# # ## ### ##### ######## ############# ##################### 26## Implementation 27 28oo::class create ::pt::rde::oo { 29 30 # # ## ### ##### ######## ############# ##################### 31 ## API - Lifecycle 32 33 constructor {} { 34 set selfns [info object namespace] 35 36 set mystackloc [struct::stack ${selfns}::LOC] ; # LS 37 set mystackerr [struct::stack ${selfns}::ERR] ; # ES 38 set mystackast [struct::stack ${selfns}::AST] ; # ARS/AS 39 set mystackmark [struct::stack ${selfns}::MARK] ; # s.a. 40 41 my reset 42 return 43 } 44 45 method reset {chan} { 46 set mychan $chan ; # IN 47 set myline 1 ; # 48 set mycolumn 0 ; # 49 set mycurrent {} ; # CC 50 set myloc -1 ; # CL 51 set myok 0 ; # ST 52 set msvalue {} ; # SV 53 set myerror {} ; # ER 54 set mytoken {} ; # TC 55 array unset mysymbol * ; # NC 56 57 $mystackloc clear 58 $mystackerr clear 59 $mystackast clear 60 $mystackmark clear 61 return 62 } 63 64 method complete {} { 65 if {$myok} { 66 set n [$mystackast size] 67 if {$n > 1} { 68 set pos [$mystackloc peek] 69 incr pos 70 set children [lreverse [$mystackast peek [$mystackast size]]] ; # SaveToMark 71 return [pt::ast new {} $pos $myloc {*}$children] ; # Reduce ALL 72 } else { 73 return [$mystackast peek] 74 } 75 } else { 76 lassign $myerror loc messages 77 return -code error [list pt::rde $loc [$self position $loc] $messages] 78 } 79 } 80 81 # # ## ### ##### ######## ############# ##################### 82 ## API - State accessors 83 84 method chan {} { return $mychan } 85 method line {} { return $myline } 86 method column {} { return $mycolumn } 87 88 # - - -- --- ----- -------- 89 90 method current {} { return $mycurrent } 91 method location {} { return $myloc } 92 method lmarked {} { return [lreverse [$mystackloc get]] } 93 94 # - - -- --- ----- -------- 95 96 method ok {} { return $myok } 97 method value {} { return $mysvalue } 98 method error {} { return $myerror } 99 method emarked {} { return [lreverse [$mystackerr get]] } 100 101 # - - -- --- ----- -------- 102 103 method tokens {{from {}} {to {}}} { 104 switch -exact [llength [info level 0]] { 105 4 { return $mytoken } 106 5 { return [lrange $mytoken $from $from] } 107 6 { return [lrange $mytoken $from $to] } 108 } 109 } 110 111 method symbols {} { 112 return [array get mysymbol] 113 } 114 115 method scached {} { 116 return [array names mysymbol] 117 } 118 119 # - - -- --- ----- -------- 120 121 method asts {} { return [lreverse [$mystackast get]] } 122 method amarked {} { return [lreverse [$mystackmark get]] } 123 method ast {} { return [$mystackast peek] } 124 125 # - - -- --- ----- -------- 126 127 method position {loc} { 128 return [lrange [lindex $mytoken $loc] 1 2] 129 } 130 131 # # ## ### ##### ######## ############# ##################### 132 ## API - Instructions - Control flow 133 134 method i:ok_continue {} { 135 if {!$myok} return 136 return -code continue 137 } 138 139 method i:fail_continue {} { 140 if {$myok} return 141 return -code continue 142 } 143 144 method i:fail_return {} { 145 if {$myok} return 146 return -code return 147 } 148 149 method i:ok_return {} { 150 if {!$myok} return 151 return -code return 152 } 153 154 # # ## ### ##### ######## ############# ##################### 155 ## API - Instructions - Unconditional matching. 156 157 method i_status_ok {} { 158 set myok 1 159 return 160 } 161 162 method i_status_fail {} { 163 set myok 0 164 return 165 } 166 167 method i_status_negate {} { 168 set myok [expr {!$myok}] 169 return 170 } 171 172 # # ## ### ##### ######## ############# ##################### 173 ## API - Instructions - Error handling. 174 175 method i_error_clear {} { 176 set myerror {} 177 return 178 } 179 180 method i_error_push {} { 181 $mystackerr push $myerror 182 return 183 } 184 185 method i_error_pop_merge {} { 186 set olderror [$mystackerr pop] 187 188 # We have either old or new error data, keep it. 189 190 if {![llength $myerror]} { set myerror $olderror ; return } 191 if {![llength $olderror]} return 192 193 # If one of the errors is further on in the input choose that as 194 # the information to propagate. 195 196 lassign $myerror loe msgse 197 lassign $olderror lon msgsn 198 199 if {$lon > $loe} { set myerror $olderror ; return } 200 if {$loe > $lon} return 201 202 # Equal locations, merge the message lists. 203 #set myerror [list $loe [struct::set union $msgse $msgsn]] 204 set myerror [list $loe [lsort -uniq [list {*}$msgse {*}$msgsn]]] 205 return 206 } 207 208 method i_error_nonterminal {symbol} { 209 # Inlined: Errors, Expected. 210 if {![llength $myerror]} return 211 set pos [$mystackloc peek] 212 incr pos 213 lassign $myerror loc messages 214 if {$loc != $pos} return 215 set myerror [list $loc [list $symbol]] 216 return 217 } 218 219 # # ## ### ##### ######## ############# ##################### 220 ## API - Instructions - Basic input handling and tracking 221 222 method i_loc_pop_rewind/discard {} { 223 #$myparser i:fail_loc_pop_rewind 224 #$myparser i:ok_loc_pop_discard 225 #return 226 set last [$mystackloc pop] 227 if {!$myok} { 228 set myloc $last 229 } 230 return 231 } 232 233 method i_loc_pop_discard {} { 234 $mystackloc pop 235 return 236 } 237 238 method i_loc_pop_rewind {} { 239 set myloc [$mystackloc pop] 240 return 241 } 242 243 method i:fail_loc_pop_rewind {} { 244 if {$myok} return 245 set myloc [$mystackloc pop] 246 return 247 } 248 249 method i_loc_push {} { 250 $mystackloc push $myloc 251 return 252 } 253 254 # # ## ### ##### ######## ############# ##################### 255 ## API - Instructions - AST stack handling 256 257 method i_ast_pop_rewind/discard {} { 258 #$myparser i:fail_ast_pop_rewind 259 #$myparser i:ok_ast_pop_discard 260 #return 261 set mark [$mystackmark pop] 262 if {$myok} return 263 $mystackast trim $mark 264 return 265 } 266 267 method i_ast_pop_discard/rewind {} { 268 #$myparser i:ok_ast_pop_rewind 269 #$myparser i:fail_ast_pop_discard 270 #return 271 set mark [$mystackmark pop] 272 if {!$myok} return 273 $mystackast trim $mark 274 return 275 } 276 277 method i_ast_pop_discard {} { 278 $mystackmark pop 279 return 280 } 281 282 method i_ast_pop_rewind {} { 283 $mystackast trim [$mystackmark pop] 284 return 285 } 286 287 method i:fail_ast_pop_rewind {} { 288 if {$myok} return 289 $mystackast trim [$mystackmark pop] 290 return 291 } 292 293 method i_ast_push {} { 294 $mystackmark push [$mystackast size] 295 return 296 } 297 298 method i:ok_ast_value_push {} { 299 if {!$myok} return 300 $mystackast push $mysvalue 301 return 302 } 303 304 # # ## ### ##### ######## ############# ##################### 305 ## API - Instructions - Nonterminal cache 306 307 method i_symbol_restore {symbol} { 308 # Satisfy from cache if possible. 309 set k [list $myloc $symbol] 310 if {![info exists mysymbol($k)]} { return 0 } 311 lassign $mysymbol($k) myloc myok myerror mysvalue 312 # We go forward, as the nonterminal matches (or not). 313 return 1 314 } 315 316 method i_symbol_save {symbol} { 317 # Store not only the value, but also how far 318 # the match went (if it was a match). 319 set at [$mystackloc peek] 320 set k [list $at $symbol] 321 set mysymbol($k) [list $myloc $myok $myerror $mysvalue] 322 return 323 } 324 325 # # ## ### ##### ######## ############# ##################### 326 ## API - Instructions - Semantic values. 327 328 method i_value_clear {} { 329 set mysvalue {} 330 return 331 } 332 333 method i_value_clear/leaf {symbol} { 334 # not quite value_lead (guarded, and clear on fail) 335 # Inlined clear, reduce, and optimized. 336 # Clear ; if {$ok} {Reduce $symbol} 337 set mysvalue {} 338 if {!$myok} return 339 set pos [$mystackloc peek] 340 incr pos 341 set mysvalue [pt::ast new $symbol $pos $myloc] 342 return 343 } 344 345 method i_value_clear/reduce {symbol} { 346 set mysvalue {} 347 if {!$myok} return 348 349 set mark [$mystackmark peek];# Old size of stack before current nt pushed more. 350 set newa [expr {[$mystackast size] - $mark}] 351 352 set pos [$mystackloc peek] 353 incr pos 354 355 if {!$newa} { 356 set mysvalue {} 357 } elseif {$newa == 1} { 358 # peek 1 => single element comes back 359 set mysvalue [list [$mystackast peek]] ; # SaveToMark 360 } else { 361 # peek n > 1 => list of elements comes back 362 set mysvalue [lreverse [$mystackast peek $newa]] ; # SaveToMark 363 } 364 365 set mysvalue [pt::ast new $symbol $pos $myloc {*}$mysvalue] ; # Reduce $symbol 366 return 367 } 368 369 # # ## ### ##### ######## ############# ##################### 370 ## API - Instructions - Terminal matching 371 372 method i_input_next {msg} { 373 # Inlined: Getch, Expected, ClearErrors 374 # Satisfy from input cache if possible. 375 376 incr myloc 377 if {$myloc < [llength $mytoken]} { 378 set mycurrent [lindex $mytoken $myloc 0] 379 set myok 1 380 set myerror {} 381 return 382 } 383 384 # Actually read from the input, and remember 385 # the information. 386 # Note: We are implicitly incrementing the location! 387 388 set token [my ReadChar] 389 390 if {![llength $token]} { 391 set myok 0 392 set myerror [list $myloc [list $msg]] 393 return 394 } 395 396 lappend mytoken $token 397 set mycurrent [lindex $token 0] 398 set myok 1 399 set myerror {} 400 return 401 } 402 403 method i_test_alnum {} { 404 set myok [string is alnum -strict $mycurrent] 405 my OkFail [pt::pe alnum] 406 return 407 } 408 409 method i_test_alpha {} { 410 set myok [string is alpha -strict $mycurrent] 411 my OkFail [pt::pe alpha] 412 return 413 } 414 415 method i_test_ascii {} { 416 set myok [string is ascii -strict $mycurrent] 417 my OkFail [pt::pe ascii] 418 return 419 } 420 421 method i_test_char {tok} { 422 set myok [expr {$tok eq $mycurrent}] 423 my OkFail [pt::pe terminal $tok] 424 return 425 } 426 427 method i_test_ddigit {} { 428 set myok [string match {[0-9]} $mycurrent] 429 my OkFail [pt::pe ddigit] 430 return 431 } 432 433 method i_test_digit {} { 434 set myok [string is digit -strict $mycurrent] 435 my OkFail [pt::pe digit] 436 return 437 } 438 439 method i_test_graph {} { 440 set myok [string is graph -strict $mycurrent] 441 my OkFail [pt::pe graph] 442 return 443 } 444 445 method i_test_lower {} { 446 set myok [string is lower -strict $mycurrent] 447 my OkFail [pt::pe lower] 448 return 449 } 450 451 method i_test_print {} { 452 set myok [string is print -strict $mycurrent] 453 my OkFail [pt::pe printable] 454 return 455 } 456 457 method i_test_punct {} { 458 set myok [string is punct -strict $mycurrent] 459 my OkFail [pt::pe punct] 460 return 461 } 462 463 method i_test_range {toks toke} { 464 set myok [expr { 465 ([string compare $toks $mycurrent] <= 0) && 466 ([string compare $mycurrent $toke] <= 0) 467 }] ; # {} 468 my OkFail [pt::pe range $toks $toke] 469 return 470 } 471 472 method i_test_space {} { 473 set myok [string is space -strict $mycurrent] 474 my OkFail [pt::pe space] 475 return 476 } 477 478 method i_test_upper {} { 479 set myok [string is upper -strict $mycurrent] 480 my OkFail [pt::pe upper] 481 return 482 } 483 484 method i_test_wordchar {} { 485 set myok [string is wordchar -strict $mycurrent] 486 my OkFail [pt::pe wordchar] 487 return 488 } 489 490 method i_test_xdigit {} { 491 set myok [string is xdigit -strict $mycurrent] 492 my OkFail [pt::pe xdigit] 493 return 494 } 495 496 # # ## ### ##### ######## ############# ##################### 497 ## Internals 498 499 method ReadChar {} { 500 upvar 1 mychan mychan myline myline mycolumn mycolumn 501 502 if {[eof $mychan]} {return {}} 503 504 set ch [read $mychan 1] 505 if {$ch eq ""} {return {}} 506 507 set token [list $ch $myline $mycolumn] 508 509 if {$ch eq "\n"} { 510 incr myline 511 set mycolumn 0 512 } else { 513 incr mycolumn 514 } 515 516 return $token 517 } 518 519 method OkFail {msg} { 520 upvar 1 myok myok myerror myerror myloc myloc 521 # Inlined: Expected, Unget, ClearErrors 522 if {!$myok} { 523 set myerror [list $myloc [list $msg]] 524 incr myloc -1 525 } else { 526 set myerror {} 527 } 528 return 529 } 530 531 # # ## ### ##### ######## ############# ##################### 532 ## Data structures. 533 ## Mainly the architectural state of the instance's PARAM. 534 535 variable \ 536 mychan myline mycolumn \ 537 mycurrent myloc mystackloc \ 538 myok mysvalue myerror mystackerr \ 539 mytoken mysymbol \ 540 mystackast mystackmark 541 542 # Parser Input (channel, location (line, column)) ........... 543 # Token, current parsing location, stack of locations ....... 544 # Match state . ........ ............. ..................... 545 # Caches for tokens and nonterminals .. ..................... 546 # Abstract syntax tree (AST) .......... ..................... 547 548 # # ## ### ##### ######## ############# ##################### 549} 550 551# # ## ### ##### ######## ############# ##################### 552## Ready 553 554package provide pt::rde 1 555return 556