1# -*- tcl -*- 2# ### ### ### ######### ######### ######### 3## Package description 4 5## Implementation of the ME virtual machine as a singleton, tied to 6## Tcl for control flow and stack handling (except the AST stack). 7 8# ### ### ### ######### ######### ######### 9## Requisites 10 11# ### ### ### ######### ######### ######### 12## Implementation 13 14namespace eval ::grammar::me::tcl { 15 namespace export \ 16 init lc tok sv tokens ast \ 17 astall ctok nc next ord \ 18 \ 19 isv_clear ict_advance inc_save \ 20 isv_terminal ict_match_token inc_restore \ 21 isv_nonterminal_leaf ict_match_tokrange icl_get \ 22 isv_nonterminal_range ict_match_tokclass icl_rewind \ 23 isv_nonterminal_reduce iok_ok \ 24 ier_clear iok_fail \ 25 ier_get iok_negate \ 26 ier_expected ias_push \ 27 ier_nonterminal ias_mark \ 28 ier_merge ias_pop2mark 29 30 variable ok 31} 32 33# ### ### ### ######### ######### ######### 34## Implementation, API. Ensemble command. 35 36proc ::grammar::me::tcl {cmd args} { 37 # Dispatcher for the ensemble command. 38 variable tcl::cmds 39 return [uplevel 1 [linsert $args 0 $cmds($cmd)]] 40} 41 42namespace eval grammar::me::tcl { 43 variable cmds 44 45 # Mapping from cmd names to procedures for quick dispatch. The 46 # objects will shimmer into resolved command references. 47 48 array set cmds { 49 init ::grammar::me::tcl::init 50 lc ::grammar::me::tcl::lc 51 tok ::grammar::me::tcl::tok 52 sv ::grammar::me::tcl::sv 53 tokens ::grammar::me::tcl::tokens 54 ast ::grammar::me::tcl::ast 55 astall ::grammar::me::tcl::astall 56 ctok ::grammar::me::tcl::ctok 57 nc ::grammar::me::tcl::nc 58 next ::grammar::me::tcl::next 59 ord ::grammar::me::tcl::ord 60 } 61} 62 63# ### ### ### ######### ######### ######### 64## API Implementation. 65 66proc ::grammar::me::tcl::init {nxcmd {tokmap {}}} { 67 variable next $nxcmd 68 variable as {} 69 variable ok 0 70 variable error {} 71 variable sv {} 72 variable loc -1 73 variable ct {} 74 variable tc {} 75 variable nc 76 variable tokOrd 77 variable tokUseOrd 0 78 79 array unset nc * 80 array unset tokOrd * 81 82 if {[llength $tokmap]} { 83 if {[llength $tokmap] % 2 == 1} { 84 return -code error \ 85 "Bad token order map, not a dictionary" 86 } 87 array set tokOrd $tokmap 88 set tokUseOrd 1 89 } 90 return 91} 92 93proc ::grammar::me::tcl::lc {pos} { 94 variable tc 95 return [lrange [lindex $tc $pos] 2 3] 96} 97 98proc ::grammar::me::tcl::tok {from {to {}}} { 99 variable tc 100 if {$to == {}} {set to $from} 101 return [lrange $tc $from $to] 102} 103 104proc ::grammar::me::tcl::tokens {} { 105 variable tc 106 return [llength $tc] 107} 108 109proc ::grammar::me::tcl::sv {} { 110 variable sv 111 return $sv 112} 113 114proc ::grammar::me::tcl::ast {} { 115 variable as 116 return [lindex $as end] 117} 118 119proc ::grammar::me::tcl::astall {} { 120 variable as 121 return $as 122} 123 124proc ::grammar::me::tcl::ctok {} { 125 variable ct 126 return $ct 127} 128 129proc ::grammar::me::tcl::nc {} { 130 variable nc 131 return [array get nc] 132} 133 134proc ::grammar::me::tcl::next {} { 135 variable next 136 return $next 137} 138 139proc ::grammar::me::tcl::ord {} { 140 variable tokOrd 141 return [array get tokOrd] 142} 143 144# ### ### ### ######### ######### ######### 145## Terminal matching 146 147proc ::grammar::me::tcl::ict_advance {msg} { 148 # Inlined: Getch, Expected, ClearErrors 149 150 variable ok 151 variable error 152 # ------------------------ 153 variable tc 154 variable loc 155 variable ct 156 # ------------------------ 157 variable next 158 # ------------------------ 159 160 # Satisfy from input cache if possible. 161 incr loc 162 if {$loc < [llength $tc]} { 163 set ct [lindex $tc $loc 0] 164 set ok 1 165 set error {} 166 return 167 } 168 169 # Actually read from the input, and remember 170 # the information. 171 172 # Read from buffer, and remember. 173 # Note: loc is the instance variable. 174 # This implicitly increments the location! 175 176 set tokdata [uplevel \#0 $next] 177 if {![llength $tokdata]} { 178 set ok 0 179 set error [list $loc [list $msg]] 180 return 181 } elseif {[llength $tokdata] != 4} { 182 return -code error "Bad callback result, expected 4 elements" 183 } 184 185 lappend tc $tokdata 186 set ct [lindex $tokdata 0] 187 set ok 1 188 set error {} 189 return 190} 191 192proc ::grammar::me::tcl::ict_match_token {tok msg} { 193 variable ct 194 variable ok 195 196 set ok [expr {$tok eq $ct}] 197 198 OkFail $msg 199 return 200} 201 202proc ::grammar::me::tcl::ict_match_tokrange {toks toke msg} { 203 variable ct 204 variable ok 205 variable tokUseOrd 206 variable tokOrd 207 208 if {$tokUseOrd} { 209 set ord $tokOrd($ct) 210 set ok [expr { 211 ($toks <= $ord) && 212 ($ord <= $toke) 213 }] ; # {} 214 } else { 215 set ok [expr { 216 ([string compare $toks $ct] <= 0) && 217 ([string compare $ct $toke] <= 0) 218 }] ; # {} 219 } 220 221 OkFail $msg 222 return 223} 224 225proc ::grammar::me::tcl::ict_match_tokclass {code msg} { 226 variable ct 227 variable ok 228 229 set ok [string is $code -strict $ct] 230 231 OkFail $msg 232 return 233} 234 235proc ::grammar::me::tcl::OkFail {msg} { 236 variable ok 237 variable error 238 variable loc 239 240 # Inlined: Expected, Unget, ClearErrors 241 242 if {!$ok} { 243 set error [list $loc [list $msg]] 244 incr loc -1 245 } else { 246 set error {} 247 } 248 return 249} 250 251# ### ### ### ######### ######### ######### 252## Nonterminal cache 253 254proc ::grammar::me::tcl::inc_restore {symbol} { 255 variable loc 256 variable nc 257 variable ok 258 variable error 259 variable sv 260 261 # Satisfy from cache if possible. 262 if {[info exists nc($loc,$symbol)]} { 263 foreach {go ok error sv} $nc($loc,$symbol) break 264 265 # Go forward, as the nonterminal matches (or not). 266 set loc $go 267 return 1 268 } 269 return 0 270} 271 272proc ::grammar::me::tcl::inc_save {symbol at} { 273 variable loc 274 variable nc 275 variable ok 276 variable error 277 variable sv 278 279 if 0 { 280 if {[info exists nc($at,$symbol)]} { 281 return -code error "Cannot overwrite\ 282 existing data @ ($at, $symbol)" 283 } 284 } 285 286 # FIXME - end location should be argument. 287 288 # Store not only the value, but also how far 289 # the match went (if it was a match). 290 291 set nc($at,$symbol) [list $loc $ok $error $sv] 292 return 293} 294 295# ### ### ### ######### ######### ######### 296## Unconditional matching. 297 298proc ::grammar::me::tcl::iok_ok {} { 299 variable ok 1 300 return 301} 302 303proc ::grammar::me::tcl::iok_fail {} { 304 variable ok 0 305 return 306} 307 308proc ::grammar::me::tcl::iok_negate {} { 309 variable ok 310 set ok [expr {!$ok}] 311 return 312} 313 314# ### ### ### ######### ######### ######### 315## Basic input handling and tracking 316 317proc ::grammar::me::tcl::icl_get {} { 318 variable loc 319 return $loc 320} 321 322proc ::grammar::me::tcl::icl_rewind {oldloc} { 323 variable loc 324 325 if 0 { 326 if {($oldloc < -1) || ($oldloc > $loc)} { 327 return -code error "Bad location \"$oldloc\" (vs $loc)" 328 } 329 } 330 set loc $oldloc 331 return 332} 333 334# ### ### ### ######### ######### ######### 335## Error handling. 336 337proc ::grammar::me::tcl::ier_get {} { 338 variable error 339 return $error 340} 341 342proc ::grammar::me::tcl::ier_clear {} { 343 variable error {} 344 return 345} 346 347proc ::grammar::me::tcl::ier_nonterminal {msg pos} { 348 # Inlined: Errors, Expected. 349 350 variable error 351 352 if {[llength $error]} { 353 foreach {l m} $error break 354 incr pos 355 if {$l == $pos} { 356 set error [list $l [list $msg]] 357 } 358 } 359} 360 361proc ::grammar::me::tcl::ier_merge {new} { 362 variable error 363 364 # We have either old or new error data, keep it. 365 366 if {![llength $error]} {set error $new ; return} 367 if {![llength $new]} {return} 368 369 # If one of the errors is further on in the input choose that as 370 # the information to propagate. 371 372 foreach {loe msgse} $error break 373 foreach {lon msgsn} $new break 374 375 if {$lon > $loe} {set error $new ; return} 376 if {$loe > $lon} {return} 377 378 # Equal locations, merge the message lists. 379 380 foreach m $msgsn {lappend msgse $m} 381 set error [list $loe [lsort -uniq $msgse]] 382 return 383} 384 385# ### ### ### ######### ######### ######### 386## Operations for the construction of the 387## abstract syntax tree (AST). 388 389proc ::grammar::me::tcl::isv_clear {} { 390 variable sv {} 391 return 392} 393 394proc ::grammar::me::tcl::isv_terminal {} { 395 variable loc 396 variable sv 397 variable as 398 399 set sv [list {} $loc $loc] 400 lappend as $sv 401 return 402} 403 404proc ::grammar::me::tcl::isv_nonterminal_leaf {nt pos} { 405 # Inlined clear, reduce, and optimized. 406 variable ok 407 variable loc 408 variable sv {} 409 410 # Clear ; if {$ok} {Reduce $nt} 411 412 if {$ok} { 413 incr pos 414 set sv [list $nt $pos $loc] 415 } 416 return 417} 418 419proc ::grammar::me::tcl::isv_nonterminal_range {nt pos} { 420 variable ok 421 variable loc 422 variable sv {} 423 424 if {$ok} { 425 # TerminalString $pos 426 # Get all characters after 'pos' to current location as terminal data. 427 428 incr pos 429 set sv [list $nt $pos $loc [list {} $pos $loc]] 430 431 #set sv [linsert $sv 0 $nt] ;#Reduce $nt 432 } 433 return 434} 435 436proc ::grammar::me::tcl::isv_nonterminal_reduce {nt pos {mrk 0}} { 437 variable ok 438 variable as 439 variable loc 440 variable sv {} 441 442 if {$ok} { 443 incr pos 444 set sv [lrange $as $mrk end] ;#SaveToMark $mrk 445 set sv [linsert $sv 0 $nt $pos $loc] ;#Reduce $nt 446 } 447 return 448} 449 450# ### ### ### ######### ######### ######### 451## AST stack handling 452 453proc ::grammar::me::tcl::ias_push {} { 454 variable as 455 variable sv 456 lappend as $sv 457 return 458} 459 460proc ::grammar::me::tcl::ias_mark {} { 461 variable as 462 return [llength $as] 463} 464 465proc ::grammar::me::tcl::ias_pop2mark {mark} { 466 variable as 467 if {[llength $as] <= $mark} return 468 incr mark -1 469 set as [lrange $as 0 $mark] 470 return 471} 472 473# ### ### ### ######### ######### ######### 474## Data structures. 475 476namespace eval ::grammar::me::tcl { 477 # ### ### ### ######### ######### ######### 478 ## Public State of MVM (Matching Virtual Machine) 479 480 variable ok 0 ; # Boolean: Ok/Fail of last match operation. 481 482 # ### ### ### ######### ######### ######### 483 ## Internal state. 484 485 variable ct {} ; # Current token. 486 variable loc 0 ; # Location of 'ct' as offset in input. 487 488 variable error {} ; # Error data for last match. 489 # ; # == List (loc, list of strings) 490 # ; # or empty list 491 variable sv {} ; # Semantic value for last match. 492 493 # ### ### ### ######### ######### ######### 494 ## Data structures for AST construction 495 496 variable as {} ; # Stack of values for AST 497 498 # ### ### ### ######### ######### ######### 499 ## Memo data structures for tokens and match results. 500 501 variable tc {} 502 variable nc ; array set nc {} 503 504 # ### ### ### ######### ######### ######### 505 ## Input buffer, location of next character to read. 506 ## ASSERT (loc <= cloc) 507 508 variable next ; # Callback to get next character. 509 510 # Token ordering for range checks. Optional 511 512 variable tokOrd ; array set tokOrd {} 513 variable tokUseOrd 0 514 515 # ### ### ### ######### ######### ######### 516} 517 518# ### ### ### ######### ######### ######### 519## Package Management 520 521package provide grammar::me::tcl 0.1 522