1# -*- tcl -*- 2# (C) 2005-2006 Andreas Kupries <andreas_kupries@users.sourceforge.net> 3# ### ### ### ######### ######### ######### 4## Package description 5 6## Implementation of ME virtual machines based on state values 7## manipulated by the commands according to the match 8## instructions. Allows for implementation in C. 9 10# ### ### ### ######### ######### ######### 11## Requisites 12 13namespace eval ::grammar::me::cpu::core {} 14 15# ### ### ### ######### ######### ######### 16## Implementation, API. Ensemble command. 17 18proc ::grammar::me::cpu::core {cmd args} { 19 # Dispatcher for the ensemble command. 20 variable core::cmds 21 return [uplevel 1 [linsert $args 0 $cmds($cmd)]] 22} 23 24namespace eval grammar::me::cpu::core { 25 variable cmds 26 27 # Mapping from cmd names to procedures for quick dispatch. The 28 # objects will shimmer into resolved command references. 29 30 array set cmds { 31 disasm ::grammar::me::cpu::core::disasm 32 asm ::grammar::me::cpu::core::asm 33 new ::grammar::me::cpu::core::new 34 lc ::grammar::me::cpu::core::lc 35 tok ::grammar::me::cpu::core::tok 36 pc ::grammar::me::cpu::core::pc 37 iseof ::grammar::me::cpu::core::iseof 38 at ::grammar::me::cpu::core::at 39 cc ::grammar::me::cpu::core::cc 40 sv ::grammar::me::cpu::core::sv 41 ok ::grammar::me::cpu::core::ok 42 error ::grammar::me::cpu::core::error 43 lstk ::grammar::me::cpu::core::lstk 44 astk ::grammar::me::cpu::core::astk 45 mstk ::grammar::me::cpu::core::mstk 46 estk ::grammar::me::cpu::core::estk 47 rstk ::grammar::me::cpu::core::rstk 48 nc ::grammar::me::cpu::core::nc 49 ast ::grammar::me::cpu::core::ast 50 halted ::grammar::me::cpu::core::halted 51 code ::grammar::me::cpu::core::code 52 eof ::grammar::me::cpu::core::eof 53 put ::grammar::me::cpu::core::put 54 run ::grammar::me::cpu::core::run 55 } 56} 57 58# ### ### ### ######### ######### ######### 59## Ensemble implementation 60 61proc ::grammar::me::cpu::core::disasm {code} { 62 variable iname 63 variable tclass 64 variable anum 65 66 Validate $code ord dst jmp 67 68 set label 0 69 foreach k [array names jmp] { 70 set jmp($k) bra$label 71 incr label 72 } 73 foreach k [array names dst] { 74 if {![info exists jmp($k)]} { 75 set jmp($k) {} 76 } 77 } 78 79 set result {} 80 foreach {asm pool tokmap} $code break 81 82 set pc 0 83 set pcend [llength $asm] 84 85 while {$pc < $pcend} { 86 set base $pc 87 set insn [lindex $asm $pc] ; incr pc 88 set an [lindex $anum $insn] 89 90 if {$an == 1} { 91 set a [lindex $asm $pc] ; incr pc 92 } elseif {$an == 2} { 93 set a [lindex $asm $pc] ; incr pc 94 set b [lindex $asm $pc] ; incr pc 95 } elseif {$an == 3} { 96 set a [lindex $asm $pc] ; incr pc 97 set b [lindex $asm $pc] ; incr pc 98 set c [lindex $asm $pc] ; incr pc 99 } 100 101 set instruction {} 102 lappend instruction $jmp($base) 103 lappend instruction $iname($insn) 104 105 switch -exact $insn { 106 0 - 5 - 20 - 24 - 25 - 26 - 107 a/string { 108 lappend instruction [lindex $pool $a] 109 } 110 1 { 111 # a/tok b/string 112 if {![llength $tokmap]} { 113 lappend instruction [lindex $pool $a] 114 } else { 115 lappend instruction ${a}:$ord($a) 116 } 117 lappend instruction [lindex $pool $b] 118 } 119 2 { 120 # a/tokstart b/tokend c/string 121 if {![llength $tokmap]} { 122 lappend instruction [lindex $pool $a] 123 lappend instruction [lindex $pool $b] 124 } else { 125 # tokmap defined: a = b = order rank. 126 lappend instruction ${a}:$ord($a) 127 lappend instruction ${b}:$ord($b) 128 } 129 lappend instruction [lindex $pool $c] 130 } 131 3 { 132 # a/class(0-5) b/string 133 lappend instruction [lindex $tclass $a] 134 lappend instruction [lindex $pool $b] 135 } 136 4 { 137 # a/branch b/string 138 lappend instruction $jmp($a) 139 lappend instruction [lindex $pool $b] 140 } 141 6 - 11 - 12 - 13 - 142 a/branch { 143 lappend instruction $jmp($a) 144 } 145 default {} 146 } 147 148 lappend result $instruction 149 } 150 151 return $result 152} 153 154proc ::grammar::me::cpu::core::asm {code} { 155 variable iname 156 variable anum 157 variable tccode 158 159 # code = list(insn), insn = list (label insn-name ...) 160 161 # I. Indices for the labels, based on instruction sizes. 162 163 array set jmp {} 164 set off 0 165 foreach insn $code { 166 foreach {label name} $insn break 167 # Ignore embedded comments, except for labels 168 if {$label ne ""} { 169 set jmp($label) $off 170 } 171 if {$name eq ".C"} continue 172 if {![info exists iname($name)]} { 173 return -code error "Bad instruction \"$insn\", unknown command \"$name\"" 174 } 175 set an [lindex $anum $iname($name)] 176 if {[llength $insn] != ($an+2)} { 177 return -code error "Bad instruction \"$insn\", expected $an argument[expr {$an == 1 ? "" : "s"}]" 178 } 179 incr off 180 incr off [lindex $anum $iname($name)] 181 } 182 183 set asm {} 184 set pool {} 185 array set poolh {} 186 array set tokmap {} 187 array set ord {} 188 set plain 0 189 190 foreach insn $code { 191 foreach {label name} $insn break 192 # Ignore embedded comments 193 if {$name eq ".C"} continue 194 set an [lindex $anum $iname($name)] 195 196 # Instruction code to assembly ... 197 lappend asm $iname($name) 198 199 # Encode arguments ... 200 switch -exact -- $name { 201 ict_advance - 202 inc_save - 203 ier_nonterminal - 204 isv_nonterminal_leaf - 205 isv_nonterminal_range - 206 isv_nonterminal_reduce { 207 lappend asm [Str [lindex $insn 2]] 208 } 209 ict_match_token { 210 lappend asm [Tok [lindex $insn 2]] 211 lappend asm [Str [lindex $insn 3]] 212 } 213 ict_match_tokrange { 214 lappend asm [Tok [lindex $insn 2]] 215 lappend asm [Tok [lindex $insn 3]] 216 lappend asm [Str [lindex $insn 4]] 217 } 218 ict_match_tokclass { 219 set ccode [lindex $insn 2] 220 if {![info exists tccode($ccode)]} { 221 return -code error "Bad instruction \"$insn\", unknown class code \"$ccode\"" 222 } 223 lappend asm $tccode($ccode) 224 lappend asm [Str [lindex $insn 3]] 225 226 } 227 inc_restore { 228 set jmpto [lindex $insn 2] 229 if {![info exists jmp($jmpto)]} { 230 return -code error "Bad instruction \"$insn\", unknown branch destination \"$jmpto\"" 231 } 232 lappend asm $jmp($jmpto) 233 lappend asm [Str [lindex $insn 3]] 234 } 235 icf_ntcall - 236 icf_jalways - 237 icf_jok - 238 icf_jfail { 239 set jmpto [lindex $insn 2] 240 if {![info exists jmp($jmpto)]} { 241 return -code error "Bad instruction \"$insn\", unknown branch destination \"$jmpto\"" 242 } 243 lappend asm $jmp($jmpto) 244 } 245 } 246 } 247 248 return [list $asm $pool [array get tokmap]] 249} 250 251proc ::grammar::me::cpu::core::new {code} { 252 # The code generating the state is drawn out to integrate a 253 # specification of how the machine state is mapped to Tcl as well. 254 255 Validate $code 256 257 set state {} ; # The state is representend as a Tcl list. 258 # ### ### ### ######### ######### ######### 259 lappend state $code ; # [_0] code - list - code to run (-) 260 lappend state 0 ; # [_1] pc - int - Program counter 261 lappend state 0 ; # [_2] halt - bool - Flag, set (internal) when machine was halted (icf_halt). 262 lappend state 0 ; # [_3] eof - bool - Flag, set (external) when where will be no more input. 263 lappend state {} ; # [_4] tc - list - Terminal cache, pending and processed tokens. 264 lappend state -1 ; # [_5] cl - int - Current Location 265 lappend state {} ; # [_6] ct - token - Current Character 266 lappend state 0 ; # [_7] ok - bool - Match Status 267 lappend state {} ; # [_8] sv - any - Semantic Value 268 lappend state {} ; # [_9] er - list - Error status (*) 269 lappend state {} ; # [10] ls - list - Location Stack (x) 270 lappend state {} ; # [11] as - list - Ast Stack 271 lappend state {} ; # [12] ms - list - Ast Marker Stack 272 lappend state {} ; # [13] es - list - Error Stack 273 lappend state {} ; # [14] rs - list - Return Stack 274 lappend state {} ; # [15] nc - dict - Nonterminal Cache (backtracking) 275 # ### ### ### ######### ######### ######### 276 277 # tc = list(token) 278 # token = list(str lexeme line col) 279 280 281 # (-) See manpage of this package for the representation. 282 283 # (*) 2 elements, first is error location, second is list of 284 # ... strings, the error messages. The strings are actually 285 # ... represented by references into the pool element of the code. 286 287 # (x) Regarding the various stacks maintained in the state, their 288 # top element is always at the right end, i.e. the last 289 # element in the list representing it. 290 291 return $state 292} 293 294proc ::grammar::me::cpu::core::ntok {state} { 295 return [llength [lindex $state 4]] 296} 297 298proc ::grammar::me::cpu::core::lc {state loc} { 299 set tc [lindex $state 4] 300 set loc [INDEX $tc $loc "Illegal location"] 301 return [lrange [lindex $tc $loc] 2 3] 302 # result = list(line col) 303} 304 305proc ::grammar::me::cpu::core::tok {state args} { 306 if {[llength $args] > 2} { 307 return -code error {wrong # args: should be "grammar::me::cpu::core::tok state ?from ?to??"} 308 } 309 set tc [lindex $state 4] 310 if {[llength $args] == 0} { 311 return $tc 312 } elseif {[llength $args] == 1} { 313 set at [INDEX $tc [lindex $args 0] "Illegal location"] 314 return [lrange $tc $at $at] 315 } else { 316 set from [INDEX $tc [lindex $args 0] "Illegal start location"] 317 set to [INDEX $tc [lindex $args 1] "Illegal end location"] 318 if {$from > $to} { 319 return -code error "Illegal empty location range $from .. $to" 320 } 321 return [lrange $tc $from $to] 322 } 323 # result = list(token), token = list(str lex line col) 324} 325 326proc ::grammar::me::cpu::core::pc {state} { 327 return [lindex $state 1] 328} 329 330proc ::grammar::me::cpu::core::iseof {state} { 331 return [lindex $state 3] 332} 333 334proc ::grammar::me::cpu::core::at {state} { 335 return [lindex $state 5] 336} 337 338proc ::grammar::me::cpu::core::cc {state} { 339 return [lindex $state 6] 340} 341 342proc ::grammar::me::cpu::core::sv {state} { 343 return [lindex $state 8] 344} 345 346proc ::grammar::me::cpu::core::ok {state} { 347 return [lindex $state 7] 348} 349 350proc ::grammar::me::cpu::core::error {state} { 351 set er [lindex $state 9] 352 if {[llength $er]} { 353 foreach {l m} $er break 354 355 set pool [lindex $state 0 1] ; # state ->/0 code ->/1 pool 356 set mx {} 357 foreach id $m { 358 lappend mx [lindex $pool $id] 359 } 360 set er [list $l $mx] 361 } 362 return $er 363} 364 365proc ::grammar::me::cpu::core::lstk {state} { 366 return [lindex $state 10] 367} 368 369proc ::grammar::me::cpu::core::astk {state} { 370 return [lindex $state 11] 371} 372 373proc ::grammar::me::cpu::core::mstk {state} { 374 return [lindex $state 12] 375} 376 377proc ::grammar::me::cpu::core::estk {state} { 378 return [lindex $state 13] 379} 380 381proc ::grammar::me::cpu::core::rstk {state} { 382 return [lindex $state 14] 383} 384 385proc ::grammar::me::cpu::core::nc {state} { 386 return [lindex $state 15] 387} 388 389proc ::grammar::me::cpu::core::ast {state} { 390 return [lindex $state 11 end] 391} 392 393proc ::grammar::me::cpu::core::halted {state} { 394 return [lindex $state 2] 395} 396 397proc ::grammar::me::cpu::core::code {state} { 398 return [lindex $state 0] 399} 400 401proc ::grammar::me::cpu::core::eof {statevar} { 402 upvar 1 $statevar state 403 lset state 3 1 404 return 405} 406 407proc ::grammar::me::cpu::core::put {statevar tok lex line col} { 408 upvar 1 $statevar state 409 if {[lindex $state 3]} { 410 return -code error "Cannot add input data after eof" 411 } 412 set tc [K [lindex $state 4] [lset state 4 {}]] 413 lappend tc [list $tok $lex $line $col] 414 lset state 4 $tc 415 return 416} 417 418proc ::grammar::me::cpu::core::run {statevar {steps -1}} { 419 # Execution loop. Should be instrumented for statistics about 420 # dynamic instruction frequency. I.e. which instructions are 421 # executed the most => put them at the front of the if/switch for 422 # quicker selection. I.e. frequency coding of the branches for 423 # speed. 424 425 # A C implementation can shimmer the state into a directly 426 # accessible data structure. And the asm instructions can shimmer 427 # into an integer index upon which we can switch fast. 428 429 variable anum 430 variable tclass 431 upvar 1 $statevar state 432 variable iname ; # For debug output 433 434 # Do nothing for a stopped machine (halt flag set). 435 if {[lindex $state 2]} {return $state} 436 437 # Fail if there are no instruction to execute 438 if {![llength [lindex $state 0 0]]} { 439 # No instructions to execute 440 return -code error "No instructions to execute" 441 } 442 443 # Unpack state into locally accessible variables 444 # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 445 foreach {code pc halt eof tc cl ct ok sv er ls as ms es rs nc} $state break 446 447 # Unpack match program for easy access as well. 448 # 0 1 2 449 foreach {asm pool tokmap} $code break 450 451 if 0 { 452 puts ________________________ 453 puts [join [disasm $code] \n] 454 puts ________________________ 455 } 456 457 # Ensure that the unpacked information is not shared 458 unset state 459 460 # Internal flags for optimal handling of the nonterminal 461 # cache. Avoid multiple unpacking of the dictionary, and avoid 462 # repacking if it was not modified. 463 464 set ncunpacked 0 465 set ncmodified 0 466 set tmunpacked 0 467 468 while {1} { 469 # Stop execution if the specified number of instructions have 470 # been executed. Ignore if infinity was specified. 471 if {$steps == 0} break 472 if {$steps > 0} {incr steps -1} 473 474 # Get current instruction ... 475 476 if 0 {puts .$pc:\t$iname([lindex $asm $pc])} 477 if 0 {puts -nonewline .$pc:\t$iname([lindex $asm $pc])} 478 479 set insn [lindex $asm $pc] ; incr pc 480 481 # And its arguments ... 482 483 set an [lindex $anum $insn] 484 if {$an == 1} { 485 set a [lindex $asm $pc] ; incr pc 486 if 0 {puts \t<$a>} 487 } elseif {$an == 2} { 488 set a [lindex $asm $pc] ; incr pc 489 set b [lindex $asm $pc] ; incr pc 490 if 0 {puts \t<$a|$b>} 491 } elseif {$an == 3} { 492 set a [lindex $asm $pc] ; incr pc 493 set b [lindex $asm $pc] ; incr pc 494 set c [lindex $asm $pc] ; incr pc 495 if 0 {puts \t<$a|$b|$c>} 496 } ;# else {puts ""} 497 498 # Dispatch to implementation of the instruction ... 499 500 # Separate if commands are used for easier ordering of the 501 # dispatch. The order of the branches should be frequency 502 # coded to have the most frequently used instructions first. 503 504 # ict_advance <a:message> 505 if {$insn == 0} { 506 if 0 {puts \t\[$cl|[llength $tc]|$eof\]} 507 incr cl 508 if {$cl < [llength $tc]} { 509 if 0 {puts \tConsume} 510 511 set ct [lindex $tc $cl 0] 512 set ok 1 513 set er {} 514 } elseif {$eof} { 515 if 0 {puts \tFail<Eof>} 516 517 # We have no input, and there won't be more coming in 518 # either. Fail the advance. We do _not_ stop the match 519 # loop, the program has to complete. The failure might 520 # be no such, revealed during backtracking. The current 521 # location is not rewound automatically, this is the 522 # responsibility of any backtracking. 523 524 set er [list $cl [list $a]] 525 set ok 0 526 } else { 527 if 0 {puts \tSuspend&Wait} 528 529 # We have no input, stop matching and wait for 530 # more. We reset the machine into a state 531 # which will restart this instruction when 532 # execution resumes. 533 534 incr cl -1 535 incr pc -2 ; # code and message argument 536 break 537 } 538 if 0 {puts .Next} 539 continue 540 } 541 542 # ict_match_token <a:token> <b:message> 543 if {$insn == 1} { 544 if {[llength $tokmap]} { 545 if {!$tmunpacked} { 546 array set tm $tokmap 547 set tmunpacked 1 548 } 549 set ok [expr {$a == $tm($ct)}] 550 } else { 551 set xch [lindex $pool $a] 552 set ok [expr {$xch eq $ct}] 553 } 554 if {!$ok} { 555 set er [list $cl [list $b]] 556 } else { 557 set er {} 558 } 559 continue 560 } 561 562 # ict_match_tokrange <a:tokstart> <b:tokend> <c:message> 563 if {$insn == 2} { 564 if {[llength $tokmap]} { 565 if {!$tmunpacked} { 566 array set tm $tokmap 567 set tmunpacked 1 568 } 569 set x $tm($ct) 570 set ok [expr {($a <= $x) && ($x <= $b)}] 571 } else { 572 set a [lindex $pool $a] 573 set b [lindex $pool $b] 574 set ok [expr { 575 ([string compare $a $ct] <= 0) && 576 ([string compare $ct $b] <= 0) 577 }] ; # {} 578 } 579 if {!$ok} { 580 set er [list $cl [list $c]] 581 } else { 582 set er {} 583 } 584 continue 585 } 586 587 # ict_match_tokclass <a:code> <b:message> 588 if {$insn == 3} { 589 set strcode [lindex $tclass $a] 590 set ok [string is $strcode -strict $ct] 591 if {!$ok} { 592 set er [list $cl [list $b]] 593 } else { 594 set er {} 595 } 596 continue 597 } 598 599 # inc_restore <a:branchtarget> <b:nonterminal> 600 if {$insn == 4} { 601 set sym [lindex $pool $b] 602 603 # Unpack the cache dict, only here. 604 # 8.5 - Use dict operations instead. 605 606 if {!$ncunpacked} { 607 array set ncc $nc 608 set ncunpacked 1 609 } 610 611 if {[info exists ncc($cl,$sym)]} { 612 foreach {go ok error sv} $ncc($cl,$sym) break 613 614 # Go forward, as the nonterminal matches (or not). 615 set cl $go 616 set pc $a 617 } 618 continue 619 } 620 621 # inc_save <a:nonterminal> 622 if {$insn == 5} { 623 set sym [lindex $pool $a] 624 set at [lindex $ls end] 625 set ls [lrange $ls 0 end-1] 626 627 # Unpack, modify, only here. 628 # 8.5 - Use dict operations instead. 629 630 if {!$ncunpacked} { 631 array set ncc $nc 632 set ncunpacked 1 633 } 634 635 set ncc($at,$sym) [list $cl $ok $er $sv] 636 set ncmodified 1 637 continue 638 } 639 640 # icf_ntcall <a:branchtarget> 641 if {$insn == 6} { 642 lappend rs $pc 643 set pc $a 644 continue 645 } 646 647 # icf_ntreturn 648 if {$insn == 7} { 649 set pc [lindex $rs end] 650 set rs [lrange $rs 0 end-1] 651 continue 652 } 653 654 # iok_ok 655 if {$insn == 8} { 656 set ok 1 657 continue 658 } 659 660 # iok_fail 661 if {$insn == 9} { 662 set ok 0 663 continue 664 } 665 666 # iok_negate 667 if {$insn == 10} { 668 set ok [expr {!$ok}] 669 continue 670 } 671 672 # icf_jalways <a:branchtarget> 673 if {$insn == 11} { 674 set pc $a 675 continue 676 } 677 678 # icf_jok <a:branchtarget> 679 if {$insn == 12} { 680 if {$ok} {set pc $a} 681 # !ok => pc is already on next instruction. 682 continue 683 } 684 685 # icf_jfail <a:branchtarget> 686 if {$insn == 13} { 687 if {!$ok} {set pc $a} 688 # ok => pc is already on next instruction. 689 continue 690 } 691 692 # icf_halt 693 if {$insn == 14} { 694 set halt 1 695 break 696 } 697 698 # icl_push 699 if {$insn == 15} { 700 lappend ls $cl 701 continue 702 } 703 704 # icl_rewind 705 if {$insn == 16} { 706 set cl [lindex $ls end] 707 set ls [lrange $ls 0 end-1] 708 continue 709 } 710 711 # icl_pop 712 if {$insn == 17} { 713 set ls [lrange $ls 0 end-1] 714 continue 715 } 716 717 # ier_push 718 if {$insn == 18} { 719 lappend es $er 720 continue 721 } 722 723 # ier_clear 724 if {$insn == 19} { 725 set er {} 726 continue 727 } 728 729 # ier_nonterminal <a:nonterminal> 730 if {$insn == 20} { 731 if {[llength $er]} { 732 set pos [lindex $ls end] 733 incr pos 734 set eloc [lindex $er 0] 735 if {$eloc == $pos} { 736 set er [list $eloc [list $a]] 737 } 738 } 739 continue 740 } 741 742 # ier_merge 743 if {$insn == 21} { 744 set old [lindex $es end] 745 set es [lrange $es 0 end-1] 746 747 # We have either old or current error data, keep it. 748 749 if {![llength $er]} { 750 # No current data, keep old 751 set er $old 752 } elseif {[llength $old]} { 753 # If one of the errors is further on in the input 754 # choose that as the information to propagate. 755 756 foreach {loe msgse} $er break 757 foreach {lon msgsn} $old break 758 759 if {$lon > $loe} { 760 set er $old 761 } elseif {$loe == $lon} { 762 # Equal locations, merge the message lists. 763 764 foreach m $msgsn {lappend msgse $m} 765 set er [list $loe [lsort -uniq $msgse]] 766 } 767 # else lon < loe - er is better - nothing 768 } 769 # else - !old, but er - nothing 770 771 continue 772 } 773 774 # isv_clear 775 if {$insn == 22} { 776 set sv {} 777 continue 778 } 779 780 # isv_terminal (implied ias_push) 781 if {$insn == 23} { 782 set sv [list {} $cl $cl] 783 lappend as $sv 784 continue 785 } 786 787 # isv_nonterminal_leaf <a:nonterminal> 788 if {$insn == 24} { 789 set pos [lindex $ls end] 790 set sv [list $a $pos $cl] 791 continue 792 } 793 794 # isv_nonterminal_range <a:nonterminal> 795 if {$insn == 25} { 796 set pos [lindex $ls end] 797 set sv [list $a $pos $cl [list {} $pos $cl]] 798 continue 799 } 800 801 # isv_nonterminal_reduce <a:nonterminal> 802 if {$insn == 26} { 803 set pos [lindex $ls end] 804 if {[llength $ms]} { 805 set mrk [lindex $ms end] 806 incr mrk 807 } else { 808 set mrk 0 809 } 810 set sv [lrange $as $mrk end] 811 set sv [linsert $sv 0 $a $pos $cl] 812 continue 813 } 814 815 # ias_push 816 if {$insn == 27} { 817 lappend as $sv 818 continue 819 } 820 821 # ias_mark 822 if {$insn == 28} { 823 set mark [llength $as] 824 incr mark -1 825 lappend ms $mark 826 continue 827 } 828 829 # ias_mrewind 830 if {$insn == 29} { 831 set mark [lindex $ms end] 832 set ms [lrange $ms 0 end-1] 833 set as [lrange $as 0 $mark] 834 continue 835 } 836 837 # ias_mpop 838 if {$insn == 30} { 839 set ms [lrange $ms 0 end-1] 840 continue 841 } 842 843 return -code error "Illegal instruction $insn" 844 } 845 846 # Repack a modified cache dictionary, then repack and store the 847 # updated state value. 848 849 if 0 {puts .Repackage\ state} 850 851 if {$ncmodified} {set nc [array get ncc]} 852 set state [list $code $pc $halt $eof $tc $cl $ct $ok $sv $er $ls $as $ms $es $rs $nc] 853 return 854} 855 856namespace eval grammar::me::cpu::core { 857 # Map between class codes and names 858 variable tclass {} 859 variable tccode 860 861 foreach {x code} { 862 0 alnum 863 1 alpha 864 2 digit 865 3 xdigit 866 4 punct 867 5 space 868 } { 869 lappend tclass $code 870 set tccode($code) $x 871 } 872 873 # Number of arguments per ME instruction. 874 # Indexed by instruction code. 875 variable anum {} 876 877 # Mapping between instruction codes and names. 878 variable iname 879 880 foreach {z insn x notes} { 881 0 ict_advance 1 {-- TESTED} 882 1 ict_match_token 2 {-- TESTED} 883 2 ict_match_tokrange 3 {-- TESTED} 884 3 ict_match_tokclass 2 {-- TESTED} 885 4 inc_restore 2 {-- TESTED} 886 5 inc_save 1 {-- TESTED} 887 6 icf_ntcall 1 {-- TESTED} 888 7 icf_ntreturn 0 {-- TESTED} 889 8 iok_ok 0 {-- TESTED} 890 9 iok_fail 0 {-- TESTED} 891 10 iok_negate 0 {-- TESTED} 892 11 icf_jalways 1 {-- TESTED} 893 12 icf_jok 1 {-- TESTED} 894 13 icf_jfail 1 {-- TESTED} 895 14 icf_halt 0 {-- TESTED} 896 15 icl_push 0 {-- TESTED} 897 16 icl_rewind 0 {-- TESTED} 898 17 icl_pop 0 {-- TESTED} 899 18 ier_push 0 {-- TESTED} 900 19 ier_clear 0 {-- TESTED} 901 20 ier_nonterminal 1 {-- TESTED} 902 21 ier_merge 0 {-- TESTED} 903 22 isv_clear 0 {-- TESTED} 904 23 isv_terminal 0 {-- TESTED} 905 24 isv_nonterminal_leaf 1 {-- TESTED} 906 25 isv_nonterminal_range 1 {-- TESTED} 907 26 isv_nonterminal_reduce 1 {-- TESTED} 908 27 ias_push 0 {-- TESTED} 909 28 ias_mark 0 {-- TESTED} 910 29 ias_mrewind 0 {-- TESTED} 911 30 ias_mpop 0 {-- TESTED} 912 } { 913 lappend anum $x 914 set iname($z) $insn 915 set iname($insn) $z 916 } 917} 918 919# ### ### ### ######### ######### ######### 920## Helper commands ((Dis)Assembler, runtime). 921 922proc ::grammar::me::cpu::core::INDEX {list i label} { 923 if {$i eq "end"} { 924 set i [expr {[llength $list] - 1}] 925 } elseif {[regexp {^end-([0-9]+)$} $i -> n]} { 926 set i [expr {[llength $list] - $n -1}] 927 } 928 if { 929 ![string is integer -strict $i] || 930 ($i < 0) || 931 ($i >= [llength $list]) 932 } { 933 return -code error "$label $i" 934 } 935 return $i 936} 937 938proc ::grammar::me::cpu::core::K {x y} {set x} 939 940proc ::grammar::me::cpu::core::Str {str} { 941 upvar 1 pool pool poolh poolh 942 if {![info exists poolh($str)]} { 943 set poolh($str) [llength $pool] 944 lappend pool $str 945 } 946 return $poolh($str) 947} 948 949proc ::grammar::me::cpu::core::Tok {str} { 950 upvar 1 tokmap tokmap ord ord plain plain 951 952 if {[regexp {^([^:]+):(.+)$} $str -> id name]} { 953 if {$plain} { 954 return -code error "Bad assembly, mixing plain and ranked tokens" 955 } 956 if {[info exists ord($id)]} { 957 return -code error "Bad assembly, non-total ordering for $name and $ord($id), at rank $id" 958 } 959 set ord($id) $name 960 set tokmap($name) $id 961 962 return $id 963 } else { 964 if {[array size ord]} { 965 return -code error "Bad assembly, mixing plain and ranked tokens" 966 } 967 set plain 1 968 return [uplevel 1 [list Str $str]] 969 } 970} 971 972proc ::grammar::me::cpu::core::Validate {code {ovar {}} {tvar {}} {jvar {}}} { 973 variable anum 974 variable iname 975 976 # Basic validation of structure ... 977 978 if {[llength $code] != 3} { 979 return -code error "Bad length" 980 } 981 982 foreach {asm pool tokmap} $code break 983 984 if {[llength $tokmap] % 2 == 1} { 985 return -code error "Bad tokmap, expected a dictionary" 986 } 987 988 array set ord {} 989 if {[llength $tokmap] > 0} { 990 foreach {tok rank} $tokmap { 991 if {[info exists ord($rank)]} { 992 return -code error "Bad tokmap, non-total ordering for $tok and $ord($rank), at rank $rank" 993 } 994 set ord($rank) $tok 995 } 996 } 997 998 # Basic validation of ME code: Valid instructions, collect valid 999 # branch target indices 1000 1001 array set target {} 1002 1003 set pc 0 1004 set pcend [llength $asm] 1005 set poolend [llength $pool] 1006 1007 while {$pc < $pcend} { 1008 set target($pc) . 1009 1010 set insn [lindex $asm $pc] 1011 if {($insn < 0) || ($insn > 30)} { 1012 return -code error "Invalid instruction $insn at PC $pc" 1013 } 1014 1015 incr pc 1016 incr pc [lindex $anum $insn] 1017 } 1018 1019 if {$pc > $pcend} { 1020 return -code error "Bad program, last instruction $insn ($iname($insn)) is truncated" 1021 } 1022 1023 # Validation of ME instruction arguments (pool references, branch 1024 # targets, ...) 1025 1026 if {$jvar ne ""} { 1027 upvar 1 $jvar jmp 1028 } 1029 array set jmp {} 1030 1031 set pc 0 1032 while {$pc < $pcend} { 1033 set base $pc 1034 set insn [lindex $asm $pc] ; incr pc 1035 set an [lindex $anum $insn] 1036 1037 if {$an == 1} { 1038 set a [lindex $asm $pc] ; incr pc 1039 } elseif {$an == 2} { 1040 set a [lindex $asm $pc] ; incr pc 1041 set b [lindex $asm $pc] ; incr pc 1042 } elseif {$an == 3} { 1043 set a [lindex $asm $pc] ; incr pc 1044 set b [lindex $asm $pc] ; incr pc 1045 set c [lindex $asm $pc] ; incr pc 1046 } 1047 1048 switch -exact $insn { 1049 0 - 5 - 20 - 24 - 25 - 26 - 1050 a/string { 1051 if {($a < 0) || ($a >= $poolend)} { 1052 return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base" 1053 } 1054 } 1055 1 { 1056 # a/tok b/string 1057 if {![llength $tokmap]} { 1058 if {($a < 0) || ($a >= $poolend)} { 1059 return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base" 1060 } 1061 } else { 1062 if {![info exists ord($a)]} { 1063 return -code error "Invalid token rank $a for instruction $insn ($iname($insn)) at $base" 1064 } 1065 } 1066 if {($b < 0) || ($b >= $poolend)} { 1067 return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base" 1068 } 1069 } 1070 2 { 1071 # a/tokstart b/tokend c/string 1072 1073 if {![llength $tokmap]} { 1074 # a = b = string references. 1075 if {($a < 0) || ($a >= $poolend)} { 1076 return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base" 1077 } 1078 if {($b < 0) || ($b >= $poolend)} { 1079 return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base" 1080 } 1081 if {$a == $b} { 1082 return -code error "Invalid single-token range for instruction $insn ($iname($insn)) at $base" 1083 } 1084 if {[string compare [lindex $pool $a] [lindex $pool $b]] > 0} { 1085 return -code error "Invalid empty range for instruction $insn ($iname($insn)) at $base" 1086 } 1087 } else { 1088 # tokmap defined: a = b = order rank. 1089 if {![info exists ord($a)]} { 1090 return -code error "Invalid token rank $a for instruction $insn ($iname($insn)) at $base" 1091 } 1092 if {![info exists ord($b)]} { 1093 return -code error "Invalid token rank $b for instruction $insn ($iname($insn)) at $base" 1094 } 1095 if {$a == $b} { 1096 return -code error "Invalid single-token range for instruction $insn ($iname($insn)) at $base" 1097 } 1098 if {$a > $b} { 1099 return -code error "Invalid empty range for instruction $insn ($iname($insn)) at $base" 1100 } 1101 } 1102 if {($c < 0) || ($c >= $poolend)} { 1103 return -code error "Invalid string reference $c for instruction $insn ($iname($insn)) at $base" 1104 } 1105 } 1106 3 { 1107 # a/class(0-5) b/string 1108 if {($a < 0) || ($a > 5)} { 1109 return -code error "Invalid token-class $a for instruction $insn ($iname($insn)) at $base" 1110 } 1111 if {($b < 0) || ($b >= $poolend)} { 1112 return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base" 1113 } 1114 } 1115 4 { 1116 # a/branch b/string 1117 if {![info exists target($a)]} { 1118 return -code error "Invalid branch target $a for instruction $insn ($iname($insn)) at $base" 1119 } else { 1120 set jmp($a) . 1121 } 1122 if {($b < 0) || ($b >= $poolend)} { 1123 return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base" 1124 } 1125 } 1126 6 - 11 - 12 - 13 - 1127 a/branch { 1128 if {![info exists target($a)]} { 1129 return -code error "Invalid branch target $a for instruction $insn ($iname($insn)) at $base" 1130 } else { 1131 set jmp($base) $a 1132 } 1133 } 1134 default {} 1135 } 1136 } 1137 1138 # All checks passed, code is deemed good enough. 1139 # Caller may have asked for some of the collected 1140 # information. 1141 1142 if {$ovar ne ""} { 1143 upvar 1 $ovar o 1144 array set o [array get ord] 1145 } 1146 if {$tvar ne ""} { 1147 upvar 1 $tvar t 1148 array set t [array get target] 1149 } 1150 return 1151} 1152 1153# ### ### ### ######### ######### ######### 1154## Ready 1155 1156package provide grammar::me::cpu::core 0.2 1157