1# -*- tcl -*- 2# 3# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 4# Parser Generator / Transformation - Compile grammar to ME cpu instructions. 5 6# This package assumes to be used from within a PAGE plugin. It uses 7# the API commands listed below. These are identical across the major 8# types of PAGE plugins, allowing this package to be used in reader, 9# transform, and writer plugins. It cannot be used in a configuration 10# plugin, and this makes no sense either. 11# 12# To ensure that our assumption is ok we require the relevant pseudo 13# package setup by the PAGE plugin management code. 14# 15# -----------------+-- 16# page_info | Reporting to the user. 17# page_warning | 18# page_error | 19# -----------------+-- 20# page_log_error | Reporting of internals. 21# page_log_warning | 22# page_log_info | 23# -----------------+-- 24 25# ### ### ### ######### ######### ######### 26## Dumping the input grammar. But not as Tcl or other code. In PEG 27## format again, pretty printing. 28 29# ### ### ### ######### ######### ######### 30## Requisites 31 32# @mdgen NODEP: page::plugin 33 34package require page::plugin ; # S.a. pseudo-package. 35 36package require grammar::me::cpu::gasm 37package require textutil 38package require struct::graph 39 40package require page::analysis::peg::emodes 41package require page::util::quote 42package require page::util::peg 43 44namespace eval ::page::compiler::peg::mecpu { 45 # Get the peg char de/encoder commands. 46 # (unquote, quote'tcl) 47 48 namespace import ::page::util::quote::* 49 namespace import ::page::util::peg::* 50 51 52 namespace eval gas { 53 namespace import ::grammar::me::cpu::gas::begin 54 namespace import ::grammar::me::cpu::gas::done 55 namespace import ::grammar::me::cpu::gas::lift 56 namespace import ::grammar::me::cpu::gas::state 57 namespace import ::grammar::me::cpu::gas::state! 58 } 59 namespace import ::grammar::me::cpu::gas::* 60 rename begin {} 61 rename done {} 62 rename lift {} 63 rename state {} 64 rename state! {} 65} 66 67# ### ### ### ######### ######### ######### 68## Data structures for the generated code. 69 70## All data is held in node attributes of the tree. Per node: 71## 72## asm - List of instructions implementing the node. 73 74 75 76# ### ### ### ######### ######### ######### 77## API 78 79proc ::page::compiler::peg::mecpu {t} { 80 # Resolve the mode hints. Every gen(X) having a value of 'maybe' 81 # (or missing) is for the purposes of this code a 'yes'. 82 83 if {![page::analysis::peg::emodes::compute $t]} { 84 page_error " Unable to generate a ME parser without accept/generate properties" 85 return 86 } 87 88 foreach n [$t nodes] { 89 if {![$t keyexists $n gen] || ([$t get $n gen] eq "maybe")} { 90 $t set $n gen 1 91 } 92 if {![$t keyexists $n acc]} {$t set $n acc 1} 93 } 94 95 # Synthesize a program, then the assembly code. 96 97 mecpu::Synth $t 98 return 99} 100 101# ### ### ### ######### ######### ######### 102## Internal. Helpers 103 104proc ::page::compiler::peg::mecpu::Synth {t} { 105 # Phase 2: Bottom-up, synthesized attributes 106 107 # We use a global graph to capture instructions and their 108 # relations. The graph is then converted into a linear list of 109 # instructions, with proper labeling and jump instructions to 110 # handle all non-linear control-flow. 111 112 set g [struct::graph g] 113 $t set root gas::called {} 114 115 page_info "* Synthesize graph code" 116 117 $t walk root -order post -type dfs n { 118 SynthNode $n 119 } 120 121 status $g ; gdump $g synth 122 remove_unconnected $g ; gdump $g nounconnected 123 remove_dead $g ; gdump $g nodead 124 denop $g ; gdump $g nonops 125 parcmerge $g ; gdump $g parcmerge 126 forwmerge $g ; gdump $g fmerge 127 backmerge $g ; gdump $g bmerge 128 status $g 129 pathlengths $g ; gdump $g pathlen 130 jumps $g ; gdump $g jumps 131 status $g 132 symbols $g $t 133 134 set cc [2code $t $g] 135 #write asm/mecode [join $cc \n] 136 137 statistics $cc 138 139 $t set root asm $cc 140 $g destroy 141 return 142} 143 144proc ::page::compiler::peg::mecpu::SynthNode {n} { 145 upvar 1 t t g g 146 if {$n eq "root"} { 147 set code Root 148 } elseif {[$t keyexists $n symbol]} { 149 set code Nonterminal 150 } elseif {[$t keyexists $n op]} { 151 set code [$t get $n op] 152 } else { 153 return -code error "PANIC. Bad node $n, cannot classify" 154 } 155 156 page_log_info " [np $n] := ([linsert [$t children $n] 0 $code])" 157 158 SynthNode/$code $n 159 return 160} 161 162proc ::page::compiler::peg::mecpu::SynthNode/Root {n} { 163 upvar 1 t t g g 164 165 # Root is the grammar itself. 166 167 set gstart [$t get root start] 168 set gname [$t get root name] 169 170 if {$gstart eq ""} { 171 page_error " No start expression." 172 return 173 } 174 175 gas::begin $g $n halt "<Start Expression> '$gname'" 176 $g node set [Who entry] instruction .C 177 $g node set [Who entry] START . 178 179 Inline $t $gstart sexpr 180 /At sexpr/exit/ok ; /Ok ; Jmp exit/return 181 /At sexpr/exit/fail ; /Fail ; Jmp exit/return 182 183 gas::done --> $t 184 return 185} 186 187proc ::page::compiler::peg::mecpu::SynthNode/Nonterminal {n} { 188 upvar 1 t t g g 189 190 # This is the root of a definition. 191 # 192 # The text is a procedure wrapping the match code of its 193 # expression into the required the nonterminal handling (caching 194 # and such), plus the support code for the expression matcher. 195 196 set sym [$t get $n symbol] 197 set label [$t get $n label] 198 set gen [$t get $n gen] 199 set mode [$t get $n mode] 200 201 set pe [lindex [$t children $n] 0] 202 set egen [$t get $pe gen] 203 204 # -> inc_restore -found-> NOP gen: -> ok -> ias_push -> RETURN 205 # /!found \ / 206 # / \-fail --------->/ 207 # / !gen: -> RETURN 208 # / 209 # \-> icl_push (-> ias_mark) -> (*) -> SV -> inc_save (-> ias_mrewind) -X 210 # 211 # X -ok----> ias_push -> ier_nonterminal 212 # \ / 213 # \-fail ----------/ 214 215 # Poking into the generated instructions, converting the initial 216 # .NOP into a .C'omment. 217 218 set first [gas::begin $g $n !okfail "Nonterminal '$sym'"] 219 $g node set [Who entry] instruction .C 220 $g node set [Who entry] START . 221 222 Cmd inc_restore $label ; /Label restore ; /Ok 223 224 if {$gen} { 225 Bra ; /Label @ 226 /Fail ; Nop ; Exit 227 /At @ 228 /Ok ; Cmd ias_push ; Exit 229 } else { 230 Nop ; Exit 231 } 232 233 /At restore ; /Fail 234 Cmd icl_push ; # Balanced by inc_save (XX) 235 Cmd icl_push ; # Balanced by pop after ier_terminal 236 237 if {$egen} { 238 # [*] Needed for removal of SV's from stack after handling by 239 # this symbol, only if expression actually generates an SV. 240 241 Cmd ias_mark 242 } 243 244 Inline $t $pe subexpr ; /Ok ; Nop ; /Label unified 245 /At subexpr/exit/fail ; /Fail ; Jmp unified 246 /At unified 247 248 switch -exact -- $mode { 249 value {Cmd isv_nonterminal_reduce $label} 250 match {Cmd isv_nonterminal_range $label} 251 leaf {Cmd isv_nonterminal_leaf $label} 252 discard {Cmd isv_clear} 253 default {return -code error "Bad nonterminal mode \"$mode\""} 254 } 255 256 Cmd inc_save $label ; # Implied icl_pop (XX) 257 258 if {$egen} { 259 # See [*], this is the removal spoken about before. 260 Cmd ias_mrewind 261 } 262 263 /Label hold 264 265 if {$gen} { 266 /Ok 267 Cmd ias_push 268 Nop ; /Label merge 269 /At hold ; /Fail ; Jmp merge 270 /At merge 271 } 272 273 Cmd ier_nonterminal "Expected $label" 274 Cmd icl_pop 275 Exit 276 277 gas::done --> $t 278 return 279} 280 281proc ::page::compiler::peg::mecpu::SynthNode/? {n} { 282 upvar 1 t t g g 283 284 # The expression e? is equivalent to e/epsilon. 285 # And like this it is compiled. 286 287 set pe [lindex [$t children $n] 0] 288 289 gas::begin $g $n okfail ? 290 291 # -> icl_push -> ier_push -> (*) -ok--> ier_merge/ok --> icl_pop -ok----------------> OK 292 # \ / 293 # \-fail-> ier_merge/f ---> icl_rewind -> iok_ok -ok-/ 294 295 Cmd icl_push 296 Cmd ier_push 297 298 Inline $t $pe subexpr 299 300 /Ok 301 Cmd ier_merge 302 Cmd icl_pop 303 /Ok ; Exit 304 305 /At subexpr/exit/fail ; /Fail 306 Cmd ier_merge 307 Cmd icl_rewind 308 Cmd iok_ok 309 /Ok ; Exit 310 311 gas::done --> $t 312 return 313} 314 315proc ::page::compiler::peg::mecpu::SynthNode/* {n} { 316 upvar 1 t t g g 317 318 # Kleene star is like a repeated ? 319 320 # Note: Compilation as while loop, as done now 321 # means that the parser has no information about 322 # the intermediate structure of the input in his 323 # cache. 324 325 # Future: Create a helper symbol X and compile 326 # the expression e = e'* as: 327 # e = X; X <- (e' X)? 328 # with match data for X put into the cache. This 329 # is not exactly equivalent, the structure of the 330 # AST is different (right-nested tree instead of 331 # a list). This however can be handled with a 332 # special nonterminal mode to expand the current 333 # SV on the stack. 334 335 # Note 2: This is a transformation which can be 336 # done on the grammar itself, before the actual 337 # backend is let loose. This "strength reduction" 338 # allows us to keep this code here. 339 340 set pe [lindex [$t children $n] 0] 341 set egen [$t get $pe gen] 342 343 # Build instruction graph. 344 345 # /<---------------------------------------------------------------\ 346 # \_ \_ 347 # ---> icl_push -> ier_push -> (*) -ok--> ier_merge/ok --> icl_pop ->/ 348 # \ 349 # \-fail-> ier_merge/f ---> icl_rewind -> iok_ok -> OK 350 351 gas::begin $g $n okfail * 352 353 Cmd icl_push ; /Label header 354 Cmd ier_push 355 356 Inline $t $pe loop 357 358 /Ok 359 Cmd ier_merge 360 Cmd icl_pop 361 Jmp header ; /CloseLoop 362 363 /At loop/exit/fail ; /Fail 364 Cmd ier_merge 365 Cmd icl_rewind 366 Cmd iok_ok 367 /Ok ; Exit 368 369 gas::done --> $t 370 return 371} 372 373proc ::page::compiler::peg::mecpu::SynthNode/+ {n} { 374 upvar 1 t t g g 375 376 # Positive Kleene star x+ is equivalent to x x* 377 # This is how it is compiled. See also the notes 378 # at the * above, they apply in essence here as 379 # well, except that the transformat scheme is 380 # slighty different: 381 # 382 # e = e'* ==> e = X; X <- e' X? 383 384 set pe [lindex [$t children $n] 0] 385 386 # Build instruction graph. 387 388 # icl_push -> ier_push -> (*) -fail-> ier_merge/fl -> icl_rewind -> FAIL 389 # \ 390 # \--ok---> ier_merge/ok -> icl_pop ->\_ 391 # / 392 # /<--------------------------------------------------------/ 393 # / 394 # /<---------------------------------------------------------------\ 395 # \_ \_ 396 # -> icl_push -> ier_push -> (*) -ok--> ier_merge/ok --> icl_pop ->/ 397 # \ 398 # \-fail-> ier_merge/f ---> icl_rewind -> iok_ok -> OK 399 400 gas::begin $g $n okfail + 401 402 Cmd icl_push 403 Cmd ier_push 404 405 Inline $t $pe first 406 /At first/exit/fail ; /Fail 407 Cmd ier_merge 408 Cmd icl_rewind 409 /Fail ; Exit 410 411 /At first/exit/ok ; /Ok 412 Cmd ier_merge 413 Cmd icl_pop 414 415 # Loop copied from Kleene *, it is * 416 417 Cmd icl_push ; /Label header 418 Cmd ier_push 419 420 # For the loop we create the sub-expression instruction graph a 421 # second time. This is done by walking the subtree a second time 422 # and constructing a completely new node set. The result is 423 # imported under a new name. 424 425 set save [gas::state] 426 $t walk $pe -order post -type dfs n {SynthNode $n} 427 gas::state! $save 428 Inline $t $pe loop 429 430 /Ok 431 Cmd ier_merge 432 Cmd icl_pop 433 Jmp header ; /CloseLoop 434 435 /At loop/exit/fail ; /Fail 436 Cmd ier_merge 437 Cmd icl_rewind 438 Cmd iok_ok 439 /Ok ; Exit 440 441 gas::done --> $t 442 return 443} 444 445proc ::page::compiler::peg::mecpu::SynthNode// {n} { 446 upvar 1 t t g g 447 448 set args [$t children $n] 449 450 if {![llength $args]} { 451 error "PANIC. Empty choice." 452 453 } elseif {[llength $args] == 1} { 454 # A choice over one branch is no real choice. The code 455 # generated for the child applies here as well. 456 457 gas::lift $t $n <-- [lindex $args 0] 458 return 459 } 460 461 # Choice over at least two branches. 462 # Build instruction graph. 463 464 # -> BRA 465 # 466 # BRA -> icl_push (-> ias_mark) -> ier_push -> (*) -ok -> ier_merge -> BRA'OK 467 # \-fail -> ier_merge (-> ias_mrewind) -> icl_rewind -> BRA'FAIL 468 # 469 # BRA'FAIL -> BRA 470 # BRA'FAIL -> FAIL (last branch) 471 # 472 # BRA'OK -> icl_pop -> OK 473 474 gas::begin $g $n okfail / 475 476 /Clear 477 Cmd icl_pop ; /Label BRA'OK ; /Ok ; Exit 478 /At entry 479 480 foreach pe $args { 481 set egen [$t get $pe gen] 482 483 # Note: We do not check for static match results. Doing so is 484 # an optimization we can do earlier, directly on the tree. 485 486 Cmd icl_push 487 if {$egen} {Cmd ias_mark} 488 489 Cmd ier_push 490 Inline $t $pe subexpr 491 492 /Ok 493 Cmd ier_merge 494 Jmp BRA'OK 495 496 /At subexpr/exit/fail ; /Fail 497 Cmd ier_merge 498 if {$egen} {Cmd ias_mrewind} 499 Cmd icl_rewind 500 501 # Branch failed. Go to the next branch. Fail completely at 502 # last branch. 503 } 504 505 /Fail ; Exit 506 507 gas::done --> $t 508 return 509} 510 511proc ::page::compiler::peg::mecpu::SynthNode/x {n} { 512 upvar 1 t t g g 513 514 set args [$t children $n] 515 516 if {![llength $args]} { 517 error "PANIC. Empty sequence." 518 519 } elseif {[llength $args] == 1} { 520 # A sequence of one element is no real sequence. The code 521 # generated for the child applies here as well. 522 523 gas::lift $t $n <-- [lindex $args 0] 524 return 525 } 526 527 # Sequence of at least two elements. 528 # Build instruction graph. 529 530 # -> icl_push -> SEG 531 # 532 # SEG (-> ias_mark) -> ier_push -> (*) -ok -> ier_merge -> SEG'OK 533 # \-fail -> ier_merge -> SEG'FAIL 534 # 535 # SEG'OK -> SEG 536 # SEG'OK -> icl_pop -> OK (last segment) 537 # 538 # SEG'FAIL (-> ias_mrewind) -> icl_rewind -> FAIL 539 540 gas::begin $g $n okfail x 541 542 /Clear 543 Cmd icl_rewind ; /Label SEG'FAIL ; /Fail ; Exit 544 545 /At entry 546 Cmd icl_push 547 548 set gen 0 549 foreach pe $args { 550 set egen [$t get $pe gen] 551 if {$egen && !$gen} { 552 set gen 1 553 554 # From here on out is the sequence able to generate 555 # semantic values which have to be canceled when 556 # backtracking. 557 558 Cmd ias_mark ; /Label @mark 559 560 /Clear 561 Cmd ias_mrewind ; Jmp SEG'FAIL ; /Label SEG'FAIL 562 563 /At @mark 564 } 565 566 Cmd ier_push 567 Inline $t $pe subexpr 568 569 /At subexpr/exit/fail ; /Fail 570 Cmd ier_merge 571 Jmp SEG'FAIL 572 573 /At subexpr/exit/ok ; /Ok 574 Cmd ier_merge 575 } 576 577 Cmd icl_pop 578 /Ok ; Exit 579 580 gas::done --> $t 581 return 582} 583 584proc ::page::compiler::peg::mecpu::SynthNode/& {n} { 585 upvar 1 t t g g 586 SynthLookahead $n no 587 return 588} 589 590proc ::page::compiler::peg::mecpu::SynthNode/! {n} { 591 upvar 1 t t g g 592 SynthLookahead $n yes 593 return 594} 595 596proc ::page::compiler::peg::mecpu::SynthNode/dot {n} { 597 upvar 1 t t g g 598 SynthTerminal $n {} "any character" 599 return 600} 601 602proc ::page::compiler::peg::mecpu::SynthNode/epsilon {n} { 603 upvar 1 t t g g 604 605 gas::begin $g $n okfail epsilon 606 607 Cmd iok_ok ; /Ok ; Exit 608 609 gas::done --> $t 610 return 611} 612 613proc ::page::compiler::peg::mecpu::SynthNode/alnum {n} { 614 upvar 1 t t g g 615 SynthClass $n alnum 616 return 617} 618 619proc ::page::compiler::peg::mecpu::SynthNode/alpha {n} { 620 upvar 1 t t g g 621 SynthClass $n alpha 622 return 623} 624 625proc ::page::compiler::peg::mecpu::SynthNode/digit {n} { 626 upvar 1 t t g g 627 SynthClass $n digit 628 return 629} 630 631proc ::page::compiler::peg::mecpu::SynthNode/xdigit {n} { 632 upvar 1 t t g g 633 SynthClass $n xdigit 634 return 635} 636 637proc ::page::compiler::peg::mecpu::SynthNode/punct {n} { 638 upvar 1 t t g g 639 SynthClass $n punct 640 return 641} 642 643proc ::page::compiler::peg::mecpu::SynthNode/space {n} { 644 upvar 1 t t g g 645 SynthClass $n space 646 return 647} 648 649proc ::page::compiler::peg::mecpu::SynthNode/.. {n} { 650 upvar 1 t t g g 651 # Range is [x-y] 652 653 set b [$t get $n begin] 654 set e [$t get $n end] 655 656 set tb [quote'tcl $b] 657 set te [quote'tcl $e] 658 659 set pb [quote'tclstr $b] 660 set pe [quote'tclstr $e] 661 662 SynthTerminal $n [list ict_match_tokrange $tb $te] "\\\[${pb}..${pe}\\\]" 663 return 664} 665 666proc ::page::compiler::peg::mecpu::SynthNode/t {n} { 667 upvar 1 t t g g 668 669 # Terminal node. Primitive matching. 670 # Code is parameterized by gen(X) of this node X. 671 672 set ch [$t get $n char] 673 set tch [quote'tcl $ch] 674 set pch [quote'tclstr $ch] 675 676 SynthTerminal $n [list ict_match_token $tch] $pch 677 return 678} 679 680proc ::page::compiler::peg::mecpu::SynthNode/n {n} { 681 upvar 1 t t g g 682 683 # Nonterminal node. Primitive matching. 684 # The code is parameterized by acc(X) of this node X, and gen(D) 685 # of the invoked nonterminal D. 686 687 set sym [$t get $n sym] 688 set def [$t get $n def] 689 690 gas::begin $g $n okfail call'$sym' 691 692 if {$def eq ""} { 693 # Invokation of an undefined nonterminal. This will always fail. 694 695 Note "Match for undefined symbol '$sym'" 696 Cmdd iok_fail ; /Fail ; Exit 697 gas::done --> $t 698 699 } else { 700 # Combinations 701 # Acc Gen Action 702 # --- --- ------ 703 # 0 0 Plain match 704 # 0 1 Match with canceling of the semantic value. 705 # 1 0 Plain match 706 # 1 1 Plain match 707 # --- --- ------ 708 709 if {[$t get $n acc] || ![$t get $def gen]} { 710 Cmd icf_ntcall sym_$sym ; /Label CALL 711 /Ok ; Exit 712 /Fail ; Exit 713 714 } else { 715 Cmd ias_mark 716 Cmd icf_ntcall sym_$sym ; /Label CALL 717 Cmd ias_mrewind 718 /Ok ; Exit 719 /Fail ; Exit 720 } 721 722 set caller [Who CALL] 723 gas::done --> $t 724 725 $t lappend $def gas::callers $caller 726 $t lappend root gas::called $def 727 } 728 729 return 730} 731 732proc ::page::compiler::peg::mecpu::SynthLookahead {n negated} { 733 upvar 1 g g t t 734 735 # Note: Per the rules about expression modes (! is a lookahead 736 # ____| operator) this node has a mode of 'discard', and its child 737 # ____| has so as well. 738 739 # assert t get n mode == discard 740 # assert t get pe mode == discard 741 742 set op [$t get $n op] 743 set pe [lindex [$t children $n] 0] 744 set eop [$t get $pe op] 745 746 # -> icl_push -> (*) -ok--> icl_rewind -> OK 747 # \--fail-> icl_rewind -> FAIL 748 749 # -> icl_push -> (*) -ok--> icl_rewind -> iok_negate -> FAIL 750 # \--fail-> icl_rewind -> iok_negate -> OK 751 752 gas::begin $g $n okfail [expr {$negated ? "!" : "&"}] 753 754 Cmd icl_push 755 Inline $t $pe subexpr 756 757 /Ok 758 Cmd icl_rewind 759 if {$negated} { Cmd iok_negate ; /Fail } else /Ok ; Exit 760 761 /At subexpr/exit/fail ; /Fail 762 Cmd icl_rewind 763 if {$negated} { Cmd iok_negate ; /Ok } else /Fail ; Exit 764 765 gas::done --> $t 766 return 767} 768 769proc ::page::compiler::peg::mecpu::SynthClass {n op} { 770 upvar 1 t t g g 771 SynthTerminal $n [list ict_match_tokclass $op] <$op> 772 return 773} 774 775proc ::page::compiler::peg::mecpu::SynthTerminal {n cmd msg} { 776 upvar 1 t t g g 777 778 # 4 cases (+/- cmd, +/- sv). 779 # 780 # (A) +cmd+sv 781 # entry -> advance -ok-> match -ok-> sv -> OK 782 # \ \ 783 # \ \-fail----------> FAIL 784 # \-fail----------------------/ 785 # 786 # (B) -cmd+sv 787 # entry -> advance -ok-> sv -> OK 788 # \ 789 # \-fail-----------> FAIL 790 # 791 # (C) +cmd-sv 792 # entry -> advance -ok-> match -ok-> OK 793 # \ \ 794 # \ \-fail---> FAIL 795 # \-fail---------------/ 796 # 797 # (D) -cmd-sv 798 # entry -> advance -ok-> OK 799 # \ 800 # \-fail-----> FAIL 801 802 gas::begin $g $n okfail M'[lindex $cmd 0] 803 804 Cmd ict_advance "Expected $msg (got EOF)" 805 /Fail ; Exit 806 /Ok 807 808 if {[llength $cmd]} { 809 lappend cmd "Expected $msg" 810 eval [linsert $cmd 0 Cmd] 811 /Fail ; Exit 812 /Ok 813 } 814 815 if {[$t get $n gen]} { 816 Cmd isv_terminal 817 /Ok 818 } 819 820 Exit 821 822 gas::done --> $t 823 return 824} 825 826# ### ### ### ######### ######### ######### 827## Internal. Extending the graph of instructions (expression 828## framework, new instructions, (un)conditional sequencing). 829 830# ### ### ### ######### ######### ######### 831## Internal. Working on the graph of instructions. 832 833proc ::page::compiler::peg::mecpu::2code {t g} { 834 page_info "* Generating ME assembler code" 835 836 set insn {} 837 set start [$t get root gas::entry] 838 set cat 0 839 set calls [list $start] 840 841 while {$cat < [llength $calls]} { 842 set now [lindex $calls $cat] 843 incr cat 844 845 set at 0 846 set pending [list $now] 847 848 while {$at < [llength $pending]} { 849 set current [lindex $pending $at] 850 incr at 851 852 while {$current ne ""} { 853 if {[$g node keyexists $current WRITTEN]} break 854 855 insn $g $current insn 856 $g node set $current WRITTEN . 857 858 if {[$g node keyexists $current SAVE]} { 859 lappend pending [$g node get $current SAVE] 860 } 861 if {[$g node keyexists $current CALL]} { 862 lappend calls [$g node get $current CALL] 863 } 864 865 set current [$g node get $current NEXT] 866 if {$current eq ""} break 867 if {[$g node keyexists $current WRITTEN]} { 868 lappend insn [list {} icf_jalways \ 869 [$g node get $current LABEL]] 870 break 871 } 872 873 # Process the following instruction, 874 # if there is any. 875 } 876 } 877 } 878 879 return $insn 880} 881 882proc ::page::compiler::peg::mecpu::insn {g current iv} { 883 upvar 1 $iv insn 884 885 set code [$g node get $current instruction] 886 set args [$g node get $current arguments] 887 888 set label {} 889 if {[$g node keyexists $current LABEL]} { 890 set label [$g node get $current LABEL] 891 } 892 893 lappend insn [linsert $args 0 $label $code] 894 return 895} 896 897if 0 { 898 if {[lindex $ins 0] eq "icf_ntcall"} { 899 set tmp {} 900 foreach b $branches { 901 if {[$g node keyexists $b START]} { 902 set sym [$g node get $b symbol] 903 lappend ins sym_$sym 904 } else { 905 lappend tmp $b 906 } 907 } 908 set branches $tmp 909 } 910} 911 912# ### ### ### ######### ######### ######### 913## Optimizations. 914# 915## I. Remove all nodes which are not connected to anything. 916## There should be none. 917 918proc ::page::compiler::peg::mecpu::remove_unconnected {g} { 919 page_info "* Remove unconnected instructions" 920 921 foreach n [$g nodes] { 922 if {[$g node degree $n] == 0} { 923 page_error "$n ([printinsn $g $n])" 924 page_error "Found unconnected node. This should not have happened." 925 page_error "Removing the bad node." 926 927 $g node delete $n 928 } 929 } 930} 931 932proc ::page::compiler::peg::mecpu::remove_dead {g} { 933 page_info "* Remove dead instructions" 934 935 set count 0 936 set runs 0 937 set hasdead 1 938 while {$hasdead} { 939 set hasdead 0 940 foreach n [$g nodes] { 941 if {[$g node keyexists $n START]} continue 942 if {[$g node degree -in $n] > 0} continue 943 944 page_log_info " [np $n] removed, dead ([printinsn $g $n])" 945 946 $g node delete $n 947 948 set hasdead 1 949 incr count 950 } 951 incr runs 952 } 953 954 page_info " Removed [plural $count instruction] in [plural $runs run]" 955 return 956} 957 958# ### ### ### ######### ######### ######### 959## Optimizations. 960# 961## II. We have lots of .NOP instructions in the control flow, as part 962## of the framework. They made the handling of expressions easier, 963## providing clear and fixed anchor nodes to connect to from 964## inside and outside, but are rather like the epsilon-transitions 965## in a (D,N)FA. Now is the time to get rid of them. 966# 967## We keep the .C'omments, and explicit .BRA'nches. 968## We should not have any .NOP which is a dead-end (without 969## successor), nor should we find .NOPs with more than one 970## successor. The latter should have been .BRA'nches. Both 971## situations are reported on. Dead-ends we 972## remove. Multi-destination NOPs we keep. 973# 974## Without the nops in place to confus the flow we can perform a 975## series peep-hole optimizations to merge/split branches. 976 977proc ::page::compiler::peg::mecpu::denop {g} { 978 # Remove the .NOPs and reroute control flow. We keep the pseudo 979 # instructions for comments (.C) and the explicit branch points 980 # (.BRA). 981 982 page_info "* Removing the helper .NOP instructions." 983 984 set count 0 985 foreach n [$g nodes] { 986 # Skip over nodes already deleted by a previous iteration. 987 if {[$g node get $n instruction] ne ".NOP"} continue 988 989 # We keep branching .NOPs, and warn user. There shouldn't be 990 # any. such should explicit bnrachpoints. 991 992 set destinations [$g arcs -out $n] 993 994 if {[llength $destinations] > 1} { 995 page_error "$n ([printinsn $g $n])" 996 page_error "Found a .NOP with more than one destination." 997 page_error "This should have been a .BRA instruction." 998 page_error "Not removed. Internal error. Fix the transformation." 999 continue 1000 } 1001 1002 # Nops without a destination, dead-end's are not wanted. They 1003 # should not exist either too. We will do a general dead-end 1004 # and dead-start removal as well. 1005 1006 if {[llength $destinations] < 1} { 1007 page_error "$n ([printinsn $g $n])" 1008 page_error "Found a .NOP without any destination, i.e. a dead end." 1009 page_error "This should not have happened. Removed the node." 1010 1011 $g node delete $n 1012 continue 1013 } 1014 1015 page_log_info " [np $n] removed, updated cflow ([printinsn $g $n])" 1016 1017 # As there is exactly one destination we can now reroute all 1018 # incoming arcs around the nop to the new destination. 1019 1020 set target [$g arc target [lindex $destinations 0]] 1021 foreach a [$g arcs -in $n] { 1022 $g arc move-target $a $target 1023 } 1024 1025 $g node delete $n 1026 incr count 1027 } 1028 1029 page_info " Removed [plural $count instruction]" 1030 return 1031} 1032 1033 1034# ### ### ### ######### ######### ######### 1035## Optimizations. 1036# 1037 1038# Merge parallel arcs (remove one, make the other unconditional). 1039 1040proc ::page::compiler::peg::mecpu::parcmerge {g} { 1041 page_info "* Search for identical parallel arcs and merge them" 1042 1043 #puts [join [info loaded] \n] /seg.fault induced with tcllibc! - tree! 1044 1045 set count 0 1046 foreach n [$g nodes] { 1047 set arcs [$g arcs -out $n] 1048 1049 if {[llength $arcs] < 2} continue 1050 if {[llength $arcs] > 2} { 1051 page_error " $n ([printinsn $g $n])" 1052 page_error " Instruction has more than two destinations." 1053 page_error " That is not possible. Internal error." 1054 continue 1055 } 1056 # Two way branch. Both targets the same ? 1057 1058 foreach {a b} $arcs break 1059 1060 if {[$g arc target $a] ne [$g arc target $b]} continue 1061 1062 page_log_info " [np $n] outbound arcs merged ([printinsn $g $n])" 1063 1064 $g arc set $a condition always 1065 $g arc delete $b 1066 1067 incr count 2 1068 } 1069 1070 page_info " Merged [plural $count arc]" 1071 return 1072} 1073 1074# Use knowledge of the match status before and after an instruction to 1075# label the arcs a bit better (This may guide the forward and backward 1076# merging.). 1077 1078# Forward merging of instructions. 1079# An ok/fail decision is done as late as possible. 1080# 1081# /- ok ---> Y -> U /- ok ---> U 1082# X ==> X -> Y 1083# \- fail -> Y -> V \- fail -> V 1084 1085# The Y must not have additional inputs. This more complex case we 1086# will look at later. 1087 1088proc ::page::compiler::peg::mecpu::forwmerge {g} { 1089 page_info "* Forward merging of identical instructions" 1090 page_info " Delaying decisions" 1091 set count 0 1092 set runs 0 1093 1094 set merged 1 1095 while {$merged} { 1096 set merged 0 1097 foreach n [$g nodes] { 1098 # Skip nodes already killed in previous rounds. 1099 if {![$g node exists $n]} continue 1100 1101 set outbound [$g arcs -out $n] 1102 if {[llength $outbound] != 2} continue 1103 1104 foreach {aa ab} $outbound break 1105 set na [$g arc target $aa] 1106 set nb [$g arc target $ab] 1107 1108 set ia [$g node get $na instruction][$g node get $na arguments] 1109 set ib [$g node get $nb instruction][$g node get $nb arguments] 1110 if {$ia ne $ib} continue 1111 1112 # Additional condition: Inbounds in the targets not > 1 1113 1114 if {([$g node degree -in $na] > 1) || 1115 ([$g node degree -in $nb] > 1)} continue 1116 1117 page_log_info " /Merge [np $n] : [np $na] <- [np $nb] ([printinsn $g $na])" 1118 1119 # Label all arcs out of na with the condition of the arc 1120 # into it. Ditto for the arcs out of nb. The latter also 1121 # get na as their new origin. The arcs out of n relabeled 1122 # to always. The nb is deleted. This creates the desired 1123 # control structure without having to create a new node 1124 # and filling it. We simply use na, discard nb, and 1125 # properly rewrite the arcs to have the correct 1126 # conditions. 1127 1128 foreach a [$g arcs -out $na] { 1129 $g arc set $a condition [$g arc get $aa condition] 1130 } 1131 foreach a [$g arcs -out $nb] { 1132 $g arc set $a condition [$g arc get $ab condition] 1133 $g arc move-source $a $na 1134 } 1135 $g arc set $aa condition always 1136 $g node delete $nb 1137 set merged 1 1138 incr count 1139 } 1140 incr runs 1141 } 1142 1143 # NOTE: This may require a parallel arc merge, with identification 1144 # of merge-able arcs based on the arc condition, i.e. labeling. 1145 1146 page_info " Merged [plural $count instruction] in [plural $runs run]" 1147 return 1148} 1149 1150# Backward merging of instructions. 1151# Common backends are put together. 1152# 1153# U -> Y ->\ U ->\ 1154# -> X ==> -> Y -> X 1155# V -> Y ->/ V ->/ 1156 1157# Note. It is possible for an instruction to be amenable to both for- 1158# and backward merging. No heuristics are known to decide which is 1159# better. 1160 1161proc ::page::compiler::peg::mecpu::backmerge {g} { 1162 page_info "* Backward merging of identical instructions" 1163 page_info " Unifying paths" 1164 set count 0 1165 set runs 0 1166 1167 set merged 1 1168 while {$merged} { 1169 set merged 0 1170 foreach n [$g nodes] { 1171 # Skip nodes already killed in previous rounds. 1172 if {![$g node exists $n]} continue 1173 1174 set inbound [$g arcs -in $n] 1175 if {[llength $inbound] < 2} continue 1176 1177 # We have more than 1 inbound arcs on this node. Check all 1178 # pairs of pre-decessors for possible unification. 1179 1180 # Additional condition: Outbounds in the targets not > 1 1181 # We check in different levels, to avoid redundant calls. 1182 1183 while {[llength $inbound] > 2} { 1184 set aa [lindex $inbound 0] 1185 set tail [lrange $inbound 1 end] 1186 1187 set na [$g arc source $aa] 1188 if {[$g node degree -out $na] > 1} { 1189 set inbound $tail 1190 continue 1191 } 1192 1193 set inbound {} 1194 foreach ab $tail { 1195 set nb [$g arc source $ab] 1196 if {[$g node degree -out $nb] > 1} continue 1197 1198 set ia [$g node get $na instruction][$g node get $na arguments] 1199 set ib [$g node get $nb instruction][$g node get $nb arguments] 1200 1201 if {$ia ne $ib} { 1202 lappend inbound $ab 1203 continue 1204 } 1205 1206 page_log_info " \\Merge [np $n] : [np $na] <- [np $nb] ([printinsn $g $na])" 1207 1208 # Discard the second node in the pair. Move all 1209 # arcs inbound into it so that they reach the 1210 # first node instead. 1211 1212 foreach a [$g arcs -in $nb] {$g arc move-target $a $na} 1213 $g node delete $nb 1214 set merged 1 1215 incr count 1216 } 1217 } 1218 } 1219 incr runs 1220 } 1221 1222 page_info " Merged [plural $count instruction] in [plural $runs run]" 1223 return 1224} 1225 1226# ### ### ### ######### ######### ######### 1227 1228proc ::page::compiler::peg::mecpu::pathlengths {g} { 1229 page_info "* Find maximum length paths" 1230 1231 set pending [llength [$g nodes]] 1232 1233 set nodes {} 1234 set loops {} 1235 foreach n [$g nodes] { 1236 $g node set $n WAIT [$g node degree -out $n] 1237 set insn [$g node get $n instruction] 1238 if {($insn eq "icf_halt") || ($insn eq "icf_ntreturn")} { 1239 lappend nodes $n 1240 } 1241 if {[$g node keyexists $n LOOP]} { 1242 lappend loops $n 1243 } 1244 } 1245 1246 set level 0 1247 while {[llength $nodes]} { 1248 incr pending -[llength $nodes] 1249 set nodes [closure $g $nodes $level] 1250 incr level 1251 } 1252 1253 if {[llength $loops]} { 1254 page_info " Loop levels" 1255 1256 set nodes $loops 1257 while {[llength $nodes]} { 1258 incr pending -[llength $nodes] 1259 set nodes [closure $g $nodes $level] 1260 incr level 1261 } 1262 } 1263 1264 if {$pending} { 1265 page_info " Remainder" 1266 1267 while {$pending} { 1268 set nodes {} 1269 foreach n [$g nodes] { 1270 if {[$g node keyexists $n LEVEL]} continue 1271 if {[$g node get $n WAIT] < [$g node degree -out $n]} { 1272 lappend nodes $n 1273 } 1274 } 1275 while {[llength $nodes]} { 1276 incr pending -[llength $nodes] 1277 set nodes [closure $g $nodes $level] 1278 incr level 1279 } 1280 } 1281 } 1282 return 1283} 1284 1285proc ::page::compiler::peg::mecpu::closure {g nodes level} { 1286 page_log_info " \[[format %6d $level]\] : $nodes" 1287 1288 foreach n $nodes {$g node set $n LEVEL $level} 1289 1290 set tmp {} 1291 foreach n $nodes { 1292 foreach pre [$g nodes -in $n] { 1293 # Ignore instructions already given a level. 1294 if {[$g node keyexists $pre LEVEL]} continue 1295 $g node set $pre WAIT [expr {[$g node get $pre WAIT] - 1}] 1296 if {[$g node get $pre WAIT] > 0} continue 1297 lappend tmp $pre 1298 } 1299 } 1300 return [lsort -uniq -dict $tmp] 1301} 1302 1303proc ::page::compiler::peg::mecpu::jumps {g} { 1304 page_info "* Insert explicit jumps and branches" 1305 1306 foreach n [$g nodes] { 1307 # Inbound > 1, at least one is from a jump, so a label is 1308 # needed. 1309 1310 if {[llength [$g arcs -in $n]] > 1} { 1311 set go bra[string range $n 4 end] 1312 $g node set $n LABEL $go 1313 } 1314 1315 set darcs [$g arcs -out $n] 1316 1317 if {[llength $darcs] == 0} { 1318 $g node set $n NEXT "" 1319 continue 1320 } 1321 1322 if {[llength $darcs] == 1} { 1323 set da [lindex $darcs 0] 1324 set dn [$g arc target $da] 1325 1326 if {[$g node get $dn LEVEL] > [$g node get $n LEVEL]} { 1327 # Flow is backward, an uncond. jump 1328 # is needed here. 1329 1330 set go bra[string range $dn 4 end] 1331 $g node set $dn LABEL $go 1332 set j [$g node insert] 1333 $g arc move-target $da $j 1334 $g node set $j instruction icf_jalways 1335 $g node set $j arguments $go 1336 1337 $g arc insert $j $dn 1338 1339 $g node set $n NEXT $j 1340 $g node set $j NEXT "" 1341 } else { 1342 $g node set $n NEXT $dn 1343 } 1344 continue 1345 } 1346 1347 set aok {} 1348 set afl {} 1349 foreach a $darcs { 1350 if {[$g arc get $a condition] eq "ok"} { 1351 set aok $a 1352 } else { 1353 set afl $a 1354 } 1355 } 1356 set nok [$g arc target $aok] 1357 set nfl [$g arc target $afl] 1358 1359 if {[$g node get $n instruction] eq "inc_restore"} { 1360 set go bra[string range $nok 4 end] 1361 $g node set $nok LABEL $go 1362 1363 $g node set $n NEXT $nfl 1364 $g node set $n SAVE $nok 1365 1366 $g node set $n arguments [linsert [$g node get $n arguments] 0 $go] 1367 continue 1368 } 1369 1370 if {[$g node get $n instruction] ne ".BRA"} { 1371 set bra [$g node insert] 1372 $g arc move-source $aok $bra 1373 $g arc move-source $afl $bra 1374 $g arc insert $n $bra 1375 $g node set $n NEXT $bra 1376 set n $bra 1377 } 1378 1379 if {[$g node get $nok LEVEL] > [$g node get $nfl LEVEL]} { 1380 # Ok branch is direct, Fail is jump. 1381 1382 $g node set $n NEXT $nok 1383 $g node set $n SAVE $nfl 1384 1385 set go bra[string range $nfl 4 end] 1386 $g node set $nfl LABEL $go 1387 $g node set $n instruction icf_jfail 1388 $g node set $n arguments $go 1389 } else { 1390 1391 # Fail branch is direct, Ok is jump. 1392 1393 $g node set $n NEXT $nfl 1394 $g node set $n SAVE $nok 1395 1396 set go bra[string range $nok 4 end] 1397 $g node set $nok LABEL $go 1398 $g node set $n instruction icf_jok 1399 $g node set $n arguments $go 1400 } 1401 } 1402} 1403 1404proc ::page::compiler::peg::mecpu::symbols {g t} { 1405 page_info "* Label subroutine heads" 1406 1407 # Label and mark the instructions where subroutines begin. 1408 # These markers are used by 2code to locate all actually 1409 # used subroutines. 1410 1411 foreach def [lsort -uniq [$t get root gas::called]] { 1412 set gdef [$t get $def gas::entry] 1413 foreach caller [$t get $def gas::callers] { 1414 1415 # Skip callers which are gone because of optimizations. 1416 if {![$g node exists $caller]} continue 1417 1418 $g node set $caller CALL $gdef 1419 $g node set $gdef LABEL \ 1420 [lindex [$g node set $caller arguments] 0] 1421 } 1422 } 1423 return 1424} 1425 1426# ### ### ### ######### ######### ######### 1427 1428proc ::page::compiler::peg::mecpu::statistics {code} { 1429 return 1430 # disabled 1431 page_info "* Statistics" 1432 statistics_si $code 1433 1434 # All higher order statistics are done only on the instructions in 1435 # a basic block, i.e. a linear sequence. We are looking for 1436 # high-probability blocks in itself, and then also for 1437 # high-probability partials. 1438 1439 set blocks [basicblocks $code] 1440 1441 # Basic basic block statistics (full blocks) 1442 1443 Init bl 1444 foreach b $blocks {Incr bl($b)} 1445 wrstat bl asm/statistics_bb.txt 1446 wrstatk bl asm/statistics_bbk.txt 1447 1448 # Statistics of all partial blocks, i.e. all possible 1449 # sub-sequences with length > 1. 1450 1451 Init ps 1452 foreach b $blocks { 1453 for {set s 0} {$s < [llength $b]} {incr s} { 1454 for {set e [expr {$s + 1}]} {$e < [llength $b]} {incr e} { 1455 Incr ps([lrange $b $s $e]) $bl($b) 1456 } 1457 } 1458 } 1459 1460 wrstat ps asm/statistics_ps.txt 1461 wrstatk ps asm/statistics_psk.txt 1462 return 1463} 1464 1465proc ::page::compiler::peg::mecpu::statistics_si {code} { 1466 page_info " Single instruction probabilities." 1467 1468 # What are the most used instructions, statically speaking, 1469 # without considering context ? 1470 1471 Init si 1472 foreach i $code { 1473 foreach {label name} $i break 1474 if {$name eq ".C"} continue 1475 Incr si($name) 1476 } 1477 1478 wrstat si asm/statistics_si.txt 1479 return 1480} 1481 1482proc ::page::compiler::peg::mecpu::Init {v} { 1483 upvar 1 $v var total total 1484 array set var {} 1485 set total 0 1486 return 1487} 1488 1489proc ::page::compiler::peg::mecpu::Incr {v {n 1}} { 1490 upvar 1 $v var total total 1491 if {![info exists var]} {set var $n ; incr total ; return} 1492 incr var $n 1493 incr total $n 1494 return 1495} 1496 1497proc ::page::compiler::peg::mecpu::wrstat {bv file} { 1498 upvar 1 $bv buckets total total 1499 1500 set tmp {} 1501 foreach {name count} [array get buckets] { 1502 lappend tmp [list $name $count] 1503 } 1504 1505 set lines {} 1506 lappend lines "Total: $total" 1507 1508 set half [expr {$total / 2}] 1509 set down $total 1510 1511 foreach item [lsort -index 1 -decreasing -integer [lsort -index 0 $tmp]] { 1512 foreach {key count} $item break 1513 1514 set percent [format %6.2f [expr {$count*100.0/$total}]]% 1515 set fcount [format %8d $count] 1516 1517 lappend lines " $fcount $percent $key" 1518 incr down -$count 1519 if {$half && ($down < $half)} { 1520 lappend lines ** 1521 set half 0 1522 } 1523 } 1524 1525 write $file [join $lines \n]\n 1526 return 1527} 1528 1529proc ::page::compiler::peg::mecpu::wrstatk {bv file} { 1530 upvar 1 $bv buckets total total 1531 1532 set tmp {} 1533 foreach {name count} [array get buckets] { 1534 lappend tmp [list $name $count] 1535 } 1536 1537 set lines {} 1538 lappend lines "Total: $total" 1539 1540 set half [expr {$total / 2}] 1541 set down $total 1542 1543 foreach item [lsort -index 0 [lsort -index 1 -decreasing -integer $tmp]] { 1544 foreach {key count} $item break 1545 1546 set percent [format %6.2f [expr {$count*100.0/$total}]]% 1547 set fcount [format %8d $count] 1548 1549 lappend lines " $fcount $percent $key" 1550 incr down -$count 1551 if {$down < $half} { 1552 lappend lines ** 1553 set half -1 1554 } 1555 } 1556 1557 write $file [join $lines \n]\n 1558 return 1559} 1560 1561proc ::page::compiler::peg::mecpu::basicblocks {code} { 1562 set blocks {} 1563 set block {} 1564 1565 foreach i $code { 1566 foreach {label name} $i break 1567 if { 1568 ($name eq ".C") || 1569 ($name eq "icf_jok") || 1570 ($name eq "icf_jfail") || 1571 ($name eq "icf_jalways") || 1572 ($name eq "icf_ntreturn") 1573 } { 1574 # Jumps stop a block, and are not put into the block 1575 # Except if the block is of length 1. Then it is of 1576 # interest to see if certain combinations are used 1577 # often. 1578 1579 if {[llength $block]} { 1580 if {[llength $block] == 1} {lappend block $name} 1581 lappend blocks $block 1582 } 1583 set block {} 1584 continue 1585 } elseif {$label ne ""} { 1586 # A labeled instruction starts a new block and belongs to 1587 # it. Note that the previous block is saved only if it is 1588 # of length > 1. A single instruction block is not 1589 # something we can optimize. 1590 1591 if {[llength $block] > 1} {lappend blocks $block} 1592 set block [list $name] 1593 continue 1594 } 1595 # Extend current block 1596 lappend block $name 1597 } 1598 1599 if {[llength $block]} {lappend blocks $block} 1600 return $blocks 1601} 1602 1603# ### ### ### ######### ######### ######### 1604 1605proc ::page::compiler::peg::mecpu::printinsn {g n} { 1606 return "[$g node get $n instruction] <[$g node get $n arguments]>" 1607} 1608 1609proc ::page::compiler::peg::mecpu::plural {n prefix} { 1610 return "$n ${prefix}[expr {$n == 1 ? "" : "s"}]" 1611} 1612 1613proc ::page::compiler::peg::mecpu::np {n} { 1614 format %-*s 8 $n 1615} 1616 1617proc ::page::compiler::peg::mecpu::status {g} { 1618 page_info "[plural [llength [$g nodes]] instruction]" 1619 return 1620} 1621 1622proc ::page::compiler::peg::mecpu::gdump {g file} { 1623 return 1624 # disabled 1625 variable gnext 1626 page_info " %% Saving graph to \"$file\" %%" 1627 write asm/[format %02d $gnext]_${file}.sgr [$g serialize] 1628 incr gnext 1629 return 1630} 1631 1632# ### ### ### ######### ######### ######### 1633## Internal. Strings. 1634 1635namespace eval ::page::compiler::peg::mecpu { 1636 variable gnext 0 1637} 1638 1639# ### ### ### ######### ######### ######### 1640## Ready 1641 1642package provide page::compiler::peg::mecpu 0.1.1 1643