1# peg_to_param.tcl -- 2# 3# Conversion of PEG to C PARAM, customizable text blocks. 4# 5# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net> 6# 7# See the file "license.terms" for information on usage and redistribution 8# of this file, and for a DISCLAIMER OF ALL WARRANTIES. 9# 10# RCS: @(#) $Id: pt_peg_to_cparam.tcl,v 1.2 2010/04/07 19:40:54 andreas_kupries Exp $ 11 12# This package takes the canonical serialization of a parsing 13# expression grammar and produces text in PARAM assembler, i.e. 14# readable machine code for the PARAM virtual machine. 15 16## NOTE: Should have cheat sheet of PARAM instructions (which parts of 17## the arch state they touch, and secondly, bigger effects). 18 19# ### ### ### ######### ######### ######### 20## Requisites 21 22package require Tcl 8.5 23package require pt::peg ; # Verification that the input 24 # is proper. 25package require pt::pe ; # Walking an expression. 26package require pt::pe::op ; # String/Class fusing 27package require text::write ; # Text generation support 28package require char 29 30# ### ### ### ######### ######### ######### 31## 32 33namespace eval ::pt::peg::to::cparam { 34 namespace export \ 35 reset configure convert 36 37 namespace ensemble create 38} 39 40# ### ### ### ######### ######### ######### 41## API. 42 43proc ::pt::peg::to::cparam::reset {} { 44 variable template @code@ ; # -template 45 variable name a_pe_grammar ; # -name 46 variable file unknown ; # -file 47 variable user unknown ; # -user 48 variable self {} ; # -self-command 49 variable ns {} ; # -namespace 50 variable def static ; # -fun-qualifier 51 variable main __main ; # -main 52 variable indent 0 ; # -indent 53 variable prelude {} ; # -prelude 54 variable statedecl {RDE_PARAM p} ; # -state-decl 55 variable stateref {p} ; # -state-ref 56 variable strings p_string ; # -string-varname 57 return 58} 59 60proc ::pt::peg::to::cparam::configure {args} { 61 variable template 62 variable name 63 variable file 64 variable user 65 variable self 66 variable ns 67 variable def 68 variable main 69 variable omap 70 variable indent 71 variable prelude 72 variable statedecl 73 variable stateref 74 variable strings 75 76 if {[llength $args] == 0} { 77 return [list \ 78 -file $file \ 79 -fun-qualifier $def \ 80 -indent $indent \ 81 -main $main \ 82 -name $name \ 83 -namespace $ns \ 84 -self-command $self \ 85 -state-decl $statedecl \ 86 -state-ref $stateref \ 87 -string-varname $strings \ 88 -template $template \ 89 -user $user \ 90 ] 91 } elseif {[llength $args] == 1} { 92 lassign $args option 93 set variable [string range $option 1 end] 94 if {[info exists omap($variable)]} { 95 return [set $omap($variable)] 96 } else { 97 return -code error "Expected one of -file, -fun-qualifier, -indent, -main, -name, -namespace, -self-command, -state-decl, -state-ref, -string-varname, -template, or -user, got \"$option\"" 98 } 99 } elseif {[llength $args] % 2 == 0} { 100 foreach {option value} $args { 101 set variable [string range $option 1 end] 102 if {![info exists omap($variable)]} { 103 return -code error "Expected one of -file, -fun-qualifier, -indent, -main, -name, -namespace, -self-command, -state-decl, -state-ref, -string-varname, -template, or -user, got \"$option\"" 104 } 105 } 106 foreach {option value} $args { 107 set variable $omap([string range $option 1 end]) 108 switch -exact -- $variable { 109 template { 110 if {$value eq {}} { 111 return -code error "Expected template, got the empty string" 112 } 113 } 114 indent { 115 if {![string is integer -strict $value] || ($value < 0)} { 116 return -code error "Expected int > 0, got \"$value\"" 117 } 118 } 119 statedecl - 120 stateref - 121 strings - 122 self - 123 def - 124 ns - 125 main - 126 name - 127 file - 128 user { } 129 } 130 set $variable $value 131 } 132 } else { 133 return -code error {wrong#args, expected option value ...} 134 } 135} 136 137proc ::pt::peg::to::cparam::convert {serial} { 138 variable Op::Asm::cache 139 variable template 140 variable name 141 variable file 142 variable user 143 variable self 144 variable ns 145 variable def 146 variable main 147 variable indent 148 variable prelude 149 variable statedecl 150 variable stateref 151 variable strings 152 153 Op::Asm::Setup 154 155 ::pt::peg verify-as-canonical $serial 156 157 # Unpack the serialization, known as canonical 158 array set peg $serial 159 array set peg $peg(pt::grammar::peg) 160 unset peg(pt::grammar::peg) 161 162 set modes {} 163 foreach {symbol symdef} $peg(rules) { 164 lassign $symdef _ is _ mode 165 lappend modes $symbol $mode 166 } 167 168 text::write reset 169 Op::Asm::Header {Declaring the parse functions} 170 text::write /line 171 text::write store FORWARD 172 173 text::write clear 174 set blocks {} 175 176 # Translate all expressions/symbols, results are stored in 177 # text::write blocks, command results are the block ids. 178 179 set start [pt::pe::op flatten \ 180 [pt::pe::op fusechars \ 181 [pt::pe::op flatten \ 182 $peg(start)]]] 183 184 lappend blocks [set start [Expression $start $modes]] 185 186 foreach {symbol symdef} $peg(rules) { 187 lassign $symdef _ is _ mode 188 set is [pt::pe::op flatten \ 189 [pt::pe::op fusechars \ 190 [pt::pe::op flatten \ 191 $is]]] 192 lappend blocks [Symbol $symbol $mode $is $modes] 193 } 194 195 # Assemble the output from the stored blocks. 196 text::write clear 197 text::write recall FORWARD 198 text::write /line 199 200 Op::Asm::Header {Precomputed table of strings (symbols, error messages, etc.).} 201 text::write /line 202 set n [llength $cache(_strings)] 203 text::write field static char const* @strings@ \[$n\] = \{ 204 text::write /line 205 foreach s [lrange $cache(_strings) 0 end-1] { 206 text::write field " " ${s}, 207 text::write /line 208 } 209 text::write field " " [lindex $cache(_strings) end] 210 text::write /line 211 text::write field \}\; 212 text::write /line 213 text::write /line 214 215 Op::Asm::Header {Grammar Start Expression} 216 Op::Asm::FunStart @main@ 217 Op::Asm::Call $start 0 218 Op::Asm::CStmt return 219 Op::Asm::FunClose 220 221 foreach b $blocks { 222 Op::Asm::Use $b 223 text::write /line 224 } 225 226 # At last retrieve the fully assembled result and integrate with 227 # the chosen template. 228 229 set code [text::write get] 230 if {$indent} { 231 set code [Indent $code $indent] 232 } 233 234 set xprelude $prelude ; if {$xprelude ne {}} { set xprelude " $xprelude" } 235 set xself $self ; if {$xself ne {}} { append xself { } } 236 237 set code [string map \ 238 [list \ 239 @user@ $user \ 240 @format@ C/PARAM \ 241 @file@ $file \ 242 @name@ $name \ 243 @code@ $code] $template] 244 set code [string map \ 245 [list \ 246 @statedecl@ $statedecl \ 247 @stateref@ $stateref \ 248 @strings@ $strings \ 249 { @prelude@} $xprelude \ 250 {@self@ } $xself \ 251 @def@ $def \ 252 @ns@ $ns \ 253 @main@ $main] $code] 254 255 return $code 256 # ### ### ### ######### ######### ######### 257} 258 259# ### ### ### ######### ######### ######### 260## Internals 261 262proc ::pt::peg::to::cparam::Indent {text n} { 263 set b [string repeat { } $n] 264 return $b[join [split $text \n] \n$b] 265} 266 267proc ::pt::peg::to::cparam::Expression {expression modes} { 268 return [pt::pe bottomup \ 269 [list [namespace current]::Op $modes] \ 270 $expression] 271} 272 273proc ::pt::peg::to::cparam::Symbol {symbol mode rhs modes} { 274 275 set expression [Expression $rhs $modes] 276 277 text::write clear 278 Op::Asm::Header "$mode Symbol '$symbol'" 279 text::write store FUN_HEADER 280 281 Op::Asm::Start 282 Op::Asm::ReExpression $symbol 283 Op::Asm::GenAST $expression 284 Op::Asm::PE $rhs 285 286 set gen [dict get $result gen] 287 288 Op::Asm::Function sym_$symbol { 289 290 set msg [Op::Asm::String [list n $symbol]] 291 set symbol [Op::Asm::String $symbol] 292 293 # We have six possibilites for the combination of AST node 294 # generation by the rhs and AST generation by the symbol. Two 295 # of these (leaf/0, value/0 coincide, leaving 5). This 296 # controls the use of AS/ARS instructions. 297 298 switch -exact -- $mode/$gen { 299 value/1 { 300 # Generate value for symbol, rhs may have generated 301 # AST nodes as well, keep rhs 302 303 Op::Asm::CBlock if (rde_param_i_symbol_start_d (@stateref@, $symbol)) return \; 304 Op::Asm::Call $expression 305 Op::Asm::Ins symbol_done_d_reduce $symbol $msg 306 307 #Op::Asm::CBlock if (!rde_param_i_symbol_restore (@stateref@, $symbol)) \{ 308 #Op::Asm::>>> 4 309 310 #Op::Asm::Ins loc_push 311 #Op::Asm::Ins ast_push 312 313 #Op::Asm::Call $expression 314 315 #Op::Asm::CBlock if (rde_param_query_st(@stateref@)) \{ 316 #Op::Asm::>>> 4 317 #Op::Asm::Ins value_reduce $symbol 318 #Op::Asm::<<< 4 319 #Op::Asm::CBlock \} else \{ 320 #Op::Asm::>>> 4 321 #Op::Asm::Ins value_clear 322 #Op::Asm::<<< 4 323 #Op::Asm::CBlock \} 324 325 #Op::Asm::Ins symbol_save $symbol 326 #Op::Asm::Ins error_nonterminal $symbol 327 328 #Op::Asm::Ins ast_pop_rewind 329 #Op::Asm::Ins loc_pop_discard 330 331 #Op::Asm::<<< 4 332 #Op::Asm::CBlock \} 333 334 #Op::Asm::CBlock if (rde_param_query_st(@stateref@)) \{ 335 #Op::Asm::>>> 4 336 #Op::Asm::Ins ast_value_push 337 #Op::Asm::<<< 4 338 #Op::Asm::CBlock \} 339 } 340 leaf/0 - 341 value/0 { 342 # Generate value for symbol, rhs cannot generate its 343 # own AST nodes => leaf/0. 344 345 Op::Asm::CBlock if (rde_param_i_symbol_start (@stateref@, $symbol)) return \; 346 Op::Asm::Call $expression 347 Op::Asm::Ins symbol_done_leaf $symbol $msg 348 349 #Op::Asm::CBlock if (!rde_param_i_symbol_restore (@stateref@, $symbol)) \{ 350 #Op::Asm::>>> 4 351 352 #Op::Asm::Ins loc_push 353 354 #Op::Asm::Call $expression 355 356 #Op::Asm::CBlock if (rde_param_query_st(@stateref@)) \{ 357 #Op::Asm::>>> 4 358 #Op::Asm::Ins value_leaf $symbol 359 #Op::Asm::<<< 4 360 #Op::Asm::CBlock \} else \{ 361 #Op::Asm::>>> 4 362 #Op::Asm::Ins value_clear 363 #Op::Asm::<<< 4 364 #Op::Asm::CBlock \} 365 366 #Op::Asm::Ins symbol_save $symbol 367 #Op::Asm::Ins error_nonterminal $symbol 368 369 #Op::Asm::Ins loc_pop_discard 370 371 #Op::Asm::<<< 4 372 #Op::Asm::CBlock \} 373 374 #Op::Asm::CBlock if (rde_param_query_st(@stateref@)) \{ 375 #Op::Asm::>>> 4 376 #Op::Asm::Ins ast_value_push 377 #Op::Asm::<<< 4 378 #Op::Asm::CBlock \} 379 } 380 leaf/1 { 381 # Generate value for symbol, rhs may have generated 382 # AST nodes as well, discard rhs. 383 384 Op::Asm::CBlock if (rde_param_i_symbol_start_d (@stateref@, $symbol)) return \; 385 Op::Asm::Call $expression 386 Op::Asm::Ins symbol_done_d_leaf $symbol $msg 387 388 #Op::Asm::CBlock if (!rde_param_i_symbol_restore (@stateref@, $symbol)) \{ 389 #Op::Asm::>>> 4 390 391 #Op::Asm::Ins loc_push 392 #Op::Asm::Ins ast_push 393 394 #Op::Asm::Call $expression 395 396 #Op::Asm::CBlock if (rde_param_query_st(@stateref@)) \{ 397 #Op::Asm::>>> 4 398 #Op::Asm::Ins value_leaf $symbol 399 #Op::Asm::<<< 4 400 #Op::Asm::CBlock \} else \{ 401 #Op::Asm::>>> 4 402 #Op::Asm::Ins value_clear 403 #Op::Asm::<<< 4 404 #Op::Asm::CBlock \} 405 406 #Op::Asm::Ins symbol_save $symbol 407 #Op::Asm::Ins error_nonterminal $symbol 408 409 #Op::Asm::Ins ast_pop_rewind 410 #Op::Asm::Ins loc_pop_discard 411 412 #Op::Asm::<<< 4 413 #Op::Asm::CBlock \} 414 415 #Op::Asm::CBlock if (rde_param_query_st(@stateref@)) \{ 416 #Op::Asm::>>> 4 417 #Op::Asm::Ins ast_value_push 418 #Op::Asm::<<< 4 419 #Op::Asm::CBlock \} 420 } 421 void/1 { 422 # Generate no value for symbol, rhs may have generated 423 # AST nodes as well, discard rhs. 424 # // test case missing // 425 426 Op::Asm::CBlock if (rde_param_i_symbol_void_start_d (@stateref@, $symbol)) return \; 427 Op::Asm::Call $expression 428 Op::Asm::Ins symbol_done_d_void $symbol $msg 429 430 #Op::Asm::CBlock if (!rde_param_i_symbol_restore (@stateref@, $symbol)) \{ 431 #Op::Asm::>>> 4 432 433 #Op::Asm::Ins loc_push 434 #Op::Asm::Ins ast_push 435 436 #Op::Asm::Call $expression 437 438 #Op::Asm::Ins value_clear 439 440 #Op::Asm::Ins symbol_save $symbol 441 #Op::Asm::Ins error_nonterminal $symbol 442 443 #Op::Asm::Ins ast_pop_rewind 444 #Op::Asm::Ins loc_pop_discard 445 446 #Op::Asm::<<< 4 447 #Op::Asm::CBlock \} 448 } 449 void/0 { 450 # Generate no value for symbol, rhs cannot generate 451 # its own AST nodes. Nothing to save nor discard. 452 453 Op::Asm::CBlock if (rde_param_i_symbol_void_start (@stateref@, $symbol)) return \; 454 Op::Asm::Call $expression 455 Op::Asm::Ins symbol_done_void $symbol $msg 456 457 #Op::Asm::CBlock if (!rde_param_i_symbol_restore (@stateref@, $symbol)) \{ 458 #Op::Asm::>>> 4 459 460 #Op::Asm::Ins loc_push 461 462 #Op::Asm::Call $expression 463 464 #Op::Asm::Ins value_clear 465 466 #Op::Asm::Ins symbol_save $symbol 467 #Op::Asm::Ins error_nonterminal $symbol 468 469 #Op::Asm::Ins loc_pop_discard 470 471 #Op::Asm::<<< 4 472 #Op::Asm::CBlock \} 473 } 474 } 475 } $expression 476 Op::Asm::Done 477} 478 479namespace eval ::pt::peg::to::cparam::Op { 480 namespace export \ 481 alpha alnum ascii digit graph lower print \ 482 punct space upper wordchar xdigit ddigit \ 483 dot epsilon t .. n ? * + & ! x / 484} 485 486proc ::pt::peg::to::cparam::Op {modes pe op arguments} { 487 return [namespace eval Op [list $op $modes {*}$arguments]] 488} 489 490proc ::pt::peg::to::cparam::Op::epsilon {modes} { 491 Asm::Start 492 Asm::ReExpression epsilon 493 Asm::Direct { 494 Asm::Ins status_ok 495 } 496 Asm::Done 497} 498 499proc ::pt::peg::to::cparam::Op::dot {modes} { 500 Asm::Start 501 Asm::ReExpression dot 502 Asm::Direct { 503 Asm::Ins input_next [Asm::String dot] 504 } 505 Asm::Done 506} 507 508foreach test { 509 alpha alnum ascii digit graph lower print 510 punct space upper wordchar xdigit ddigit 511} { 512 proc ::pt::peg::to::cparam::Op::$test {modes} \ 513 [string map [list @OP@ $test] { 514 Asm::Start 515 Asm::ReExpression @OP@ 516 Asm::Direct { 517 set m [Asm::String @OP@] 518 #Asm::Ins input_next [Asm::String @OP@] 519 #Asm::CStmt if (!rde_param_query_st(@stateref@)) return 520 #Asm::Ins test_@OP@ 521 Asm::Ins next_@OP@ $m 522 } 523 Asm::Done 524 }] 525} 526 527proc ::pt::peg::to::cparam::Op::t {modes char} { 528 Asm::Start 529 Asm::ReTerminal t $char 530 Asm::Direct { 531 set c [char quote tcl $char] 532 set m [Asm::String "t $c"] 533 534 #Asm::Ins input_next $m 535 #Asm::CStmt if (!rde_param_query_st(@stateref@)) return 536 #Asm::Ins test_char \"$c\" $m 537 Asm::Ins next_char \"$c\" $m 538 } 539 Asm::Done 540} 541 542proc ::pt::peg::to::cparam::Op::.. {modes chstart chend} { 543 Asm::Start 544 Asm::ReTerminal .. $chstart $chend 545 Asm::Direct { 546 set s [char quote tcl $chstart] 547 set e [char quote tcl $chend] 548 set m [Asm::String ".. $s $e"] 549 550 #Asm::Ins input_next $m 551 #Asm::CStmt if (!rde_param_query_st(@stateref@)) return 552 #Asm::Ins test_range \"$s\" \"$e\" $m 553 Asm::Ins next_range \"$s\" \"$e\" $m 554 } 555 Asm::Done 556} 557 558proc ::pt::peg::to::cparam::Op::str {modes args} { 559 Asm::Start 560 Asm::ReTerminal str {*}$args 561 Asm::Direct { 562 set str [join [char quote tcl {*}$args] {}] 563 set m [Asm::String "str '$str'"] 564 565 # Without fusing this would be rendered as a sequence of 566 # characters, with associated stack churn for each 567 # character/part (See Op::x, void/all). 568 569 Asm::Ins next_str \"$str\" $m 570 } 571 Asm::Done 572} 573 574proc ::pt::peg::to::cparam::Op::cl {modes args} { 575 # rorc = Range-OR-Char-List 576 Asm::Start 577 Asm::ReTerminal cl {*}$args 578 Asm::Direct { 579 # Without fusing this would be rendered as a choice of 580 # characters, with associated stack churn for each 581 # character/branch (See Op::/, void/all). 582 583 set cl [join [Ranges {*}$args] {}] 584 set m [Asm::String "cl '$cl'"] 585 586 Asm::Ins next_class \"$cl\" $m 587 } 588 Asm::Done 589} 590 591proc ::pt::peg::to::cparam::Op::Ranges {args} { 592 set res {} 593 foreach rorc $args { lappend res [Range $rorc] } 594 return $res 595} 596 597proc ::pt::peg::to::cparam::Op::Range {rorc} { 598 # See also pt::peg::to::peg 599 600 # We use string ops here to distinguish terminals and ranges. The 601 # input can be a single char, not a list, and further the char may 602 # not be a proper list. Example: double-apostroph. 603 if {[string length $rorc] > 1} { 604 lassign $rorc s e 605 606 # The whole range is expanded into its full set of characters. 607 # Beware, this may blow the process if the range tries to 608 # match a substantial part of the unicode character set. We 609 # should see if there is a way to keep it encoded as range 610 # without giving up on the fast matching. 611 612 set s [scan $s %c] 613 set e [scan $e %c] 614 615 set res {} 616 for {set i $s} {$i <= $e} {incr i} { 617 append res [format %c $i] 618 } 619 return $res 620 } else { 621 return [char quote tcl $rorc] 622 } 623} 624 625proc ::pt::peg::to::cparam::Op::n {modes symbol} { 626 # symbol mode determines AST generation 627 # void => non-generative, 628 # leaf/value => generative. 629 630 Asm::Start 631 Asm::ReTerminal n $symbol 632 633 if {![dict exists $modes $symbol]} { 634 # Incomplete grammar. The symbol has no definition. 635 Asm::Direct { 636 Asm::CStmt "/* Undefined symbol '$symbol' */" 637 Asm::Ins status_fail 638 } 639 } else { 640 Asm::GenAST [list gen [expr { [dict get $modes $symbol] ne "void" }]] 641 Asm::Direct { 642 Asm::Self sym_$symbol 643 } 644 } 645 Asm::Done 646} 647 648proc ::pt::peg::to::cparam::Op::& {modes expression} { 649 # Note: This operation could be inlined, as it has no special 650 # control flow. Not done to make the higher-level ops are 651 # similar in construction and use = consistent and simple. 652 653 Asm::Start 654 Asm::ReExpression & $expression 655 Asm::GenAST $expression 656 657 Asm::Function [Asm::NewBlock ahead] { 658 Asm::Ins loc_push 659 Asm::Call $expression 660 Asm::Ins loc_pop_rewind 661 } $expression 662 Asm::Done 663} 664 665proc ::pt::peg::to::cparam::Op::! {modes expression} { 666 # Note: This operation could be inlined, as it has no special 667 # control flow. Not done to make the higher-level ops are 668 # similar in construction and use = consistent and simple. 669 670 Asm::Start 671 Asm::ReExpression ! $expression 672 if {[dict get $expression gen]} { 673 Asm::Function [Asm::NewBlock notahead] { 674 # The sub-expression may generate AST elements. We must 675 # not pass them through. 676 677 #Asm::Ins loc_push 678 #Asm::Ins ast_push 679 680 Asm::Ins notahead_start_d 681 Asm::Call $expression 682 Asm::Ins notahead_exit_d 683 684 #Asm::CBlock if (rde_param_query_st(@stateref@)) \{ 685 #Asm::>>> 4 686 #Asm::Ins ast_pop_rewind 687 #Asm::<<< 4 688 #Asm::CBlock \} else \{ 689 #Asm::>>> 4 690 #Asm::Ins ast_pop_discard 691 #Asm::<<< 4 692 #Asm::CBlock \} 693 694 #Asm::Ins loc_pop_rewind 695 #Asm::Ins status_negate 696 } $expression 697 } else { 698 Asm::Function [Asm::NewBlock notahead] { 699 # The sub-expression cannot generate AST elements. We can 700 # ignore AS/ARS, simplifying the code. 701 702 Asm::Ins loc_push 703 Asm::Call $expression 704 Asm::Ins notahead_exit 705 706 #Asm::Ins loc_pop_rewind 707 #Asm::Ins status_negate 708 } $expression 709 } 710 Asm::Done 711} 712 713proc ::pt::peg::to::cparam::Op::? {modes expression} { 714 # Note: This operation could be inlined, as it has no special 715 # control flow. Not done to make the higher-level ops are 716 # similar in construction and use => consistent and simple. 717 718 Asm::Start 719 Asm::ReExpression ? $expression 720 Asm::GenAST $expression 721 722 Asm::Function [Asm::NewBlock optional] { 723 #Asm::Ins loc_push 724 #Asm::Ins error_push 725 726 Asm::Ins state_push_2 727 Asm::Call $expression 728 Asm::Ins state_merge_ok 729 730 #Asm::Ins error_pop_merge 731 732 #Asm::CBlock if (rde_param_query_st(@stateref@)) \{ 733 #Asm::>>> 4 734 #Asm::Ins loc_pop_discard 735 #Asm::<<< 4 736 #Asm::CBlock \} else \{ 737 #Asm::>>> 4 738 #Asm::Ins loc_pop_rewind 739 #Asm::<<< 4 740 #Asm::CBlock \} 741 742 #Asm::Ins status_ok 743 } $expression 744 Asm::Done 745} 746 747proc ::pt::peg::to::cparam::Op::* {modes expression} { 748 Asm::Start 749 Asm::ReExpression * $expression 750 Asm::GenAST $expression 751 752 Asm::Function [Asm::NewBlock kleene] { 753 Asm::CBlock while (1) \{ 754 Asm::>>> 4 755 #Asm::Ins loc_push 756 #Asm::Ins error_push 757 758 Asm::Ins state_push_2 759 Asm::Call $expression 760 Asm::CStmt if (rde_param_i_kleene_close(@stateref@)) return 761 762 #Asm::Ins error_pop_merge 763 764 #Asm::CStmt if (!rde_param_query_st(@stateref@)) break 765 #Asm::Ins loc_pop_discard 766 Asm::<<< 4 767 Asm::CBlock \} 768 # FAILED, clean up and return OK. 769 #text::write /line 770 #Asm::Ins loc_pop_rewind 771 #Asm::Ins status_ok 772 } $expression 773 Asm::Done 774} 775 776proc ::pt::peg::to::cparam::Op::+ {modes expression} { 777 Asm::Start 778 Asm::ReExpression + $expression 779 Asm::GenAST $expression 780 781 Asm::Function [Asm::NewBlock poskleene] { 782 Asm::Ins loc_push 783 Asm::Call $expression 784 Asm::CStmt if (rde_param_i_kleene_abort(@stateref@)) return 785 786 #Asm::CStmt if (!rde_param_query_st(@stateref@)) goto error 787 #Asm::Ins loc_pop_discard 788 #text::write /line 789 790 Asm::CBlock while (1) \{ 791 Asm::>>> 4 792 #Asm::Ins loc_push 793 #Asm::Ins error_push 794 795 Asm::Ins state_push_2 796 Asm::Call $expression 797 Asm::CStmt if (rde_param_i_kleene_close(@stateref@)) return 798 799 #Asm::Ins error_pop_merge 800 801 #Asm::CStmt if (!rde_param_query_st(@stateref@)) break 802 #Asm::Ins loc_pop_discard 803 Asm::<<< 4 804 Asm::CBlock \} 805 # FAILED, clean up and return OK. 806 #text::write /line 807 #Asm::Ins status_ok 808 #Asm::CLabel error 809 #Asm::Ins loc_pop_rewind 810 } $expression 811 Asm::Done 812} 813 814proc ::pt::peg::to::cparam::Op::x {modes args} { 815 if {[llength $args] == 1} { 816 return [lindex $args 0] 817 } 818 819 Asm::Start 820 Asm::ReExpression x {*}$args 821 set gens [Asm::GenAST {*}$args] 822 823 # We have three possibilities regarding AST node generation, each 824 # requiring a slightly different instruction sequence. 825 826 # i. gen == 0 <=> No node generation at all. 827 # ii. gens[0] == 1 <=> We may have nodes from the beginning. 828 # iii. <=> Node generation starts in the middle. 829 830 if {![dict get $result gen]} { 831 set mode none 832 } elseif {[lindex $gens 0]} { 833 set mode all 834 } else { 835 set mode some 836 } 837 838 Asm::Function [Asm::NewBlock sequence] { 839 switch -exact -- $mode { 840 none { 841 # (Ad i) No AST node generation at all. 842 843 Asm::xinit0 844 845 # Note: This loop runs at code generation time. At 846 # runtime the entire construction is essentially a 847 # fully unrolled loop, with each iteration having its 848 # own block of instructions. 849 850 foreach expression [lrange $args 0 end-1] { 851 Asm::Call $expression 852 Asm::xinter00 853 } 854 Asm::Call [lindex $args end] 855 Asm::xexit0 856 } 857 all { 858 # (Ad ii) AST node generation from start to end. 859 860 Asm::xinit1 861 862 # Note: This loop runs at code generation time. At 863 # runtime the entire construction is essentially a 864 # fully unrolled loop, with each iteration having its 865 # own block of instructions. 866 867 foreach expression [lrange $args 0 end-1] { 868 Asm::Call $expression 869 Asm::xinter11 870 } 871 Asm::Call [lindex $args end] 872 Asm::xexit1 873 } 874 some { 875 # (Ad iii). Start without AST nodes, later parts do 876 # AST nodes. 877 878 Asm::xinit0 879 880 # Note: This loop runs at code generation time. At 881 # runtime the entire construction is essentially a 882 # fully unrolled loop, with each iteration having its 883 # own block of instructions. 884 885 set pushed 0 886 foreach expression [lrange $args 0 end-1] xgen [lrange $gens 1 end] { 887 Asm::Call $expression 888 if {!$pushed && $xgen} { 889 Asm::xinter01 890 set pushed 1 891 continue 892 } 893 if {$pushed} { 894 #Asm::xinter11 error_pushed 895 Asm::xinter11 896 } else { 897 Asm::xinter00 898 } 899 } 900 Asm::Call [lindex $args end] 901 #Asm::xexit1a 902 Asm::xexit1 903 } 904 } 905 } {*}$args 906 Asm::Done 907} 908 909proc ::pt::peg::to::cparam::Op::/ {modes args} { 910 if {[llength $args] == 1} { 911 return [lindex $args 0] 912 } 913 914 Asm::Start 915 Asm::ReExpression / {*}$args 916 set gens [Asm::GenAST {*}$args] 917 918 # Optimized AST handling: Handle each branch separately, based on 919 # its ability to generate AST nodes. 920 921 Asm::Function [Asm::NewBlock choice] { 922 set hasxgen 0 923 set hasnoxgen 0 924 if {[tcl::mathfunc::max {*}$gens]} { set hasxgen 1 } 925 if {![tcl::mathfunc::min {*}$gens]} { set hasnoxgen 1 } 926 927 set xgen [lindex $gens 0] 928 Asm::/init$xgen 929 930 # Note: This loop runs at code generation time. At runtime the 931 # entire construction is essentially a fully unrolled loop, 932 # with each iteration having its own block of instructions. 933 934 foreach expression [lrange $args 0 end-1] nxgen [lrange $gens 1 end] { 935 Asm::Call $expression 936 Asm::/inter$xgen$nxgen 937 set xgen $nxgen 938 } 939 940 Asm::Call [lindex $args end] 941 Asm::/exit$nxgen;#[expr {$nxgen ? $hasnoxgen : $hasxgen }] 942 943 } {*}$args 944 Asm::Done 945} 946 947# ### ### ### ######### ######### ######### 948## Assembler commands 949 950namespace eval ::pt::peg::to::cparam::Op::Asm {} 951 952# ### ### ### ######### ######### ######### 953## The various part of a sequence compilation. 954proc ::pt::peg::to::cparam::Op::Asm::xinit0 {} { 955 #Ins loc_push 956 #Ins error_clear 957 #text::write /line 958 #Ins error_push 959 960 Ins state_push_void 961 return 962} 963 964proc ::pt::peg::to::cparam::Op::Asm::xinit1 {} { 965 #Ins ast_push 966 #Ins loc_push 967 #Ins error_clear 968 #text::write /line 969 #Ins error_push 970 971 Ins state_push_value 972 return 973} 974 975proc ::pt::peg::to::cparam::Op::Asm::xinter00 {} { 976 #Ins error_pop_merge 977 # Stop the sequence on element failure, and 978 # restore state to before we tried the sequence. 979 #CStmt if (!rde_param_query_st(@stateref@)) goto error 980 #Ins error_push 981 982 CStmt if (rde_param_i_seq_void2void(@stateref@)) return 983 return 984} 985 986proc ::pt::peg::to::cparam::Op::Asm::xinter01 {} { 987 #Ins error_pop_merge 988 # Stop the sequence on element failure, and 989 # restore state to before we tried the sequence. 990 #CStmt if (!rde_param_query_st(@stateref@)) goto error 991 #Ins ast_push 992 #Ins error_push 993 994 CStmt if (rde_param_i_seq_void2value(@stateref@)) return 995 return 996} 997 998proc ::pt::peg::to::cparam::Op::Asm::xinter11 {{label error}} { 999 #Ins error_pop_merge 1000 # Stop the sequence on element failure, and 1001 # restore state to before we tried the sequence. 1002 #CStmt if (!rde_param_query_st(@stateref@)) goto $label 1003 #Ins error_push 1004 1005 CStmt if (rde_param_i_seq_value2value(@stateref@)) return 1006 return 1007} 1008 1009proc ::pt::peg::to::cparam::Op::Asm::xexit0 {} { 1010 #Ins error_pop_merge 1011 1012 # Stop the sequence on element failure, and 1013 # restore state to before we tried the sequence. 1014 1015 #CStmt if (!rde_param_query_st(@stateref@)) goto error 1016 1017 # All elements OK, squash backtracking state 1018 #text::write /line 1019 #Ins loc_pop_discard 1020 #CStmt return 1021 1022 #CLabel error 1023 #Ins loc_pop_rewind 1024 1025 Ins state_merge_void 1026 return 1027} 1028 1029proc ::pt::peg::to::cparam::Op::Asm::xexit1 {} { 1030 #Ins error_pop_merge 1031 1032 # Stop the sequence on element failure, and 1033 # restore state to before we tried the sequence. 1034 1035 #CStmt if (!rde_param_query_st(@stateref@)) goto error 1036 1037 # All elements OK, squash backtracking state 1038 #text::write /line 1039 #Ins ast_pop_discard 1040 #Ins loc_pop_discard 1041 #CStmt return 1042 1043 #CLabel error 1044 #Ins ast_pop_rewind 1045 #Ins loc_pop_rewind 1046 1047 Ins state_merge_value 1048 return 1049} 1050 1051proc ::pt::peg::to::cparam::Op::Asm::xexit1a {} { error deprecated/illegal-to-call 1052 Ins error_pop_merge 1053 1054 # Stop the sequence on element failure, and 1055 # restore state to before we tried the sequence. 1056 1057 CStmt if (!rde_param_query_st(@stateref@)) goto error_pushed 1058 1059 # All elements OK, squash backtracking state 1060 text::write /line 1061 Ins ast_pop_discard 1062 Ins loc_pop_discard 1063 CStmt return 1064 1065 CLabel error_pushed 1066 Ins ast_pop_rewind 1067 CLabel error 1068 Ins loc_pop_rewind 1069 return 1070} 1071 1072# ### ### ### ######### ######### ######### 1073## The various part of a choice compilation. 1074 1075proc ::pt::peg::to::cparam::Op::Asm::/init0 {} { 1076 #Ins error_clear 1077 #text::write /line 1078 #Ins loc_push 1079 #Ins error_push 1080 1081 Ins state_push_void 1082 return 1083} 1084 1085proc ::pt::peg::to::cparam::Op::Asm::/init1 {} { 1086 #Ins error_clear 1087 #text::write /line 1088 #Ins ast_push 1089 #Ins loc_push 1090 #Ins error_push 1091 1092 Ins state_push_value 1093 return 1094} 1095 1096proc ::pt::peg::to::cparam::Op::Asm::/inter00 {} { 1097 #Ins error_pop_merge 1098 #CStmt if (rde_param_query_st(@stateref@)) goto ok 1099 #Ins loc_pop_rewind 1100 #Ins loc_push 1101 #Ins error_push 1102 1103 CStmt if (rde_param_i_bra_void2void(@stateref@)) return 1104 return 1105} 1106 1107proc ::pt::peg::to::cparam::Op::Asm::/inter01 {} { 1108 #Ins error_pop_merge 1109 #CStmt if (rde_param_query_st(@stateref@)) goto ok 1110 #Ins loc_pop_rewind 1111 #Ins ast_push 1112 #Ins loc_push 1113 #Ins error_push 1114 1115 CStmt if (rde_param_i_bra_void2value(@stateref@)) return 1116 return 1117} 1118 1119proc ::pt::peg::to::cparam::Op::Asm::/inter10 {} { 1120 #Ins error_pop_merge 1121 #CStmt if (rde_param_query_st(@stateref@)) goto ok_xgen 1122 #Ins ast_pop_rewind 1123 #Ins loc_pop_rewind 1124 #Ins ast_push ??-wrong 1125 #Ins loc_push 1126 #Ins error_push 1127 1128 CStmt if (rde_param_i_bra_value2void(@stateref@)) return 1129 return 1130} 1131 1132proc ::pt::peg::to::cparam::Op::Asm::/inter11 {} { 1133 #Ins error_pop_merge 1134 #CStmt if (rde_param_query_st(@stateref@)) goto ok_xgen 1135 #Ins ast_pop_rewind 1136 #Ins loc_pop_rewind 1137 #Ins ast_push 1138 #Ins loc_push 1139 #Ins error_push 1140 1141 CStmt if (rde_param_i_bra_value2value(@stateref@)) return 1142 return 1143} 1144 1145proc ::pt::peg::to::cparam::Op::Asm::/exit0 {} { 1146 Ins state_merge_void 1147} 1148 1149proc ::pt::peg::to::cparam::Op::Asm::/exit1 {} { 1150 Ins state_merge_value 1151} 1152 1153proc ::pt::peg::to::cparam::Op::Asm::/exit00 {} { error deprecated 1154 Ins error_pop_merge 1155 1156 CStmt if (rde_param_query_st(@stateref@)) goto ok 1157 1158 Ins loc_pop_rewind 1159 1160 # All branches FAILED 1161 text::write /line 1162 Ins status_fail 1163 CStmt return 1164 1165 CLabel ok 1166 Ins loc_pop_discard 1167 return 1168} 1169 1170proc ::pt::peg::to::cparam::Op::Asm::/exit01 {} { error deprecated 1171 Ins error_pop_merge 1172 1173 CStmt if (rde_param_query_st(@stateref@)) goto ok 1174 1175 Ins loc_pop_rewind 1176 1177 # All branches FAILED 1178 text::write /line 1179 Ins status_fail 1180 CStmt return 1181 1182 CLabel ok_xgen 1183 Ins ast_pop_discard 1184 CLabel ok 1185 Ins loc_pop_discard 1186 return 1187} 1188 1189proc ::pt::peg::to::cparam::Op::Asm::/exit10 {} { error deprecated 1190 Ins error_pop_merge 1191 1192 CStmt if (rde_param_query_st(@stateref@)) goto ok_xgen 1193 Ins ast_pop_rewind 1194 1195 Ins loc_pop_rewind 1196 1197 # All branches FAILED 1198 text::write /line 1199 Ins status_fail 1200 CStmt return 1201 1202 CLabel ok_xgen 1203 Ins ast_pop_discard 1204 1205 Ins loc_pop_discard 1206 return 1207} 1208 1209proc ::pt::peg::to::cparam::Op::Asm::/exit11 {} { error deprecated 1210 Ins error_pop_merge 1211 1212 CStmt if (rde_param_query_st(@stateref@)) goto ok_xgen 1213 Ins ast_pop_rewind 1214 1215 Ins loc_pop_rewind 1216 1217 # All branches FAILED 1218 text::write /line 1219 Ins status_fail 1220 CStmt return 1221 1222 CLabel ok_xgen 1223 Ins ast_pop_discard 1224 1225 CLabel ok 1226 Ins loc_pop_discard 1227 return 1228} 1229 1230# ### ### ### ######### ######### ######### 1231## Allocate a text block / internal symbol / function 1232 1233proc ::pt::peg::to::cparam::Op::Asm::Start {} { 1234 upvar 1 result result 1235 set result {def {} use {} gen 0 pe {}} 1236 return 1237} 1238 1239proc ::pt::peg::to::cparam::Op::Asm::Done {} { 1240 upvar 1 result result 1241 return -code return $result 1242 return 1243} 1244 1245proc ::pt::peg::to::cparam::Op::Asm::ReExpression {op args} { 1246 upvar 1 result result 1247 1248 set pe $op 1249 foreach a $args { 1250 lappend pe [dict get $a pe] 1251 } 1252 1253 dict set result pe $pe 1254 PE $pe 1255 return 1256} 1257 1258proc ::pt::peg::to::cparam::Op::Asm::ReTerminal {op args} { 1259 upvar 1 result result 1260 1261 set pe [linsert $args 0 $op] 1262 dict set result pe $pe 1263 PE $pe 1264 return 1265} 1266 1267proc ::pt::peg::to::cparam::Op::Asm::GenAST {args} { 1268 upvar 1 result result 1269 1270 foreach a $args { 1271 lappend flags [dict get $a gen] 1272 } 1273 1274 dict set result gen [tcl::mathfunc::max {*}$flags] 1275 dict set result genmin [tcl::mathfunc::min {*}$flags] 1276 return $flags 1277} 1278 1279proc ::pt::peg::to::cparam::Op::Asm::NewBlock {type} { 1280 variable counter 1281 variable lastid ${type}_[incr counter] 1282 return $lastid 1283} 1284 1285proc ::pt::peg::to::cparam::Op::Asm::Function {name def args} { 1286 upvar 1 result result 1287 variable cache 1288 variable field 1289 1290 set k [list [dict get $result gen] [dict get $result pe]] 1291 1292 # Hardcoded 'compact == 1', compare "pt_peg_to_param.tcl" 1293 if {[info exists cache($k)]} { 1294 dict set result def {} 1295 dict set result use $cache($k) 1296 return 1297 } 1298 1299 text::write clear 1300 if {[text::write exists FUN_HEADER]} { 1301 text::write recall FUN_HEADER 1302 text::write undef FUN_HEADER 1303 } 1304 1305 FunStart $name 1306 1307 text::write recall PE ; # Generated in Asm::ReExpression, printed 1308 text::write undef PE ; # representation of the expression, to 1309 # make the generated code more readable. 1310 uplevel 1 $def 1311 CStmt return 1312 1313 FunClose 1314 1315 if {[llength $args]} { 1316 Use {*}$args 1317 } 1318 1319 text::write store $name 1320 1321 set useb [NewBlock anon] 1322 text::write clear 1323 Self $name 1324 text::write store $useb 1325 1326 dict set result def $name 1327 dict set result use $useb 1328 1329 set cache($k) $useb 1330 return 1331} 1332 1333proc ::pt::peg::to::cparam::Op::Asm::Direct {use} { 1334 variable field 1335 upvar 1 result result 1336 1337 set useb [NewBlock anon] 1338 text::write clear 1339 1340 set saved $field 1341 set field 0 1342 1343 uplevel 1 $use 1344 1345 text::write store $useb 1346 1347 set field $saved 1348 1349 dict set result def {} 1350 dict set result use $useb 1351 return 1352} 1353 1354proc ::pt::peg::to::cparam::Op::Asm::Call {expr {distance 1}} { 1355 variable field 1356 #if {$distance} { text::write /line } 1357 1358 set id [dict get $expr use] 1359 1360 text::write store CURRENT 1361 text::write clear 1362 text::write recall $id 1363 text::write indent $field 1364 text::write store CALL 1365 1366 text::write clear 1367 text::write recall CURRENT 1368 text::write recall CALL 1369 1370 text::write undef CURRENT 1371 text::write undef CALL 1372 1373 #if {$distance} { text::write /line } 1374 return 1375} 1376 1377proc ::pt::peg::to::cparam::Op::Asm::Use {args} { 1378 foreach item $args { 1379 set def [dict get $item def] 1380 if {$def eq {}} continue 1381 text::write recall $def 1382 text::write undef $def 1383 } 1384 return 1385} 1386 1387proc ::pt::peg::to::cparam::Op::Asm::FunStart {name} { 1388 text::write /line 1389 text::write field @def@ void @ns@$name (@statedecl@) \{ @prelude@ 1390 text::write /line 1391 text::write store CURRENT 1392 1393 text::write clear 1394 text::write recall FORWARD 1395 text::write field @def@ void @ns@$name (@statedecl@)\; 1396 text::write /line 1397 text::write store FORWARD 1398 1399 text::write clear 1400 text::write recall CURRENT 1401 return 1402} 1403 1404proc ::pt::peg::to::cparam::Op::Asm::FunClose {} { 1405 text::write field \} 1406 text::write /line 1407 return 1408} 1409 1410proc ::pt::peg::to::cparam::Op::Asm::Ins {args} { 1411 set args [lassign $args name] 1412 CStmt rde_param_i_$name ([join [linsert $args 0 @stateref@] {, }]) 1413 return 1414} 1415 1416proc ::pt::peg::to::cparam::Op::Asm::Self {args} { 1417 variable field 1418 set args [lassign $args name] 1419 set saved $field 1420 set field 0 1421 CStmt @self@ @ns@$name ([join [linsert $args 0 @stateref@] {, }]) 1422 set field $saved 1423 return 1424} 1425 1426proc ::pt::peg::to::cparam::Op::Asm::>>> {n} { 1427 variable field 1428 incr field $n 1429 return 1430} 1431 1432proc ::pt::peg::to::cparam::Op::Asm::<<< {n} { 1433 variable field 1434 incr field -$n 1435 return 1436} 1437 1438proc ::pt::peg::to::cparam::Op::Asm::CLabel {name} { 1439 text::write /line 1440 <<< 2 1441 CBlock ${name}: 1442 >>> 2 1443 return 1444} 1445 1446proc ::pt::peg::to::cparam::Op::Asm::CStmt {args} { 1447 variable field 1448 1449 # Note: The lreplace/lindex dance appends a ; to the last element 1450 # in the list, closing the statement. 1451 1452 text::write fieldl $field {} 1453 text::write field {*}[lreplace $args end end [lindex $args end]\;] 1454 text::write /line 1455 return 1456} 1457 1458proc ::pt::peg::to::cparam::Op::Asm::CBlock {args} { 1459 variable field 1460 text::write fieldl $field {} 1461 text::write field {*}$args 1462 text::write /line 1463 return 1464} 1465 1466proc ::pt::peg::to::cparam::Op::Asm::Header {text} { 1467 text::write field "/*" 1468 text::write /line 1469 text::write field " * $text" 1470 text::write /line 1471 text::write field " */" 1472 text::write /line 1473 #text::write /line 1474 return 1475} 1476 1477proc ::pt::peg::to::cparam::Op::Asm::PE {pe} { 1478 text::write clear 1479 text::write field " /*" 1480 text::write /line 1481 foreach l [split [pt::pe print $pe] \n] { 1482 text::write field " * $l" 1483 text::write /line 1484 } 1485 text::write field " */" 1486 text::write /line 1487 text::write /line 1488 text::write store PE 1489 return 1490} 1491 1492proc ::pt::peg::to::cparam::Op::Asm::String {s} { 1493 variable cache 1494 1495 set k str,$s 1496 1497 if {![info exists cache($k)]} { 1498 set id [incr cache(_str,counter)] 1499 set cache($k) $id 1500 1501 lappend cache(_strings) \ 1502 "/* [format %8d $id] = */ \"$s\"" 1503 } 1504 1505 return $cache($k) 1506} 1507 1508proc ::pt::peg::to::cparam::Op::Asm::Setup {} { 1509 variable counter 0 1510 variable field 3 1511 variable cache 1512 array unset cache * 1513 set cache(_str,counter) -1 1514 set cache(_strings) {} 1515 return 1516} 1517 1518# ### ### ### ######### ######### ######### 1519## Configuration 1520 1521namespace eval ::pt::peg::to::cparam { 1522 namespace eval ::pt::peg::to::cparam::Op::Asm { 1523 variable counter 0 1524 variable fieldlen {17 5 5} 1525 variable field 3 1526 variable cache 1527 array set cache {} 1528 set cache(_str,counter) -1 1529 set cache(_strings) {} 1530 } 1531 1532 variable omap ; array set omap { 1533 file file 1534 fun-qualifier def 1535 indent indent 1536 main main 1537 name name 1538 namespace ns 1539 prelude prelude 1540 self-command self 1541 state-decl statedecl 1542 state-ref stateref 1543 string-varname strings 1544 template template 1545 user user 1546 } 1547 1548 variable self {} 1549 variable ns {} 1550 variable def static 1551 variable main __main 1552 variable indent 0 1553 variable prelude {} 1554 variable statedecl {RDE_PARAM p} 1555 variable stateref p 1556 variable strings p_string 1557 1558 variable template @code@ ; # A string. Specifies how to 1559 # embed the generated code into a 1560 # larger frame- work (the 1561 # template). 1562 variable name a_pe_grammar ; # String. Name of the grammar. 1563 variable file unknown ; # String. Name of the file or 1564 # other entity the grammar came 1565 # from. 1566 variable user unknown ; # String. Name of the user on 1567 # which behalf the conversion has 1568 # been invoked. 1569} 1570 1571# ### ### ### ######### ######### ######### 1572## Ready 1573 1574package provide pt::peg::to::cparam 1.0.1 1575return 1576