1# -*- tcl -*- 2# 3# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 4# Parser Generator / Backend - PEG as ... PEG 5 6# ### ### ### ######### ######### ######### 7## Dumping the input grammar. But not as Tcl or other code. In PEG 8## format again, pretty printing. 9 10# ### ### ### ######### ######### ######### 11## Requisites 12 13package require textutil 14 15namespace eval ::page::gen::peg::canon {} 16 17# ### ### ### ######### ######### ######### 18## API 19 20proc ::page::gen::peg::canon {t chan} { 21 22 # Generate data for inherited attributes 23 # used during synthesis. 24 canon::Setup $t 25 26 # Synthesize all text fragments we need. 27 canon::Synth $t 28 29 # And write the grammar text. 30 puts $chan [$t get root TEXT] 31 return 32} 33 34# ### ### ### ######### ######### ######### 35## Internal. Helpers 36 37proc ::page::gen::peg::canon::Setup {t} { 38 # Phase 1: Top-down, inherited attributes: 39 # 40 # - Max length of nonterminal symbols defined by the grammar. 41 # 42 # - Indentation put on all rules to get enough space for 43 # definition attributes. 44 45 set max -1 46 array set modes {} 47 48 foreach {sym def} [$t get root definitions] { 49 set l [string length $sym] 50 if {$l > $max} {set max $l} 51 52 set mode [string index [$t get $def mode] 0] 53 set modes($mode) . 54 } 55 set modeset [join [lsort [array names modes]] ""] 56 set mlen [AttrFieldLength $modeset] 57 set heading [expr {$max + $mlen + 4}] 58 # The constant 4 is for ' <- ', see 59 # SynthNode/Nonterminal 60 61 # Save the computed information for access by the definitions and 62 # other operators. 63 64 $t set root SYM_FIELDLEN $max 65 $t set root ATT_FIELDLEN $mlen 66 $t set root ATT_BASE $modeset 67 $t set root HEADLEN $heading 68 return 69} 70 71proc ::page::gen::peg::canon::Synth {t} { 72 # Phase 2: Bottom-up, synthesized attributes 73 # 74 # - Text block per node, length and height. 75 76 $t walk root -order post -type dfs n { 77 SynthNode $t $n 78 } 79 return 80} 81 82proc ::page::gen::peg::canon::SynthNode {t n} { 83 if {$n eq "root"} { 84 set code Root 85 } elseif {[$t keyexists $n symbol]} { 86 set code Nonterminal 87 } elseif {[$t keyexists $n op]} { 88 set code [$t get $n op] 89 } else { 90 return -code error "PANIC. Bad node $n, cannot classify" 91 } 92 93 #puts stderr "SynthNode/$code $t $n" 94 95 SynthNode/$code $t $n 96 97 #SHOW [$t get $n TEXT] 1 0 98 #catch {puts stderr "\t.[$t get $n W]x[$t get $n H]"} 99 return 100} 101 102proc ::page::gen::peg::canon::SynthNode/Root {t n} { 103 # Root is the grammar itself. 104 105 # Get the data we need from our children, which are start 106 # expression and nonterminal definitions. 107 108 set gname [$t get root name] 109 set gstart [$t get root start] 110 if {$gstart ne ""} { 111 set stext [$t get $gstart TEXT] 112 } else { 113 puts stderr "No start expression." 114 set stext "" 115 } 116 set rules {} 117 foreach {sym def} [$t get root definitions] { 118 lappend rules [list $sym [$t get $def TEXT]] 119 } 120 121 # Combine them into a text for the whole grammar. 122 123 set intro "PEG $gname \(" 124 set ispace [::textutil::blank [string length $intro]] 125 126 set out "" 127 append out "# -*- text -*-" \n 128 append out "## Parsing Expression Grammar '$gname'." \n 129 append out "## Layouted by the PG backend 'PEGwriter'." \n 130 append out \n 131 append out $intro[::textutil::indent $stext $ispace 1]\) 132 append out \n 133 append out \n 134 135 foreach e [lsort -dict -index 0 $rules] { 136 foreach {sym text} $e break 137 append out $text \n 138 append out \n 139 } 140 141 append out "END\;" \n 142 143 $t set root TEXT $out 144 return 145} 146 147proc ::page::gen::peg::canon::SynthNode/Nonterminal {t n} { 148 # This is the root of a definition. We now 149 # have to combine the text block for the 150 # expression with nonterminal and attribute 151 # data. 152 153 variable ms 154 155 set abase [$t get root ATT_BASE] 156 set sfl [$t get root SYM_FIELDLEN] 157 set mode [$t get $n mode] 158 set sym [$t get $n symbol] 159 set etext [$t get [lindex [$t children $n] 0] TEXT] 160 161 set out "" 162 append out $ms($abase,$mode) 163 append out $sym 164 append out [::textutil::blank [expr {$sfl - [string length $sym]}]] 165 append out " <- " 166 167 set ispace [::textutil::blank [string length $out]] 168 169 append out [::textutil::indent $etext $ispace 1] 170 append out " ;" 171 172 $t set $n TEXT $out 173 return 174} 175 176proc ::page::gen::peg::canon::SynthNode/t {t n} { 177 # Terminal node. Primitive layout. 178 # Put the char into single or double quotes. 179 180 set ch [$t get $n char] 181 if {$ch eq "'"} {set q "\""} else {set q '} 182 183 set text $q$ch$q 184 185 SetBlock $t $n $text 186 return 187} 188 189proc ::page::gen::peg::canon::SynthNode/n {t n} { 190 # Nonterminal node. Primitive layout. Text is the name of smybol 191 # itself. 192 193 SetBlock $t $n [$t get $n sym] 194 return 195} 196 197proc ::page::gen::peg::canon::SynthNode/.. {t n} { 198 # Range is [x-y] 199 set b [$t get $n begin] 200 set e [$t get $n end] 201 SetBlock $t $n "\[${b}-${e}\]" 202 return 203} 204 205proc ::page::gen::peg::canon::SynthNode/alnum {t n} {SetBlock $t $n <alnum>} 206proc ::page::gen::peg::canon::SynthNode/alpha {t n} {SetBlock $t $n <alpha>} 207proc ::page::gen::peg::canon::SynthNode/dot {t n} {SetBlock $t $n .} 208proc ::page::gen::peg::canon::SynthNode/epsilon {t n} {SetBlock $t $n ""} 209 210proc ::page::gen::peg::canon::SynthNode/? {t n} {SynthSuffix $t $n ?} 211proc ::page::gen::peg::canon::SynthNode/* {t n} {SynthSuffix $t $n *} 212proc ::page::gen::peg::canon::SynthNode/+ {t n} {SynthSuffix $t $n +} 213 214proc ::page::gen::peg::canon::SynthNode/! {t n} {SynthPrefix $t $n !} 215proc ::page::gen::peg::canon::SynthNode/& {t n} {SynthPrefix $t $n &} 216 217proc ::page::gen::peg::canon::SynthSuffix {t n op} { 218 219 set sub [lindex [$t children $n] 0] 220 set sop [$t get $sub op] 221 set etext [$t get $sub TEXT] 222 223 WrapParens $op $sop etext 224 SetBlock $t $n $etext$op 225 return 226} 227 228proc ::page::gen::peg::canon::SynthPrefix {t n op} { 229 230 set sub [lindex [$t children $n] 0] 231 set sop [$t get $sub op] 232 set etext [$t get $sub TEXT] 233 234 WrapParens $op $sop etext 235 SetBlock $t $n $op$etext 236 return 237} 238 239proc ::page::gen::peg::canon::SynthNode/x {t n} { 240 variable llen 241 242 # Space given to us for an expression. 243 set lend [expr {$llen - [$t get root HEADLEN]}] 244 245 set clist [$t children $n] 246 if {[llength $clist] == 1} { 247 # Implicit cutting out of chains. 248 249 CopyBlock $t $n [lindex $clist 0] 250 251 #puts stderr <<implicit>> 252 return 253 } 254 255 set out "" 256 257 # We are not tracking the total width of the block, but only the 258 # width of the current line, as that is where we may have to 259 # wrap. The height however is the total height. 260 261 #puts stderr <<$clist>> 262 #puts stderr \t___________________________________ 263 264 set w 0 265 set h 0 266 foreach c $clist { 267 set sop [$t get $c op] 268 set sub [$t get $c TEXT] 269 set sw [$t get $c W] 270 set slw [$t get $c Wlast] 271 set sh [$t get $c H] 272 273 #puts stderr \t<$sop/$sw/$slw/$sh>___________________________________ 274 #SHOW $sub $slw $sh 275 276 if {[Paren x $sop]} { 277 set sub "([::textutil::indent $sub " " 1])" 278 incr slw 2 279 incr sw 2 280 281 #puts stderr /paren/ 282 #SHOW $sub $slw $sh 283 } 284 285 # Empty buffer ... Put element, and extend dimensions 286 287 #puts stderr \t.============================= 288 #SHOW $out $w $h 289 290 if {$w == 0} { 291 #puts stderr /init 292 append out $sub 293 set w $slw 294 set h $sh 295 } elseif {($w + $sw + 1) > $lend} { 296 #puts stderr /wrap/[expr {($w + $sw + 1)}]/$lend 297 # To large, wrap into next line. 298 append out \n $sub 299 incr h $sh 300 set w $slw 301 } else { 302 # We have still space to put the block in. Either by 303 # simply appending, or by indenting a multiline block 304 # properly so that its parts stay aligned with each other. 305 if {$sh == 1} { 306 #puts stderr /add/line 307 append out " " $sub 308 incr w ; incr w $slw 309 } else { 310 append out " " ; incr w 311 #puts stderr /add/block/$w 312 append out [::textutil::indent $sub [::textutil::blank $w] 1] 313 incr w $slw 314 incr h $sh ; incr h -1 315 } 316 } 317 318 #puts stderr \t.~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 319 #SHOW $out $w $h 320 } 321 322 SetBlock $t $n $out 323 return 324} 325 326proc ::page::gen::peg::canon::SynthNode// {t n} { 327 # We take all branches and put them together, nicely aligned under 328 # each other. 329 330 set clist [$t children $n] 331 if {[llength $clist] == 1} { 332 # Implicit cutting out of chains. 333 334 CopyBlock $t $n [lindex $clist 0] 335 return 336 } 337 338 set out "" 339 foreach c $clist { 340 set sop [$t get $c op] 341 set sub [$t get $c TEXT] 342 WrapParens / $sop sub 343 append out "/ [::textutil::indent $sub " " 1]" \n 344 } 345 346 SetBlock $t $n " [string range $out 1 end]" 347 return 348} 349 350proc ::page::gen::peg::canon::WrapParens {op sop tvar} { 351 if {[Paren $op $sop]} { 352 upvar 1 $tvar text 353 set text "([::textutil::indent $text " " 1])" 354 } 355} 356 357proc ::page::gen::peg::canon::Paren {op sop} { 358 # sop is nested under op. 359 # Parens are required if sop has a lower priority than op. 360 361 return [expr {[Priority $sop] < [Priority $op]}] 362} 363 364proc ::page::gen::peg::canon::Priority {op} { 365 switch -exact -- $op { 366 t - 367 n - 368 .. - 369 alnum - 370 alpha - 371 dot - 372 epsilon {return 4} 373 ? - 374 * - 375 + {return 3} 376 ! - 377 & {return 2} 378 x {return 1} 379 / {return 0} 380 } 381 return -code error "Internal error, bad operator \"$op\"" 382} 383 384proc ::page::gen::peg::canon::CopyBlock {t n src} { 385 $t set $n TEXT [$t get $src TEXT] 386 $t set $n W [$t get $src W] 387 $t set $n Wlast [$t get $src Wlast] 388 $t set $n H [$t get $src H] 389 return 390} 391 392proc ::page::gen::peg::canon::SetBlock {t n text} { 393 set text [string trimright $text] 394 set lines [split $text \n] 395 set height [llength $lines] 396 397 if {$height > 1} { 398 set max -1 399 set ntext {} 400 401 foreach line $lines { 402 set line [string trimright $line] 403 set l [string length $line] 404 if {$l > $max} {set max $l} 405 lappend ntext $line 406 set wlast $l 407 } 408 set text [join $ntext \n] 409 set width $max 410 } else { 411 set width [string length $text] 412 set wlast $width 413 } 414 415 $t set $n TEXT $text 416 $t set $n W $width 417 $t set $n Wlast $wlast 418 $t set $n H $height 419 return 420} 421 422proc ::page::gen::peg::canon::AttrFieldLength {modeset} { 423 variable ms 424 return $ms($modeset,*) 425} 426 427if {0} { 428 proc ::page::gen::peg::canon::SHOW {text w h} { 429 set wl $w ; incr wl -1 430 puts stderr "\t/$h" 431 puts stderr "[textutil::indent $text \t|]" 432 puts stderr "\t\\[string repeat "-" $wl]^ ($w)" 433 return 434 } 435} 436 437# ### ### ### ######### ######### ######### 438## Internal. Strings. 439 440namespace eval ::page::gen::peg::canon { 441 variable llen 80 442 variable ms ; array set ms { 443 dlmv,discard {void: } 444 dlmv,leaf {leaf: } 445 dlmv,match {match: } 446 dlmv,value { } 447 dlmv,* 7 448 449 dlm,discard {void: } dlv,discard {void: } 450 dlm,leaf {leaf: } dlv,leaf {leaf: } 451 dlm,match {match: } dlv,value { } 452 dlm,* 7 dlv,* 6 453 454 dmv,discard {void: } lmv,leaf {leaf: } 455 dmv,match {match: } lmv,match {match: } 456 dmv,value { } lmv,value { } 457 dmv,* 7 lmv,* 7 458 459 dl,discard {void: } dm,discard {void: } 460 dl,leaf {leaf: } dm,match {match: } 461 dl,* 6 dm,* 7 462 463 lm,leaf {leaf: } dv,discard {void: } 464 lm,match {match: } dv,value { } 465 lm,* 7 dv,* 6 466 467 lv,leaf {leaf: } mv,match {match: } 468 lv,value { } mv,value { } 469 lv,* 6 mv,* 7 470 471 d,discard {void: } d,* 6 472 l,leaf {leaf: } l,* 6 473 m,match {match: } m,* 7 474 v,value {} v,* 0 475 } 476} 477 478# ### ### ### ######### ######### ######### 479## Ready 480 481package provide page::gen::peg::canon 0.1 482