1# -*- tcl -*- 2# 3# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 4# Parser Generator / Transformation - Normalize PEG AST for later. 5 6# This package assumes to be used from within a PAGE plugin. It uses 7# the API commands listed below. These are identical across the major 8# types of PAGE plugins, allowing this package to be used in reader, 9# transform, and writer plugins. It cannot be used in a configuration 10# plugin, and this makes no sense either. 11# 12# To ensure that our assumption is ok we require the relevant pseudo 13# package setup by the PAGE plugin management code. 14# 15# -----------------+-- 16# page_info | Reporting to the user. 17# page_warning | 18# page_error | 19# -----------------+-- 20# page_log_error | Reporting of internals. 21# page_log_warning | 22# page_log_info | 23# -----------------+-- 24 25# ### ### ### ######### ######### ######### 26## Requisites 27 28# @mdgen NODEP: page::plugin 29 30package require page::plugin ; # S.a. pseudo-package. 31package require treeql 32package require page::util::quote 33package require page::util::peg 34 35namespace eval ::page::util::norm::lemon { 36 # Get the peg char de/encoder commands. 37 # (unquote, quote'tcl) 38 39 namespace import ::page::util::quote::* 40 namespace import ::page::util::peg::* 41} 42 43# ### ### ### ######### ######### ######### 44## API 45 46proc ::page::util::norm::lemon {t} { 47 set q [treeql q -tree $t] 48 49 page_info {[Lemon Normalization]} 50 51 # Retrieve grammar name out of one directive. 52 # Or from LHS of first rule. 53 54 page_log_info ..Startsymbol 55 56 set start {} 57 58 $q query tree \ 59 withatt type nonterminal \ 60 withatt detail StartSymbol \ 61 descendants \ 62 withatt type terminal \ 63 over n { 64 65 lemon::TokReduce $t $n detail 66 set start [$t get $n detail] 67 68 page_info " StartSymbol: $start" 69 } 70 71 $q query tree \ 72 withatt type nonterminal \ 73 withatt detail Name \ 74 descendants \ 75 withatt type terminal \ 76 over n { 77 78 lemon::TokReduce $t $n detail 79 set name [$t get $n detail] 80 81 page_info " Name: $name" 82 83 $t set root name $name 84 } 85 86 page_log_info ..Drop ; lemon::Drop $q $t 87 page_log_info ..Terminals ; lemon::Terminals $q $t 88 page_log_info ..Definitions ; lemon::Definitions $q $t 89 page_log_info ..Rules ; lemon::Rules $q $t start 90 page_log_info ..Epsilon ; lemon::ElimEpsilon $q $t 91 page_log_info ..Autoclass ; lemon::AutoClassId $q $t 92 page_log_info ..Chains 93 94 # Find and cut operator chains, very restricted. Cut only chains 95 # of x- and /-operators. The other operators have only one child 96 # by definition and are thus not chains. 97 98 #set q [treeql q -tree $t] 99 # q query tree over n 100 foreach n [$t children -all root] { 101 if {[$t keyexists $n symbol]} continue 102 if {[llength [$t children $n]] != 1} continue 103 104 set op [$t get $n op] 105 if {($op ne "/") && ($op ne "x")} continue 106 $t cut $n 107 } 108 109 page_log_info ..Flatten 110 111 lemon::flatten $q $t 112 113 # Analysis: Left recursion, and where. 114 # Manual: Definitions for terminals. 115 # Definitions for space, comments. 116 # Integration of this into the grammar. 117 118 # Sentinel for PE algorithms. 119 $t set root symbol <StartExpression> 120 121 if {$start eq ""} { 122 page_error " Startsymbol missing" 123 } else { 124 set s [$t insert root end] 125 $t set $s op n 126 $t set $s sym $start 127 $t set root start $s 128 129 array set def [$t get root definitions] 130 131 if {![info exists def($start)]} { 132 page_error " Startsymbol is undefined" 133 $t set $s def "" 134 } else { 135 $t set $s def $def($start) 136 } 137 unset def 138 } 139 140 $q destroy 141 142 page_log_info Ok 143 return 144} 145 146# ### ### ### ######### ######### ######### 147## Documentation 148# 149## See doc_normalize.txt for the specification of the publicly visible 150## attributes. 151## 152## Internal attributes 153## - DATA - Transient storage for terminal data. 154 155# ### ### ### ######### ######### ######### 156## Internal. Helpers 157 158proc ::page::util::norm::lemon::Drop {q t} { 159 # Simple normalization. 160 # All lemon specific data is dropped completely. 161 162 foreach drop { 163 Directive Codeblock Label Precedence 164 } { 165 $q query tree withatt type nonterminal \ 166 withatt detail $drop over n { 167 $t delete $n 168 } 169 } 170 171 # Some nodes can be dropped, but not their children. 172 173 $q query tree withatt type nonterminal \ 174 withatt detail Statement over n { 175 $t cut $n 176 } 177 178 # Cut the ALL and LemonGrammar nodes, direct access, no search 179 # needed. 180 181 $t cut [lindex [$t children root] 0] 182 $t cut [lindex [$t children root] 0] 183 184 return 185} 186 187proc ::page::util::norm::lemon::Terminals {q t} { 188 # The data for all terminals is stored in their grandparental 189 # nodes. We get rid of both terminals and their parents. 190 191 $q query tree withatt type terminal over n { 192 set p [$t parent $n] 193 set gp [$t parent $p] 194 195 CopyLocation $t $n $gp 196 AttrCopy $t $n detail $gp DATA 197 TokReduce $t $gp DATA 198 $t delete $p 199 } 200 201 # We can now drop the type attribute, as all the remaining nodes 202 # (which have it) will contain the value 'nonterminal'. 203 204 $q query tree hasatt type over n { 205 $t unset $n type 206 } 207 return 208} 209 210proc ::page::util::norm::lemon::Definitions {q t} { 211 # Convert 'Definition' into the sequences they are. 212 # Sequences of length one will be flattened later. 213 # Empty sequences (Length zero) are epsilon. 214 # Epsilon will be later converted to ? of the 215 # whole choice they are part of. 216 217 $q query tree withatt detail Definition over n { 218 $t unset $n detail 219 220 if {[$t children $n] < 1} { 221 $t set $n op epsilon 222 } else { 223 $t set $n op x 224 } 225 } 226 return 227} 228 229proc ::page::util::norm::lemon::Rules {q t sv} { 230 upvar $sv start 231 # We move nonterminal hint information from nodes into attributes, 232 # and delete the now irrelevant nodes. 233 234 # Like with the global metadata we move definition specific 235 # information out of nodes into attributes, get rid of the 236 # superfluous nodes, and tag the definition roots with marker 237 # attributes. 238 239 array set defs {} 240 $q query tree withatt detail Rule over n { 241 set first [Child $t $n 0] 242 243 set sym [$t get $first DATA] 244 $t set $n symbol $sym 245 $t set $n label $sym 246 $t set $n users {} 247 $t set $n mode value 248 249 if {$start eq ""} { 250 page_info " StartSymbol: $sym" 251 set start $sym 252 } 253 254 # We get the left extend of the definition from the terminal 255 # for the symbol it defines. 256 257 MergeLocations $t $first [Rightmost $t $n] $n 258 $t unset $n detail 259 260 lappend defs($sym) $n 261 $t cut $first 262 } 263 264 set d {} 265 foreach sym [array names defs] { 266 set nodes $defs($sym) 267 if {[llength $nodes] == 1} { 268 lappend d $sym [lindex $nodes 0] 269 } else { 270 # Merge multi-node definition together, under a choice. 271 272 set r [$t insert root end] 273 set c [$t insert $r end] 274 275 $t set $r symbol $sym 276 $t set $r label $sym 277 $t set $r users {} 278 $t set $r mode value 279 $t set $c op / 280 281 foreach n $nodes { 282 set seq [lindex [$t children $n] 0] 283 $t move $c end $seq 284 $t delete $n 285 } 286 287 lappend d $sym $r 288 } 289 } 290 291 # We remember a mapping from nonterminal names to their defining 292 # nodes in the root as well, for quick reference later, when we 293 # build nonterminal usage references 294 295 $t set root definitions $d 296 return 297} 298 299proc ::page::util::norm::lemon::Rightmost {t n} { 300 # Determine the rightmost leaf under the specified node. 301 302 if {[$t isleaf $n]} {return $n} 303 return [Rightmost $t [lindex [$t children $n] end]] 304} 305 306proc ::page::util::norm::lemon::ElimEpsilon {q t} { 307 # We convert choices with an epsilon in them into 308 # optional choices without an epsilon branch. 309 310 $q query tree withatt op epsilon over n { 311 set choice [$t parent $n] 312 313 # Move branches into the epsilon, which becomes the new 314 # choice. And the choice becomes an option. 315 foreach c [$t children $choice] { 316 if {$c eq $n} continue 317 $t move $n end $c 318 } 319 $t set $n op / 320 $t set $choice op ? 321 } 322 return 323} 324 325proc ::page::util::norm::lemon::AutoClassId {q t} { 326 327 array set defs [$t get root definitions] 328 array set use {} 329 330 $q query tree \ 331 withatt op x \ 332 children \ 333 hasatt DATA \ 334 over n { 335 # All identifiers are nonterminals, and for the 336 # undefined ones we create rules which define 337 # them as terminal sequences. 338 339 set sym [$t get $n DATA] 340 $t unset $n DATA 341 342 $t set $n op n 343 $t set $n sym $sym 344 345 if {![info exists defs($sym)]} { 346 set defs($sym) [NewTerminal $t $sym] 347 } 348 $t set $n def $defs($sym) 349 350 lappend use($sym) $n 351 $t unset $n detail 352 } 353 354 $t set root definitions [array get defs] 355 356 foreach sym [array names use] { 357 $t set $defs($sym) users $use($sym) 358 } 359 360 $t set root undefined {} 361 return 362} 363 364proc ::page::util::norm::lemon::NewTerminal {t sym} { 365 page_log_info " Terminal: $sym" 366 367 set r [$t insert root end] 368 $t set $r symbol $sym 369 $t set $r label $sym 370 $t set $r users {} 371 $t set $r mode leaf 372 373 set s [$t insert $r end] 374 $t set $s op x 375 376 foreach ch [split $sym {}] { 377 set c [$t insert $s end] 378 $t set $c op t 379 $t set $c char $ch 380 } 381 return $r 382} 383 384# ### ### ### ######### ######### ######### 385## Internal. Low-level helpers. 386 387proc ::page::util::norm::lemon::CopyLocation {t src dst} { 388 $t set $dst range [$t get $src range] 389 $t set $dst range_lc [$t get $src range_lc] 390 return 391} 392 393proc ::page::util::norm::lemon::MergeLocations {t srca srcb dst} { 394 set ar [$t get $srca range] 395 set arlc [$t get $srca range_lc] 396 397 set br [$t get $srcb range] 398 set brlc [$t get $srcb range_lc] 399 400 $t set $dst range [list [lindex $ar 0] [lindex $br 1]] 401 $t set $dst range_lc [list [lindex $arlc 0] [lindex $brlc 1]] 402 return 403} 404 405proc ::page::util::norm::lemon::AttrCopy {t src asrc dst adst} { 406 $t set $dst $adst [$t get $src $asrc] 407 return 408} 409 410proc ::page::util::norm::lemon::Child {t n index} { 411 return [lindex [$t children $n] $index] 412} 413 414proc ::page::util::norm::lemon::TokReduce {t src attr} { 415 set tokens [$t get $src $attr] 416 set ch {} 417 foreach tok $tokens { 418 lappend ch [lindex $tok 0] 419 } 420 $t set $src $attr [join $ch {}] 421 return 422} 423 424# ### ### ### ######### ######### ######### 425## Ready 426 427package provide page::util::norm::lemon 0.1 428