1# -*- tcl -*- 2# 3# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 4# Parser Generator / Transformation - Normalize PEG AST for later. 5 6# This package assumes to be used from within a PAGE plugin. It uses 7# the API commands listed below. These are identical across the major 8# types of PAGE plugins, allowing this package to be used in reader, 9# transform, and writer plugins. It cannot be used in a configuration 10# plugin, and this makes no sense either. 11# 12# To ensure that our assumption is ok we require the relevant pseudo 13# package setup by the PAGE plugin management code. 14# 15# -----------------+-- 16# page_info | Reporting to the user. 17# page_warning | 18# page_error | 19# -----------------+-- 20# page_log_error | Reporting of internals. 21# page_log_warning | 22# page_log_info | 23# -----------------+-- 24 25# ### ### ### ######### ######### ######### 26## Requisites 27 28# @mdgen NODEP: page::plugin 29 30package require page::plugin ; # S.a. pseudo-package. 31package require treeql 32package require page::util::quote 33 34namespace eval ::page::util::norm::peg { 35 # Get the peg char de/encoder commands. 36 # (unquote, quote'tcl) 37 38 namespace import ::page::util::quote::* 39} 40 41# ### ### ### ######### ######### ######### 42## API 43 44proc ::page::util::norm::peg {t} { 45 set q [treeql q -tree $t] 46 47 page_info {[PEG Normalization]} 48 page_log_info ..Terminals ; peg::Terminals $q $t 49 page_log_info ..Chains ; peg::CutChains $q $t 50 page_log_info ..Metadata ; peg::Metadata $q $t 51 page_log_info ..Definitions ; peg::Definitions $q $t 52 page_log_info ..Expressions ; peg::Expressions $q $t 53 54 # Sentinel for PE algorithms. 55 $t set root symbol <StartExpression> 56 $q destroy 57 58 page_log_info Ok 59 return 60} 61 62# ### ### ### ######### ######### ######### 63## Documentation 64# 65## See doc_normalize.txt for the specification of the publicly visible 66## attributes. 67## 68## Internal attributes 69## - DATA - Transient storage for terminal data. 70 71# ### ### ### ######### ######### ######### 72## Internal. Helpers 73 74proc ::page::util::norm::peg::Terminals {q t} { 75 # The data for all terminals is stored in their grandparental 76 # nodes. We get rid of both terminals and their parents. 77 78 $q query tree withatt type terminal over n { 79 set p [$t parent $n] 80 set gp [$t parent $p] 81 82 CopyLocation $t $n $gp 83 AttrCopy $t $n detail $gp DATA 84 TokReduce $t $gp DATA 85 $t delete $p 86 } 87 88 # We can now drop the type attribute, as all the remaining nodes 89 # (which have it) will contain the value 'nonterminal'. 90 91 $q query tree hasatt type over n { 92 $t unset $n type 93 } 94 return 95} 96 97proc ::page::util::norm::peg::CutChains {q t} { 98 # All nodes which have exactly one child are irrelevant. We get 99 # rid of them. The root node is the sole exception. The immediate 100 # child of the root however is superfluous as well. 101 102 $q query tree notq {root} over n { 103 if {[llength [$t children $n]] != 1} continue 104 $t cut $n 105 } 106 107 foreach n [$t children root] {$t cut $n} 108 return 109} 110 111proc ::page::util::norm::peg::Metadata {q t} { 112 # Having the name of the grammar in a tree node is overkill. We 113 # move this information into an attribute of the root node. 114 # The node keeping the start expression separate is irrelevant as 115 # well. We get rid of it, and tag the root of the start expression 116 # with a marker attribute. 117 118 $q query tree withatt detail Header over n { 119 set tmp [Child $t $n 0] 120 set sexpr [Child $t $n 1] 121 122 AttrCopy $t $tmp DATA root name 123 $t cut $tmp 124 $t cut $n 125 break 126 } 127 128 # Remember the node for the start expression in the root for quick 129 # access by later stages. 130 131 $t set root start $sexpr 132 return 133} 134 135proc ::page::util::norm::peg::Definitions {q t} { 136 # We move nonterminal hint information from nodes into attributes, 137 # and delete the now irrelevant nodes. 138 139 # NOTE: This transformation is dependent on the removal of all 140 # nodes with exactly one child, as it removes the all 'Attribute' 141 # nodes already. Otherwise this transformation would have to put 142 # the information into the grandparental node. 143 144 # The default mode for nonterminals is 'value'. 145 146 $q query tree withatt detail Definition over n { 147 $t set $n mode value 148 } 149 150 foreach {a mode} { 151 VOID discard 152 MATCH match 153 LEAF leaf 154 } { 155 $q query tree withatt detail $a over n { 156 set p [$t parent $n] 157 $t set $p mode $mode 158 $t delete $n 159 } 160 } 161 162 # Like with the global metadata we move definition specific 163 # information out of nodes into attributes, get rid of the 164 # superfluous nodes, and tag the definition roots with marker 165 # attributes. 166 167 set defs {} 168 $q query tree withatt detail Definition over n { 169 # Define mode information for all nonterminals without an 170 # explicit specification. We also save the mode information 171 # from deletion when we redo the definition node. 172 173 set first [Child $t $n 0] 174 175 set sym [$t get $first DATA] 176 $t set $n symbol $sym 177 $t set $n label $sym 178 $t set $n users {} 179 180 # Now determine the range in the input covered by the 181 # definition. The left extent comes from the terminal for the 182 # nonterminal symbol it defines. The right extent comes from 183 # the rightmost child under the definition. While this not an 184 # expression tree yet the location data is sound already. 185 186 MergeLocations $t $first [Rightmost $t $n] $n 187 $t unset $n detail 188 189 lappend defs $sym $n 190 $t cut $first 191 } 192 193 # We remember a mapping from nonterminal names to their defining 194 # nodes in the root as well, for quick reference later, when we 195 # build nonterminal usage references 196 197 $t set root definitions $defs 198 return 199} 200 201proc ::page::util::norm::peg::Rightmost {t n} { 202 # Determine the rightmost leaf under the specified node. 203 204 if {[$t isleaf $n]} {return $n} 205 return [Rightmost $t [lindex [$t children $n] end]] 206} 207 208proc ::page::util::norm::peg::Expressions {q t} { 209 # We now transform the remaining nodes into proper expression 210 # trees. The order matters, to shed as much nodes as possible 211 # early, and to avoid unncessary work. 212 213 ExprRanges $q $t 214 ExprUnaryOps $q $t 215 ExprChars $q $t 216 ExprNonterminals $q $t 217 ExprOperators $q $t 218 ExprFlatten $q $t 219 return 220} 221 222proc ::page::util::norm::peg::ExprRanges {q t} { 223 # Ranges = .. operator 224 225 $q query tree withatt detail Range over n { 226 # Two the children, both of text 'Char', their data is what we 227 # take. The children become irrelevant and are removed. 228 229 foreach {b e} [$t children $n] break 230 set begin [unquote [$t get $b DATA]] 231 set end [unquote [$t get $e DATA]] 232 233 $t set $n op .. 234 $t set $n begin $begin 235 $t set $n end $end 236 237 MergeLocations $t $b $e $n 238 239 $t unset $n detail 240 241 $t delete $b 242 $t delete $e 243 } 244 return 245} 246 247proc ::page::util::norm::peg::ExprUnaryOps {q t} { 248 # Unary operators ... Their transformation sheds more nodes. 249 250 foreach {a op} { 251 QUESTION ? 252 STAR * 253 PLUS + 254 AND & 255 NOT ! 256 } { 257 $q query tree withatt detail $a over n { 258 set p [$t parent $n] 259 260 $t set $p op $op 261 $t cut $n 262 263 $t unset $p detail 264 } 265 } 266 return 267} 268 269proc ::page::util::norm::peg::ExprChars {q t} { 270 # Chars = t operator (The remaining Char'acters are plain terminal 271 # symbols. 272 273 $q query tree withatt detail Char over n { 274 set ch [unquote [$t get $n DATA]] 275 276 $t set $n op t 277 $t set $n char $ch 278 279 $t unset $n detail 280 $t unset $n DATA 281 } 282 return 283} 284 285proc ::page::util::norm::peg::ExprNonterminals {q t} { 286 # Identifiers = n operator (nonterminal references) ... 287 288 array set defs [$t get root definitions] 289 array set undefined {} 290 291 $q query tree withatt detail Identifier over n { 292 set sym [$t get $n DATA] 293 294 $t set $n op n 295 $t set $n sym $sym 296 297 $t unset $n detail 298 $t unset $n DATA 299 300 # Create x-references between the users and the definition of 301 # a nonterminal symbol. 302 303 if {![info exists defs($sym)]} { 304 $t set $n def {} 305 lappend undefined($sym) $n 306 continue 307 } else { 308 set def $defs($sym) 309 $t set $n def $def 310 } 311 312 set users [$t get $def users] 313 lappend users $n 314 $t set $def users $users 315 } 316 317 $t set root undefined [array get undefined] 318 return 319} 320 321proc ::page::util::norm::peg::ExprOperators {q t} { 322 # The remaining operator nodes can be changed directly from node 323 # text to operator. Se we do. 324 325 foreach {a op} { 326 EPSILON epsilon 327 ALNUM alnum 328 ALPHA alpha 329 DOT dot 330 Literal x 331 Class / 332 Sequence x 333 Expression / 334 } { 335 $q query tree withatt detail $a over n { 336 $t set $n op $op 337 $t unset $n detail 338 } 339 } 340 return 341} 342 343proc ::page::util::norm::peg::ExprFlatten {q t} { 344 # Last tweaks of the expressions. Classes inside of Expressions, 345 # and Literals in Sequences create nested / or x expressions. We 346 # locate such and flatten the nested expression, cutting out the 347 # superfluous operator. 348 349 foreach op {x /} { 350 # Locate all x operators, whose parents are x operators as 351 # well, then go back to the child operators and cut them out. 352 353 $q query tree withatt op $op \ 354 parent unique withatt op $op \ 355 children withatt op $op \ 356 over n { 357 $t cut $n 358 } 359 360 # Locate all x operators without children and convert them 361 # into epsilon operators. Because that is what they accept, 362 # nothing. 363 364 $q query tree withatt op $op over n { 365 if {[$t numchildren $n]} continue 366 $t set $n op epsilon 367 } 368 } 369 return 370} 371 372# ### ### ### ######### ######### ######### 373## Internal. Low-level helpers. 374 375proc ::page::util::norm::peg::CopyLocation {t src dst} { 376 $t set $dst range [$t get $src range] 377 $t set $dst range_lc [$t get $src range_lc] 378 return 379} 380 381proc ::page::util::norm::peg::MergeLocations {t srca srcb dst} { 382 set ar [$t get $srca range] 383 set arlc [$t get $srca range_lc] 384 385 set br [$t get $srcb range] 386 set brlc [$t get $srcb range_lc] 387 388 $t set $dst range [list [lindex $ar 0] [lindex $br 1]] 389 $t set $dst range_lc [list [lindex $arlc 0] [lindex $brlc 1]] 390 return 391} 392 393proc ::page::util::norm::peg::TokReduce {t src attr} { 394 set tokens [$t get $src $attr] 395 set ch {} 396 foreach tok $tokens { 397 lappend ch [lindex $tok 0] 398 } 399 $t set $src $attr [join $ch {}] 400 return 401} 402 403proc ::page::util::norm::peg::AttrCopy {t src asrc dst adst} { 404 $t set $dst $adst [$t get $src $asrc] 405 return 406} 407 408proc ::page::util::norm::peg::Child {t n index} { 409 return [lindex [$t children $n] $index] 410} 411 412# ### ### ### ######### ######### ######### 413## Ready 414 415package provide page::util::norm::peg 0.1 416