1# -*- tcl -*- 2# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net> 3 4# Utility commands operating on parsing expressions. 5 6# # ## ### ##### ######## ############# ##################### 7## Requirements 8 9package require Tcl 8.5 ; # Required runtime. 10package require pt::pe ; # PE basics 11package require struct::set ; # Set operations (symbol sets) 12 13# # ## ### ##### ######## ############# ##################### 14## 15 16namespace eval ::pt::pe::op { 17 namespace export \ 18 drop rename called flatten fusechars 19 20 namespace ensemble create 21} 22 23# # ## ### ##### ######## ############# 24## Public API 25 26proc ::pt::pe::op::rename {nt ntnew serial} { 27 if {$nt eq $ntnew} { 28 return $serial 29 } 30 return [pt::pe bottomup \ 31 [list [namespace current]::Rename $nt $ntnew] \ 32 $serial] 33} 34 35proc ::pt::pe::op::drop {dropset serial} { 36 set res [pt::pe bottomup \ 37 [list [namespace current]::Drop $dropset] \ 38 $serial] 39 if {$res eq "@@"} { set res [pt::pe epsilon] } 40 return $res 41} 42 43proc ::pt::pe::op::called {serial} { 44 return [pt::pe bottomup \ 45 [list [namespace current]::Called] \ 46 $serial] 47} 48 49proc ::pt::pe::op::flatten {serial} { 50 return [pt::pe bottomup \ 51 [list [namespace current]::Flatten] \ 52 $serial] 53} 54 55proc ::pt::pe::op::fusechars {serial} { 56 return [pt::pe bottomup \ 57 [list [namespace current]::FuseChars] \ 58 $serial] 59} 60 61# # ## ### ##### ######## ############# 62## Internals 63 64proc ::pt::pe::op::Drop {dropset pe op arguments} { 65 if {$op eq "n"} { 66 lassign $arguments symbol 67 if {[struct::set contains $dropset $symbol]} { 68 return @@ 69 } else { 70 return $pe 71 } 72 } 73 74 switch -exact -- $op { 75 / - x - * - + - ? - & - ! { 76 set newarg {} 77 foreach a $arguments { 78 if {$a eq "@@"} continue 79 lappend newarg $a 80 } 81 82 if {![llength $newarg]} { 83 # Nothing remained, drop the whole expression 84 return [pt::pe epsilon] 85 } elseif {[llength $newarg] < [llength $argument]} { 86 # Some removed, construct a new expression 87 set pe [list $op {*}$newarg] 88 } ; # None removed, no change. 89 } 90 } 91 92 return $pe 93} 94 95proc ::pt::pe::op::Rename {nt ntnew pe op arguments} { 96 #puts R($op)/$arguments/ 97 if {($op eq "n") && ([lindex $arguments 0] eq $nt)} { 98 return [pt::pe nonterminal $ntnew] 99 } else { 100 return $pe 101 } 102} 103 104proc ::pt::pe::op::Called {pe op arguments} { 105 # arguments = list(set-of-symbols) for operators, and n. 106 # ignored for terminal expressions. 107 # result = set-of-symbols 108 109 #puts -nonewline C|$op|$arguments|= 110 switch -exact -- $op { 111 n - & - ! - * - + - ? { 112 #puts |[lindex $arguments 0]| 113 return [lindex $arguments 0] 114 } 115 x - / { 116 #puts |[struct::set union {*}$arguments]| 117 return [struct::set union {*}$arguments] 118 } 119 } 120 #puts || 121 return {} 122} 123 124proc ::pt::pe::op::Flatten {pe op arguments} { 125 switch -exact -- $op { 126 x - / { 127 if {[llength $arguments] == 1} { 128 # Cut single-child x/ out of the tree 129 return [lindex $arguments 0] 130 } else { 131 set res {} 132 foreach c $arguments { 133 if {[lindex $c 0] eq $op} { 134 # Cut x in x (/ in /) operator out of the 135 # tree. 136 lappend res {*}[lrange $c 1 end] 137 } else { 138 # Leave anything else unchanged. 139 lappend res $c 140 } 141 } 142 return [list $op {*}$res] 143 } 144 } 145 default { 146 # Leave anything not x/ unchanged 147 return $pe 148 } 149 } 150} 151 152proc ::pt::pe::op::FuseChars {pe op arguments} { 153 switch -exact -- $op { 154 x { 155 set changed 0 ; # boolean flag showing if fuse ops were done. 156 set buf {} ; # accumulator of chars in a string. 157 set res {} ; # accumulator of new children for operator. 158 159 foreach c $arguments { 160 CollectTerminal $c 161 FuseTerminal 162 lappend res $c 163 } 164 165 # Capture a run of characters at the end of the sequence. 166 FuseTerminal 167 168 if {$changed} { 169 return [list x {*}$res] 170 } else { 171 return $pe 172 } 173 } 174 / { 175 set changed 0 ; # boolean flag showing if fuse ops were done. 176 set buf {} ; # accumulator of chars and ranges in a class. 177 set res {} ; # accumulator of new children for operator. 178 179 foreach c $arguments { 180 CollectClass $c 181 FuseClass 182 lappend res $c 183 } 184 185 # Capture a run of characters and ranges at the end of the 186 # sequence. 187 FuseClass 188 189 if {$changed} { 190 return [list / {*}$res] 191 } else { 192 return $pe 193 } 194 } 195 default { 196 # Leave anything not x/ unchanged 197 return $pe 198 } 199 } 200} 201 202# # ## ### ##### ######## ############# 203## Fuser Support 204 205proc ::pt::pe::op::CollectTerminal {c} { 206 if {[lindex $c 0] ne "t"} return 207 208 # A terminal. Just extend the accumulator. The main processing 209 # happens after each run of t-operators, see FuseTerminal. 210 211 upvar 1 buf buf 212 lappend buf [lindex $c 1] 213 return -code continue 214} 215 216proc ::pt::pe::op::FuseTerminal {} { 217 upvar 1 changed changed res res buf buf 218 219 # Nothing has accumulated, nothing to fuse. 220 if {$buf eq {}} return 221 222 # The current non-t operator is after one or more t-operators. We 223 # have to flush its accumulated data to keep the expression 224 # correct. 225 226 if {[llength $buf] > 1} { 227 # We are behind an actual series of t-operators, i.e. a 228 # string. We flush it and signal the change to the processing 229 # after the loop, 230 231 lappend res [list str {*}$buf] 232 set changed 1 233 } else { 234 # We are behind a single t-operator. We keep it as is, there 235 # is no actual need to make it a string. 236 237 lappend res [pt::pe terminal [lindex $buf 0]] 238 } 239 240 # Reset the accumulator for the next series. 241 set buf {} 242 return 243} 244 245# # ## ### ##### ######## ############# 246 247proc ::pt::pe::op::CollectClass {c} { 248 if {[lindex $c 0] ni {t ..}} return 249 250 # A terminal or range. Just extend the accumulator. The main processing 251 # happens after each run of t-operators, see FuseTerminal. 252 253 upvar 1 buf buf 254 set new [lrange $c 1 end] 255 if {([llength $new] == 1) || ([lindex $new 0] eq [lindex $new 1])} { 256 set new [lindex $new 0] 257 } 258 lappend buf $new 259 return -code continue 260} 261 262proc ::pt::pe::op::FuseClass {} { 263 upvar 1 changed changed res res buf buf 264 265 # Nothing has accumulated, nothing to fuse. 266 if {$buf eq {}} return 267 268 # The current non-t operator is after one or more 269 # t/..-operators. We have to flush the accumulated data to keep 270 # the expression correct. 271 272 if {[llength $buf] > 1} { 273 # We are behind an actual series of t/..-operators, i.e. a 274 # class. We flush it, signal the change to the processing 275 # after the loop, and reset the accumulator for the next 276 # series. 277 278 # TODO :: Sort class elements, aggregate adjacents into larger 279 # ranges if possible and worthwhile (>= 3), look for 280 # overlapping ranges and merge. 281 282 lappend res [list cl {*}$buf] 283 set changed 1 284 } else { 285 # We are behind a single t- or ..-operator. A terminal can be 286 # kept as is, but a range has to be encapsulated into a class, 287 # except of the range is something like a-a, then this is just 288 # a different coding of a single character ... 289 290 set args [lindex $buf 0] 291 if {[llength $args] == 1} { 292 lappend res [pt::pe terminal [lindex $args 0]] 293 } else { 294 lassign $args a b 295 set changed 1 296 if {$a ne $b} { 297 lappend res [list cl {*}$buf] 298 } else { 299 lappend res [pt::pe terminal $a] 300 } 301 } 302 } 303 304 # Reset the accumulator for the next series. 305 set buf {} 306 return 307} 308 309# # ## ### ##### ######## ############# 310## State / Configuration :: n/a 311 312namespace eval ::pt::pe::op {} 313 314# # ## ### ##### ######## ############# ##################### 315## Ready 316 317package provide pt::pe::op 1 318return 319