1# -*- tcl -*- 2# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net> 3 4# Verification of serialized parsing expressions, conversion 5# between such and other data structures, and their construction. 6 7# # ## ### ##### ######## ############# ##################### 8## Requirements 9 10package require Tcl 8.5 ; # Required runtime. 11package require char ; # Character quoting utilities. 12 13# # ## ### ##### ######## ############# ##################### 14## 15 16namespace eval ::pt::pe { 17 namespace export \ 18 verify verify-as-canonical canonicalize \ 19 bottomup topdown print equal \ 20 \ 21 epsilon dot alnum alpha ascii digit graph lower printable \ 22 punct space upper wordchar xdigit ddigit \ 23 nonterminal optional repeat0 repeat1 ahead notahead \ 24 choice sequence \ 25 terminal range 26 27 namespace ensemble create 28} 29 30# # ## ### ##### ######## ############# 31## Public API 32 33# Check that the proposed serialization of a keyword index is 34# indeed such. 35 36proc ::pt::pe::verify {serial {canonvar {}}} { 37 variable ourprefix 38 variable ourempty 39 #puts "V <$serial> /[llength [info level 0]] / [info level 0]" 40 41 if {[llength $serial] == 0} { 42 return -code error $ourprefix$ourempty 43 } 44 45 if {$canonvar ne {}} { 46 upvar 1 $canonvar iscanonical 47 set iscanonical [string equal $serial [list {*}$serial]] 48 } 49 50 topdown [list [namespace current]::Verify] $serial 51 return 52} 53 54proc ::pt::pe::verify-as-canonical {serial} { 55 verify $serial iscanonical 56 if {!$iscanonical} { 57 variable ourprefix 58 variable ourimpure 59 return -code error $ourprefix$ourimpure 60 } 61 return 62} 63 64proc ::pt::pe::Verify {pe op arguments} { 65 variable ourprefix 66 variable ourbadop 67 variable ourarity 68 variable ourwrongargs 69 variable ourempty 70 71 #puts "VE <$pe /$op /$arguments>" 72 if {[llength $pe] == 0} { 73 return -code error $ourprefix$ourempty 74 } 75 76 if {![info exists ourarity($op)]} { 77 return -code error $ourprefix[format $ourbadop $op] 78 } 79 80 lassign $ourarity($op) min max 81 82 set n [llength $arguments] 83 if {($n < $min) || (($max >= 0) && ($n > $max))} { 84 return -code error $ourprefix[format $ourwrongargs $op] 85 } 86 87 upvar 1 iscanonical iscanonical 88 if { 89 [info exists iscanonical] && 90 (($pe ne [list {*}$pe]) || 91 ($op eq "..") && ([lindex $arguments 0] eq [lindex $arguments 1])) 92 } { 93 # Reject coding with superfluous whitespace, and the use of 94 # {.. x x} as coding for {t x} as non-canonical. 95 96 set iscanonical 0 97 } 98 return 99} 100 101# # ## ### ##### ######## ############# 102 103proc ::pt::pe::canonicalize {serial} { 104 verify $serial iscanonical 105 if {$iscanonical} { return $serial } 106 return [bottomup [list [namespace current]::Canonicalize] $serial] 107} 108 109proc ::pt::pe::Canonicalize {pe op arguments} { 110 # The input is mostly already pulled apart into its elements. Now 111 # we construct a pure list out of them, and if necessary, convert 112 # a {.. x x} expression into the canonical {t x} representation. 113 114 if {($op eq ".." ) && 115 ([lindex $arguments 0] eq [lindex $arguments 1])} { 116 return [list t [lindex $arguments 0]] 117 } 118 return [list $op {*}$arguments] 119} 120 121# # ## ### ##### ######## ############# 122 123# Converts a parsing expression serialization into a human readable 124# string for test results. It assumes that the serialization is at 125# least structurally sound. 126 127proc ::pt::pe::print {serial} { 128 return [join [bottomup [list [namespace current]::Print] $serial] \n] 129} 130 131proc ::pt::pe::Print {pe op arguments} { 132 switch -exact -- $op { 133 epsilon - alpha - alnum - ascii - digit - graph - lower - print - \ 134 punct - space - upper - wordchar - xdigit - ddigit - dot { 135 return [list <$op>] 136 } 137 str { return [list "\"[join [char quote comment {*}$arguments] {}]\""] } 138 cl { return [list "\[[join [char quote comment {*}$arguments] {}]\]"] } 139 n { return [list "([lindex $arguments 0])"] } 140 t { return [list "'[char quote comment [lindex $arguments 0]]'"] } 141 .. { 142 lassign $arguments ca ce 143 return [list "range ([char quote comment $ca] .. [char quote comment $ce])"] 144 } 145 } 146 # The arguments are already processed for printing 147 148 set out {} 149 lappend out $op 150 foreach a $arguments { 151 foreach line $a { 152 lappend out " $line" 153 } 154 } 155 return $out 156} 157 158# # ## ### ##### ######## ############# 159 160proc ::pt::pe::equal {seriala serialb} { 161 return [string equal \ 162 [canonicalize $seriala] \ 163 [canonicalize $serialb]] 164} 165 166# # ## ### ##### ######## ############# 167 168proc ::pt::pe::bottomup {cmdprefix pe} { 169 Bottomup 2 $cmdprefix $pe 170} 171 172proc ::pt::pe::Bottomup {level cmdprefix pe} { 173 set op [lindex $pe 0] 174 set ar [lrange $pe 1 end] 175 176 switch -exact -- $op { 177 & - ! - * - + - ? - x - / { 178 set clevel $level 179 incr clevel 180 set nar {} 181 foreach a $ar { 182 lappend nar [Bottomup $clevel $cmdprefix $a] 183 } 184 set ar $nar 185 set pe [list $op {*}$nar] 186 } 187 default {} 188 } 189 190 return [uplevel $level [list {*}$cmdprefix $pe $op $ar]] 191} 192 193proc ::pt::pe::topdown {cmdprefix pe} { 194 Topdown 2 $cmdprefix $pe 195 return 196} 197 198proc ::pt::pe::Topdown {level cmdprefix pe} { 199 set op [lindex $pe 0] 200 set ar [lrange $pe 1 end] 201 202 uplevel $level [list {*}$cmdprefix $pe $op $ar] 203 204 switch -exact -- $op { 205 & - ! - * - + - ? - x - / { 206 incr level 207 foreach a $ar { 208 Topdown $level $cmdprefix $a 209 } 210 } 211 default {} 212 } 213 return 214} 215 216# # ## ### ##### ######## ############# 217 218proc ::pt::pe::epsilon {} { return epsilon } 219proc ::pt::pe::dot {} { return dot } 220proc ::pt::pe::alnum {} { return alnum } 221proc ::pt::pe::alpha {} { return alpha } 222proc ::pt::pe::ascii {} { return ascii } 223proc ::pt::pe::digit {} { return digit } 224proc ::pt::pe::graph {} { return graph } 225proc ::pt::pe::lower {} { return lower } 226proc ::pt::pe::printable {} { return print } 227proc ::pt::pe::punct {} { return punct } 228proc ::pt::pe::space {} { return space } 229proc ::pt::pe::upper {} { return upper } 230proc ::pt::pe::wordchar {} { return wordchar } 231proc ::pt::pe::xdigit {} { return xdigit } 232proc ::pt::pe::ddigit {} { return ddigit } 233 234proc ::pt::pe::nonterminal {nt} { list n $nt } 235proc ::pt::pe::optional {pe} { list ? $pe } 236proc ::pt::pe::repeat0 {pe} { list * $pe } 237proc ::pt::pe::repeat1 {pe} { list + $pe } 238proc ::pt::pe::ahead {pe} { list & $pe } 239proc ::pt::pe::notahead {pe} { list ! $pe } 240 241proc ::pt::pe::choice {pe args} { linsert $args 0 / $pe } 242proc ::pt::pe::sequence {pe args} { linsert $args 0 x $pe } 243 244proc ::pt::pe::terminal {t} { list t $t } 245proc ::pt::pe::range {ta tb} { 246 if {$ta eq $tb} { 247 list t $ta 248 } else { 249 list .. $ta $tb 250 } 251} 252 253namespace eval ::pt::pe { 254 # # ## ### ##### ######## ############# 255 ## Strings for error messages. 256 257 variable ourprefix "error in serialization:" 258 variable ourempty " got empty string" 259 variable ourwrongargs " wrong#args for \"%s\"" 260 variable ourbadop " invalid operator \"%s\"" 261 variable ourimpure " has irrelevant whitespace or (.. X X)" 262 263 # # ## ### ##### ######## ############# 264 ## operator arities 265 266 variable ourarity 267 array set ourarity { 268 epsilon {0 0} 269 alpha {0 0} 270 alnum {0 0} 271 ascii {0 0} 272 digit {0 0} 273 graph {0 0} 274 lower {0 0} 275 print {0 0} 276 punct {0 0} 277 space {0 0} 278 upper {0 0} 279 wordchar {0 0} 280 xdigit {0 0} 281 ddigit {0 0} 282 dot {0 0} 283 .. {2 2} 284 n {1 1} 285 t {1 1} 286 & {1 1} 287 ! {1 1} 288 * {1 1} 289 + {1 1} 290 ? {1 1} 291 x {1 -1} 292 / {1 -1} 293 } 294 295 ## 296 # # ## ### ##### ######## ############# 297} 298 299# # ## ### ##### ######## ############# ##################### 300## Ready 301 302package provide pt::pe 1 303return 304