1# -*- tcl -*- 2# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net> 3 4# Verification of serialized PEGs, and conversion between 5# serializations and other data structures. 6 7# # ## ### ##### ######## ############# ##################### 8## Requirements 9 10package require Tcl 8.5 ; # Required runtime. 11package require pt::pe 12 13# # ## ### ##### ######## ############# ##################### 14## 15 16namespace eval ::pt::peg { 17 namespace export \ 18 verify verify-as-canonical canonicalize print merge equal 19 namespace ensemble create 20} 21 22# # ## ### ##### ######## ############# 23## Public API 24 25# Check that the proposed serialization of a keyword index is 26# indeed such. 27 28proc ::pt::peg::verify {serial {canonvar {}}} { 29 variable ourprefix 30 variable ourshort 31 variable ourtag 32 variable ourcbadlen 33 variable ourmiss 34 variable ourbadpe 35 variable ourcode 36 37 # Basic syntax: Length and outer type code 38 if {[llength $serial] != 2} { 39 return -code error $ourprefix$ourshort 40 } 41 42 lassign $serial tag contents 43 44 if {$tag ne $ourcode} { 45 return -code error $ourprefix[format $ourtag $tag] 46 } 47 48 # contents = dict (rules, start -> ...) 49 50 if {[llength $contents] != 4} { 51 return -code error $ourprefix$ourcbadlen 52 } 53 54 # Unpack the contents, then check that all necessary keys are 55 # present. Together with the length check we can then also be 56 # sure that no other key is present either. 57 array set peg $contents 58 foreach k {rules start} { 59 if {[info exists peg($k)]} continue 60 return -code error $ourprefix[format $ourmiss $k] 61 } 62 63 if {[catch { 64 pt::pe verify $peg(start) canon 65 } msg]} { 66 return -code error \ 67 [string map \ 68 [list \ 69 {error in serialization:} \ 70 $ourprefix[format $ourbadpe start]] \ 71 $msg] 72 } 73 74 if {$canonvar eq {}} { 75 VerifyRules $peg(rules) 76 } else { 77 upvar 1 $canonvar iscanonical 78 set iscanonical $canon 79 80 VerifyRules $peg(rules) iscanonical 81 82 # Quick exit if the inner structure was already 83 # non-canonical. 84 if {!$iscanonical} return 85 86 # Now various checks if the keys and identifiers are 87 # properly sorted to make this a canonical serialization. 88 89 lassign $contents a _ b _ 90 if {[list $a $b] ne {rules start}} { 91 set iscanonical 0 92 } 93 94 if {$serial ne [list {*}$serial]} { 95 set iscanonical 0 96 } 97 98 if {$contents ne [list {*}$contents]} { 99 set iscanonical 0 100 } 101 } 102 103 # Everything checked out. 104 return 105} 106 107proc ::pt::peg::verify-as-canonical {serial} { 108 verify $serial iscanonical 109 if {!$iscanonical} { 110 variable ourprefix 111 variable ourdupsort 112 return -code error $ourprefix$ourdupsort 113 } 114 return 115} 116 117proc ::pt::peg::canonicalize {serial} { 118 variable ourcode 119 120 verify $serial iscanonical 121 if {$iscanonical} { return $serial } 122 123 # Unpack the serialization. 124 array set peg $serial 125 array set peg $peg($ourcode) 126 unset peg($ourcode) 127 128 # Construct result, inside out 129 set rules {} 130 array set r $peg(rules) 131 foreach symbol [lsort -dict [array names r]] { 132 array set sd $r($symbol) 133 lappend rules \ 134 $symbol [list \ 135 is [pt::pe \ 136 canonicalize $sd(is)] \ 137 mode $sd(mode)] 138 unset sd 139 } 140 141 set serial [list $ourcode \ 142 [list \ 143 rules $rules \ 144 start [pt::pe \ 145 canonicalize $peg(start)]]] 146 return $serial 147} 148 149# Converts a PEG serialization into a human readable string for 150# test results. It assumes that the serialization is at least 151# structurally sound. 152 153proc ::pt::peg::print {serial} { 154 variable ourcode 155 156 # Unpack the serialization. 157 array set peg $serial 158 array set peg $peg($ourcode) 159 unset peg($ourcode) 160 # Print 161 set lines {} 162 lappend lines $ourcode 163 lappend lines " start := [join [split [pt::pe print $peg(start)] \n] "\n "]" 164 lappend lines { rules} 165 foreach {symbol value} $peg(rules) { 166 array set sd $value 167 # keys :: is, mode 168 lappend lines " $symbol :: <$sd(mode)> :=" 169 lappend lines " [join [split [pt::pe print $sd(is)] \n] "\n "]" 170 unset sd 171 } 172 return [join $lines \n] 173} 174 175# # ## ### ##### ######## ############# 176 177proc ::pt::peg::merge {seriala serialb} { 178 variable ourcode 179 180 verify $seriala 181 verify $serialb 182 183 array set pega $seriala 184 array set pega $pega($ourcode) 185 unset pega($ourcode) 186 187 array set pegb $serialb 188 array set pegb $pegb($ourcode) 189 unset pegb($ourcode) 190 191 array set ra $pega(rules) 192 array set rb $pegb(rules) 193 194 foreach symbol [array names rb] { 195 if {![info exists ra($symbol)]} { 196 # No conflict possible, copy over 197 set ra($symbol) $rb($symbol) 198 } else { 199 # unpack definitions, check for conflicts 200 array set sda $ra($symbol) 201 array set sdb $rb($symbol) 202 203 if {$sda(mode) ne $sdb(mode)} { 204 return -code "Merge error for nonterminal \"$symbol\", semantic mode mismatch" 205 } 206 207 # Merge parsing expressions, if not identical ... 208 if {![pt::pe equal \ 209 $sda(is) \ 210 $sdb(is)]} { 211 set sda(is) [pt::pe choice \ 212 $sda(is) \ 213 $sdb(is)] 214 set ra($symbol) [array get sda] 215 } 216 217 unset sda 218 unset sdb 219 } 220 } 221 222 # Construct result, inside out 223 224 set rules {} 225 foreach symbol [lsort -dict [array names ra]] { 226 array set sd $ra($symbol) 227 lappend rules \ 228 $symbol [list \ 229 is $sd(is) \ 230 mode $sd(mode)] 231 unset sd 232 } 233 234 if {![pt::pe equal \ 235 $pega(start) \ 236 $pegb(start)]} { 237 set start [pt::pe choice \ 238 $pega(start) \ 239 $pegb(start)] 240 } else { 241 set start $pega(start) 242 } 243 244 set serial [list $ourcode \ 245 [list \ 246 rules $rules \ 247 start $start]] 248 return $serial 249 250} 251 252# # ## ### ##### ######## ############# 253 254proc ::pt::peg::equal {seriala serialb} { 255 # syntactical (intensional) grammar equality. 256 string equal \ 257 [canonicalize $seriala] \ 258 [canonicalize $serialb] 259} 260 261# # ## ### ##### ######## ############# 262 263 264proc ::pt::peg::VerifyRules {rules {canonvar {}}} { 265 variable ourprefix 266 variable ourrbadlen 267 variable oursdup 268 variable oursempty 269 variable oursbadlen 270 variable oursmiss 271 variable ourbadpe 272 variable ourbadmode 273 variable ourmode 274 275 if {$canonvar ne {}} { 276 upvar 1 $canonvar iscanonical 277 } 278 279 if {[llength $rules] % 2 == 1} { 280 return -code error $ourprefix$ourrbadlen 281 } 282 283 if {$rules ne [list {*}$rules]} { 284 set iscanonical 0 285 } 286 287 array set r $rules 288 289 if {([array size r]*2) < [llength $rules]} { 290 return -code error $ourprefix$oursdup 291 } 292 293 foreach symbol [array names r] { 294 if {$symbol eq {}} { 295 return -code error $ourprefix$oursempty 296 } 297 298 set def $r($symbol) 299 300 if {[llength $def] != 4} { 301 return -code error $ourprefix[format $oursbadlen $symbol] 302 } 303 304 if {$def ne [list {*}$def]} { 305 set iscanonical 0 306 } 307 308 array set sd $def 309 foreach k {is mode} { 310 if {[info exists sd($k)]} continue 311 return -code error $ourprefix[format $oursmiss $symbol $k] 312 } 313 314 if {[catch { 315 pt::pe verify $sd(is) canon 316 } msg]} { 317 return -code error \ 318 [string map \ 319 [list \ 320 {error in serialization:} \ 321 $ourprefix[format $ourbadpe ($symbol)]] \ 322 $msg] 323 } 324 325 if {![info exists ourmode($sd(mode))]} { 326 return -code error $ourprefix[format $ourbadmode $symbol $sd(mode)] 327 } 328 329 # Now various checks if the keys and identifiers are 330 # properly sorted to make this a canonical serialization. 331 332 if {!$canon} { 333 set iscanonical 0 334 continue 335 } 336 337 lassign $def a _ b _ 338 if {[list $a $b] ne {is mode}} { 339 set iscanonical 0 340 } 341 } 342 return 343} 344 345namespace eval ::pt::peg { 346 # # ## ### ##### ######## ############# 347 348 variable ourcode pt::grammar::peg 349 variable ourprefix {error in serialization:} 350 # # Test cases (grammar-peg-structure-) 351 variable ourshort { dictionary too short, expected exactly one key} ; # 352 variable ourtag { bad type tag "%s"} ; # 353 variable ourcbadlen { dictionary of bad length, expected exactly two keys} ; # 354 variable ourmiss { missing expected key "%s"} ; # 355 variable oursmiss { symbol "%s", missing expected key "%s"} ; # 356 variable ourbadpe { bad %s parsing expression:} ; # 357 variable ourbadmode { symbol "%s", bad nonterminal mode "%s"} ; # 358 variable ourrbadlen { rule dictionary of bad length, not a dictionary} ; # 359 variable oursempty { expected symbol name, got empty string} 360 variable oursbadlen { symbol dictionary for "%s" of bad length, expected exactly two keys} ; # 361 variable oursdup { duplicate nonterminal keywords} ; # 362 # Message for non-canonical serialization when expecting canonical form 363 variable ourdupsort { duplicate and/or unsorted keywords and/or irrelevant whitespace} ; # 364 365 variable ourmode 366 array set ourmode { 367 value . 368 leaf . 369 void . 370 } 371 372 ## 373 # # ## ### ##### ######## ############# 374} 375 376# # ## ### ##### ######## ############# ##################### 377## Ready 378 379package provide pt::peg 1 380return 381