1# pt_peg_from_peg.tcl -- 2# 3# Conversion from PEG (Human readable text) to PEG. 4# 5# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net> 6# 7# See the file "license.terms" for information on usage and redistribution 8# of this file, and for a DISCLAIMER OF ALL WARRANTIES. 9# 10# RCS: @(#) $Id: pt_peg_from_peg.tcl,v 1.1 2010/03/26 05:07:24 andreas_kupries Exp $ 11 12# This package takes text for a human-readable PEG and produces the 13# canonical serialization of a parsing expression grammar. 14 15# TODO :: APIs for reading from arbitrary channel. 16 17# ### ### ### ######### ######### ######### 18## Requisites 19 20package require Tcl 8.5 21package require pt::peg ; # Verification that the input is proper. 22#package require pt::peg::interp 23#package require pt::peg::container::peg 24package require pt::parse::peg 25package require pt::ast 26package require pt::pe 27package require pt::pe::op 28 29# ### ### ### ######### ######### ######### 30## 31 32namespace eval ::pt::peg::from::peg { 33 namespace export convert convert-file 34 namespace ensemble create 35} 36 37# ### ### ### ######### ######### ######### 38## API. 39 40proc ::pt::peg::from::peg::convert {text} { 41 # Initialize data for the pseudo-channel 42 variable input $text 43 variable loc 0 44 variable max [expr { [string length $text] - 1 }] 45 46 return [Convert] 47} 48 49proc ::pt::peg::from::peg::convert-file {path} { 50 # Initialize data for the pseudo-channel 51 variable input [fileutil::cat $path] 52 variable loc 0 53 variable max [expr { [string length $input] - 1 }] 54 55 return [Convert] 56} 57 58# ### ### ### ######### ######### ######### 59 60proc ::pt::peg::from::peg::Convert {} { 61 # Create the runtime ... 62 set c [chan create read pt::peg::from::peg::CHAN] ; # pseudo-channel for input 63 64 #set g [pt::peg::container::peg %AUTO] ; # load peg grammar 65 #set i [pt::peg::interp %AUTO% $g] ; # grammar interpreter / parser 66 #$g destroy 67 set i [pt::parse::peg] 68 69 # Parse input. 70 set fail [catch { 71 set ast [$i parse $c] 72 } msg] 73 if {$fail} { 74 set ei $::errorInfo 75 set ec $::errorCode 76 } 77 78 $i destroy 79 close $c 80 81 if {$fail} { 82 variable input {} 83 return -code error -errorinfo $ei -errorcode $ec $msg 84 } 85 86 # Now convert the AST to the grammar serial. 87 set serial [pt::ast bottomup \ 88 pt::peg::from::peg::GEN \ 89 $ast] 90 91 variable input {} 92 return $serial 93 94 # ### ### ### ######### ######### ######### 95} 96 97# ### ### ### ######### ######### ######### 98## Internals - Pseudo channel to couple the in-memory text with the 99## RDE. 100 101namespace eval ::pt::peg::from::peg::CHAN { 102 namespace export initialize finalize read watch 103 namespace ensemble create 104} 105 106proc pt::peg::from::peg::CHAN::initialize {c mode} { 107 return {initialize finalize watch read} 108} 109 110proc pt::peg::from::peg::CHAN::finalize {c} {} 111proc pt::peg::from::peg::CHAN::watch {c events} {} 112 113proc pt::peg::from::peg::CHAN::read {c n} { 114 # Note: Should have binary string of the input, to properly handle 115 # encodings ... 116 variable ::pt::peg::from::peg::input 117 variable ::pt::peg::from::peg::loc 118 variable ::pt::peg::from::peg::max 119 120 if {$loc >= $max} { return {} } 121 122 set end [expr {$loc + $n - 1}] 123 set res [string range $input $loc $end] 124 125 incr loc $n 126 127 return $res 128} 129 130# ### ### ### ######### ######### ######### 131## Internals - Bottom up walk converting AST to PEG serialization. 132## Pseudo-ensemble 133 134namespace eval ::pt::peg::from::peg::GEN {} 135 136proc pt::peg::from::peg::GEN {ast} { 137 # The reason for not being an ensemble, an additional param 138 # (8.6+ can code that as ensemble). 139 return [namespace eval GEN $ast] 140} 141 142proc pt::peg::from::peg::GEN::ALNUM {s e} { 143 return [pt::pe alnum] 144} 145 146proc pt::peg::from::peg::GEN::ALPHA {s e} { 147 return [pt::pe alpha] 148} 149 150proc pt::peg::from::peg::GEN::AND {s e} { 151 return [pt::pe ahead [pt::pe dot]] ; # -> Prefix 152} 153 154proc pt::peg::from::peg::GEN::ASCII {s e} { 155 return [pt::pe ascii] 156} 157 158proc pt::peg::from::peg::GEN::Attribute {s e args} { 159 return [lindex $args 0] ; # -> Definition 160} 161 162proc pt::peg::from::peg::GEN::Char {s e args} { 163 return [lindex $args 0] 164} 165 166proc pt::peg::from::peg::GEN::CharOctalFull {s e} { 167 variable ::pt::peg::from::peg::input 168 return [pt::pe terminal [char unquote [string range $input $s $e]]] 169} 170 171proc pt::peg::from::peg::GEN::CharOctalPart {s e} { 172 variable ::pt::peg::from::peg::input 173 return [pt::pe terminal [char unquote [string range $input $s $e]]] 174} 175 176proc pt::peg::from::peg::GEN::CharSpecial {s e} { 177 variable ::pt::peg::from::peg::input 178 return [pt::pe terminal [char unquote [string range $input $s $e]]] 179} 180 181proc pt::peg::from::peg::GEN::CharUnescaped {s e} { 182 variable ::pt::peg::from::peg::input 183 return [pt::pe terminal [string range $input $s $e]] 184} 185 186proc pt::peg::from::peg::GEN::CharUnicode {s e} { 187 variable ::pt::peg::from::peg::input 188 return [pt::pe terminal [char unquote [string range $input $s $e]]] 189} 190 191proc pt::peg::from::peg::GEN::Class {s e args} { 192 if {[llength $args] == 1} { ; # integrated pe::op flatten 193 return [lindex $args 0] 194 } else { 195 return [pt::pe choice {*}$args] ; # <- Chars and Ranges 196 } 197} 198 199proc pt::peg::from::peg::GEN::CONTROL {s e} { 200 return [pt::pe ddigit] 201} 202 203proc pt::peg::from::peg::GEN::DDIGIT {s e} { 204 return [pt::pe ddigit] 205} 206 207proc pt::peg::from::peg::GEN::Definition {s e args} { 208 # args = list/2 (symbol pe) | <- Ident(ifier) Expression 209 # args = list/3 (mode symbol pe) | <- Attribute Ident(ifier) Expression 210 if {[llength $args] == 3} { 211 lassign $args mode sym pe 212 } else { 213 lassign $args sym pe 214 set mode value 215 } 216 # sym = list/2 ('n' name) 217 return [list [lindex $sym 1] $mode [pt::pe::op flatten $pe]] 218} 219 220proc pt::peg::from::peg::GEN::DIGIT {s e} { 221 return [pt::pe digit] 222} 223 224proc pt::peg::from::peg::GEN::DOT {s e} { 225 return [pt::pe dot] 226} 227 228proc pt::peg::from::peg::GEN::Expression {s e args} { 229 if {[llength $args] == 1} { ; # integrated pe::op flatten 230 return [lindex $args 0] 231 } else { 232 return [pt::pe choice {*}$args] ; # <- Primary 233 } 234} 235 236proc pt::peg::from::peg::GEN::Grammar {s e args} { 237 # args = list (start, list/3(symbol, mode, rule)...) <- Header Definition* 238 array set symbols {} 239 set rules {} 240 foreach def [lsort -index 0 -dict [lassign $args startexpr]] { 241 lassign $def sym mode rhs 242 if {[info exists symbol($sym)]} { 243 return -code error "Double declaration of symolb '$sym'" 244 } 245 set symbols($sym) . 246 lappend rules $sym [list is $rhs mode $mode] 247 } 248 # Full grammar 249 return [list pt::grammar::peg [list rules $rules start $startexpr]] 250} 251 252proc pt::peg::from::peg::GEN::GRAPH {s e} { 253 return [pt::pe graph] 254} 255 256proc pt::peg::from::peg::GEN::Header {s e args} { 257 # args = list/2 (list/2 ('n', name), pe) <- Ident(ifier) StartExpr 258 return [lindex $args 1] ; # StartExpr passes through 259} 260 261proc pt::peg::from::peg::GEN::Ident {s e} { 262 variable ::pt::peg::from::peg::input 263 return [pt::pe nonterminal [string range $input $s $e]] 264} 265 266proc pt::peg::from::peg::GEN::Identifier {s e args} { 267 return [lindex $args 0] ; # <- Ident, passes through 268} 269 270proc pt::peg::from::peg::GEN::LEAF {s e} { 271 return leaf 272} 273 274proc pt::peg::from::peg::GEN::LOWER {s e} { 275 return [pt::pe lower] 276} 277 278proc pt::peg::from::peg::GEN::Literal {s e args} { 279 if {[llength $args] == 1} { ; # integrated pe::op flatten 280 return [lindex $args 0] 281 } else { 282 return [pt::pe sequence {*}$args] ; # Series of chars -> Primary 283 } 284} 285 286proc pt::peg::from::peg::GEN::NOT {s e} { 287 return [pt::pe notahead [pt::pe dot]] ; # -> Prefix (dot is placeholder) 288} 289 290proc pt::peg::from::peg::GEN::PLUS {s e} { 291 return [pt::pe repeat1 [pt::pe dot]] ; # -> Suffix (dot is placeholder) 292} 293 294proc pt::peg::from::peg::GEN::Primary {s e args} { 295 return [lindex $args 0] ; # -> Expression, pass through 296} 297 298proc pt::peg::from::peg::GEN::Prefix {s e args} { 299 # args = list/1 (pe) | <- AND/NOT, Expression 300 # args = list/2 (pe/prefix, pe) | <- Expression 301 if {[llength $args] == 2} { 302 # Prefix operator present ... Replace its child (dot, 303 # placeholder) with our second, the actual expression. 304 return [lreplace [lindex $args 0] 1 1 [lindex $args 1]] 305 } else { 306 # Pass the sub-expression 307 return [lindex $args 0] 308 } 309} 310 311proc pt::peg::from::peg::GEN::PRINTABLE {s e} { 312 return [pt::pe printable] 313} 314 315proc pt::peg::from::peg::GEN::PUNCT {s e} { 316 return [pt::pe punct] 317} 318 319proc pt::peg::from::peg::GEN::QUESTION {s e} { 320 return [pt::pe optional [pt::pe dot]] ; # -> Suffix (dot is placeholder) 321} 322 323proc pt::peg::from::peg::GEN::Range {s e args} { 324 # args = list/1 (pe/t) | <- Char (pass through) 325 # args = list/2 (pe/t, pe/t) | <- Char, Char 326 if {[llength $args] == 2} { 327 # Convert two terminals to range 328 return [pt::pe range [lindex $args 0 1] [lindex $args 1 1]] 329 } else { 330 # Pass the char ... 331 return [lindex $args 0] 332 } 333} 334 335proc pt::peg::from::peg::GEN::Sequence {s e args} { 336 if {[llength $args] == 1} { ; # integrated pe::op flatten 337 return [lindex $args 0] 338 } else { 339 return [pt::pe sequence {*}$args] ; # <- Prefix+ 340 } 341} 342 343proc pt::peg::from::peg::GEN::SPACE {s e} { 344 return [pt::pe space] 345} 346 347proc pt::peg::from::peg::GEN::STAR {s e} { 348 return [pt::pe repeat0 [pt::pe dot]] ; # -> Suffix (dot is placeholder) 349} 350 351proc pt::peg::from::peg::GEN::StartExpr {s e args} { 352 # args = list/1 (pe) | <- Expression, -> Header 353 return [pt::pe::op flatten [lindex $args 0]] 354} 355proc pt::peg::from::peg::GEN::Suffix {s e args} { 356 # args = list/1 (pe) | <- Expression 357 # args = list/2 (pe, pe/suffix) | <- Expression */+/? 358 if {[llength $args] == 2} { 359 # Suffix operator present ... Replace its child (dot, 360 # placeholder) with our first, the actual expression. 361 return [lreplace [lindex $args 1] 1 1 [lindex $args 0]] 362 } else { 363 # Pass the sub-expression 364 return [lindex $args 0] 365 } 366} 367 368proc pt::peg::from::peg::GEN::UPPER {s e} { 369 return [pt::pe upper] 370} 371 372proc pt::peg::from::peg::GEN::VOID {s e} { 373 return void 374} 375 376proc pt::peg::from::peg::GEN::WORDCHAR {s e} { 377 return [pt::pe wordchar] 378} 379 380proc pt::peg::from::peg::GEN::XDIGIT {s e} { 381 return [pt::pe xdigit] 382} 383 384# ### ### ### ######### ######### ######### 385## Ready 386 387package provide pt::peg::from::peg 1 388return 389