1# -*- tcl -*- 2# 3# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net> 4 5# Interpreter for parsing expression grammars. In essence a recursive 6# descent parser configurable with a parsing expression grammar. 7 8# ### ### ### ######### ######### ######### 9## Package description 10 11## The instances of this class parse a text provided through a channel 12## based on a parsing expression grammar provided by a peg container 13## object. The parsing process is interpretative, i.e. the parsing 14## expressions are decoded and checked on the fly and possibly 15## multiple times, as they are encountered. 16 17## The interpreter operates in pull-push mode, i.e. the interpreter 18## object is in charge and reads the characters from the channel as 19## needed, and returns with the result of the parse, either when 20## encountering an error, or when the parse was successful. 21 22# ### ### ### ######### ######### ######### 23## Requisites 24 25package require Tcl 8.5 26package require pt::rde ; # Virtual machine geared to the parsing of PEGs. 27package require snit 28 29# ### ### ### ######### ######### ######### 30## Implementation 31 32snit::type ::pt::peg::interp { 33 34 # ### ### ### ######### ######### ######### 35 ## Instance API 36 37 constructor {} {} 38 39 method use {grammar} {} 40 41 method parse {channel} {} ; # Parse the contents of the channel 42 # against the configured grammar. 43 44 method parset {text} {} ; # Parse the text against the 45 # configured grammar. 46 47 # ### ### ### ######### ######### ######### 48 ## Options 49 50 ## None 51 52 # ### ### ### ######### ######### ######### 53 ## Instance API Implementation. 54 55 constructor {} { 56 # Create the runtime supporting the parsing process. 57 set myparser [pt::rde ${selfns}::ENGINE] 58 return 59 } 60 61 method use {grammar} { 62 # Release the information of any previously used grammar. 63 64 array unset myrhs * 65 array unset mymode * 66 set mystart epsilon 67 68 # Copy the grammar into internal tables. 69 70 # Note how the grammar is not used in any way, shape, or form 71 # afterward. 72 73 # Note also that it is not required to verify the 74 # grammar. This was done while it was loaded into the grammar 75 # object, be it incrementally or at once. 76 77 array set myrhs [$grammar rules] 78 array set mymode [$grammar modes] 79 set mystart [$grammar start] 80 return 81 } 82 83 method parse {channel} { 84 $myparser reset $channel 85 $self {*}$mystart 86 return [$myparser complete] 87 } 88 89 method parset {text} { 90 $myparser reset 91 $myparser data $text 92 $self {*}$mystart 93 return [$myparser complete] 94 } 95 96 # ### ### ### ######### ######### ######### 97 ## Parse operator implementation 98 99 # No input to parse, nor consume. Ok, always. 100 101 method epsilon {} { 102 $myparser i_status_ok 103 return 104 } 105 106 # Parse and consume one character. No matter which character. This 107 # fails only when reaching EOF. Does not consume input on failure. 108 109 method dot {} { 110 $self Next 111 return 112 } 113 114 # Parse and consume one specific character. This fails if the 115 # character at the location is not in the specified character 116 # class. Does not consume input on failure. 117 118 foreach operator { 119 alnum alpha ascii ddigit digit graph 120 lower print punct space upper wordchar 121 xdigit 122 } { 123 method $operator {} [string map [list @ $operator] { 124 $self Next 125 $myparser i:fail_return 126 $myparser i_test_@ 127 return 128 }] 129 } 130 131 # Parse and consume one specific character. This fails if the 132 # character at the location is not the expected character. Does 133 # not consume input on failure. 134 135 method t {char} { 136 $self Next 137 $myparser i:fail_return 138 $myparser i_test_char $char 139 return 140 } 141 142 # Parse and consume one character, if in the specified range. This 143 # fails if the read character is outside of the range. Does not 144 # consume input on failure. 145 146 method .. {chstart chend} { 147 $self Next 148 $myparser i:fail_return 149 $myparser i_test_range $chstart $chend 150 return 151 } 152 153 # To parse a nonterminal symbol in the input we execute its 154 # parsing expression, i.e its right-hand side. This can be cut 155 # short if the necessary information can be obtained from the 156 # nonterminal cache. Does not consume input on failure. 157 158 method n {symbol} { 159 set savemode $mycurrentmode 160 set mycurrentmode $mymode($symbol) 161 162 # Query NC, and shortcut 163 if {[$myparser i_symbol_restore $symbol]} { 164 $self ASTFinalize 165 return 166 } 167 168 # Save location and AST construction state 169 $myparser i_loc_push ; # (i) 170 $myparser i_ast_push ; # (1) 171 172 # Run the right hand side. 173 $self {*}$myrhs($symbol) 174 175 # Generate a semantic value, based on the currently active 176 # semantic mode. 177 switch -exact -- $mycurrentmode { 178 value { $myparser i_value_clear/reduce $symbol } 179 leaf { $myparser i_value_clear/leaf $symbol } 180 void { $myparser i_value_clear } 181 } 182 183 $myparser i_symbol_save $symbol 184 185 # Drop ARS. Unconditional as any necessary reduction was done 186 # already (See (a)), and left the result in SV 187 $myparser i_ast_pop_rewind ; # (Ad 1) 188 $self ASTFinalize 189 190 # Even if parse is ok. 191 $myparser i_error_nonterminal $symbol 192 $myparser i_loc_pop_discard ; # (Ad i) 193 return 194 } 195 196 # And lookahead predicate. We parse the expression against the 197 # input and return the parse result. No input is consumed. 198 199 method & {expression} { 200 $myparser i_loc_push 201 202 $self {*}$expression 203 204 $myparser i_loc_pop_rewind 205 return 206 } 207 208 # Negated lookahead predicate. We parse the expression against the 209 # input and returns the negated parse result. No input is 210 # consumed. 211 212 method ! {expression} { 213 $myparser i_loc_push 214 $myparser i_ast_push 215 216 $self {*}$expression 217 218 $myparser i_ast_pop_discard/rewind ;# -- fail/ok 219 $myparser i_loc_pop_rewind 220 $myparser i_status_negate 221 return 222 } 223 224 # Parsing an optional expression. This tries to parse the sub 225 # expression. It will never fail, even if the sub expression 226 # itself is not succesful. Consumes only input if it could parse 227 # the sub expression. Like *, but without the repetition. 228 229 method ? {expression} { 230 $myparser i_loc_push 231 $myparser i_error_push 232 233 $self {*}$expression 234 235 $myparser i_error_pop_merge 236 $myparser i_loc_pop_rewind/discard ;# -- fail/ok 237 $myparser i_status_ok 238 return 239 } 240 241 # Parse zero or more repetitions of an expression (Kleene 242 # closure). This consumes as much input as we were able to parse 243 # the sub expression. The expresion as a whole is always 244 # succesful, even if the sub expression fails (zero repetitions). 245 246 method * {expression} { 247 # do { ... } while ok. 248 while {1} { 249 $myparser i_loc_push 250 $myparser i_error_push 251 252 $self {*}$expression 253 254 $myparser i_error_pop_merge 255 $myparser i_loc_pop_rewind/discard ;# -- fail/ok 256 $myparser i:ok_continue 257 break 258 } 259 $myparser i_status_ok 260 return 261 } 262 263 # Parse one or more repetitions of an expression (Positive kleene 264 # closure). This is similar to *, except for one round at the 265 # front which has to parse for success of the whole. This 266 # expression can fail. It will consume only as much input as it 267 # was able to parse. 268 269 method + {expression} { 270 $myparser i_loc_push 271 272 $self {*}$expression 273 274 $myparser i_loc_pop_rewind/discard ;# -- fail/ok 275 $myparser i:fail_return 276 277 $self * $expression 278 return 279 } 280 281 # Parsing a sequence of expressions. This parses each sub 282 # expression in turn, each consuming input. In the case of failure 283 # by one of the sequence's elements nothing is consumed at all. 284 285 method x {args} { 286 $myparser i_loc_push 287 $myparser i_ast_push 288 $myparser i_error_clear 289 290 foreach expression $args { 291 $myparser i_error_push 292 293 $self {*}$expression 294 295 $myparser i_error_pop_merge 296 # Branch failed, track back and report to caller. 297 $myparser i:fail_ast_pop_rewind 298 $myparser i:fail_loc_pop_rewind 299 $myparser i:fail_return ; # Stop trying on element failure 300 } 301 302 # All elements OK, squash backtracking state 303 $myparser i_loc_pop_discard 304 $myparser i_ast_pop_discard 305 return 306 } 307 308 # Parsing a series of alternatives (Choice). This parses each 309 # alternative in turn, always starting from the current 310 # location. Nothing is consumed if all alternatives fail. Consumes 311 # as much as was consumed by the succesful branch. 312 313 method / {args} { 314 $myparser i_error_clear 315 316 foreach expression $args { 317 $myparser i_loc_push 318 $myparser i_ast_push 319 $myparser i_error_push 320 321 $self {*}$expression 322 323 $myparser i_error_pop_merge 324 $myparser i_ast_pop_rewind/discard 325 $myparser i_loc_pop_rewind/discard 326 $myparser i:fail_continue 327 return ; # Stop trying on finding a successful branch. 328 } 329 330 # All branches FAIL 331 $myparser i_status_fail 332 return 333 } 334 335 # ### ### ### ######### ######### ######### 336 337 method Next {} { 338 # We are processing the outer method call into an atomic 339 # parsing expression for error messaging. 340 $myparser i_input_next [regsub {^.*Snit_method} [lreplace [info level -1] 1 4] {}] 341 return 342 } 343 344 method ASTFinalize {} { 345 if {$mycurrentmode ne "void"} { 346 $myparser i:ok_ast_value_push 347 } 348 upvar 1 savemode savemode 349 set mycurrentmode $savemode 350 return 351 } 352 353 # ### ### ### ######### ######### ######### 354 ## State Interpreter data structures. 355 356 variable myparser {} ; # Our PARAM instantiation. 357 variable myrhs -array {} ; # Dictionary mapping nonterminal 358 # symbols to parsing expressions 359 # describing their sentence 360 # structure. 361 variable mymode -array {} ; # Dictionary mapping nonterminal 362 # symbols to semantic modes 363 # (controlling AST generation). 364 variable mystart epsilon ; # The parsing expression to start 365 # the parse process with. 366 variable mycurrentmode value ; # The currently active semantic mode. 367 368 # ### ### ### ######### ######### ######### 369 ## Debugging helper. To activate 370 ## string map {{self {*}} {self TRACE {*}}} 371 372 method TRACE {args} { 373 puts |$args|enter 374 set res [$self {*}$args] 375 puts |$args|return 376 return $res 377 } 378 379 ## 380 # ### ### ### ######### ######### ######### 381} 382 383# ### ### ### ######### ######### ######### 384## Package Management 385 386package provide pt::peg::interp 1 387