1# -*- tcl -*-
2#
3# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
4
5# Interpreter for parsing expression grammars. In essence a recursive
6# descent parser configurable with a parsing expression grammar.
7
8# ### ### ### ######### ######### #########
9## Package description
10
11## The instances of this class parse a text provided through a channel
12## based on a parsing expression grammar provided by a peg container
13## object. The parsing process is interpretative, i.e. the parsing
14## expressions are decoded and checked on the fly and possibly
15## multiple times, as they are encountered.
16
17## The interpreter operates in pull-push mode, i.e. the interpreter
18## object is in charge and reads the characters from the channel as
19## needed, and returns with the result of the parse, either when
20## encountering an error, or when the parse was successful.
21
22# ### ### ### ######### ######### #########
23## Requisites
24
25package require Tcl 8.5
26package require pt::rde ; # Virtual machine geared to the parsing of PEGs.
27package require snit
28
29# ### ### ### ######### ######### #########
30## Implementation
31
32snit::type ::pt::peg::interp {
33
34    # ### ### ### ######### ######### #########
35    ## Instance API
36
37    constructor {} {}
38
39    method use {grammar} {}
40
41    method parse {channel} {} ; # Parse the contents of the channel
42				# against the configured grammar.
43
44    method parset {text}   {} ; # Parse the text against the
45                                # configured grammar.
46
47    # ### ### ### ######### ######### #########
48    ## Options
49
50    ## None
51
52    # ### ### ### ######### ######### #########
53    ## Instance API Implementation.
54
55    constructor {} {
56	# Create the runtime supporting the parsing process.
57	set myparser [pt::rde ${selfns}::ENGINE]
58	return
59    }
60
61    method use {grammar} {
62	# Release the information of any previously used grammar.
63
64	array unset myrhs  *
65	array unset mymode *
66	set mystart epsilon
67
68	# Copy the grammar into internal tables.
69
70	# Note how the grammar is not used in any way, shape, or form
71	# afterward.
72
73	# Note also that it is not required to verify the
74	# grammar. This was done while it was loaded into the grammar
75	# object, be it incrementally or at once.
76
77	array set myrhs  [$grammar rules]
78	array set mymode [$grammar modes]
79	set mystart      [$grammar start]
80	return
81    }
82
83    method parse {channel} {
84	$myparser reset $channel
85	$self {*}$mystart
86	return [$myparser complete]
87    }
88
89    method parset {text} {
90	$myparser reset
91	$myparser data $text
92	$self {*}$mystart
93	return [$myparser complete]
94    }
95
96    # ### ### ### ######### ######### #########
97    ## Parse operator implementation
98
99    # No input to parse, nor consume. Ok, always.
100
101    method epsilon {} {
102	$myparser i_status_ok
103	return
104    }
105
106    # Parse and consume one character. No matter which character. This
107    # fails only when reaching EOF. Does not consume input on failure.
108
109    method dot {} {
110	$self Next
111	return
112    }
113
114    # Parse and consume one specific character. This fails if the
115    # character at the location is not in the specified character
116    # class. Does not consume input on failure.
117
118    foreach operator {
119	alnum alpha ascii ddigit digit  graph
120	lower print punct space  upper  wordchar
121	xdigit
122    } {
123	method $operator {} [string map [list @ $operator] {
124	    $self Next
125	    $myparser i:fail_return
126	    $myparser i_test_@
127	    return
128	}]
129    }
130
131    # Parse and consume one specific character. This fails if the
132    # character at the location is not the expected character. Does
133    # not consume input on failure.
134
135    method t {char} {
136	$self Next
137	$myparser i:fail_return
138	$myparser i_test_char $char
139	return
140    }
141
142    # Parse and consume one character, if in the specified range. This
143    # fails if the read character is outside of the range. Does not
144    # consume input on failure.
145
146    method .. {chstart chend} {
147	$self Next
148	$myparser i:fail_return
149	$myparser i_test_range $chstart $chend
150	return
151    }
152
153    # To parse a nonterminal symbol in the input we execute its
154    # parsing expression, i.e its right-hand side. This can be cut
155    # short if the necessary information can be obtained from the
156    # nonterminal cache. Does not consume input on failure.
157
158    method n {symbol} {
159	set savemode      $mycurrentmode
160	set mycurrentmode $mymode($symbol)
161
162	# Query NC, and shortcut
163	if {[$myparser i_symbol_restore $symbol]} {
164	    $self ASTFinalize
165	    return
166	}
167
168	# Save location and AST construction state
169	$myparser i_loc_push ; # (i)
170	$myparser i_ast_push ; # (1)
171
172	# Run the right hand side.
173	$self {*}$myrhs($symbol)
174
175	# Generate a semantic value, based on the currently active
176	# semantic mode.
177	switch -exact -- $mycurrentmode {
178	    value   { $myparser i_value_clear/reduce $symbol }
179	    leaf    { $myparser i_value_clear/leaf   $symbol }
180	    void    { $myparser i_value_clear }
181	}
182
183	$myparser i_symbol_save $symbol
184
185	# Drop ARS. Unconditional as any necessary reduction was done
186	# already (See (a)), and left the result in SV
187	$myparser i_ast_pop_rewind ; # (Ad 1)
188	$self ASTFinalize
189
190	# Even if parse is ok.
191	$myparser i_error_nonterminal $symbol
192	$myparser i_loc_pop_discard ; # (Ad i)
193	return
194    }
195
196    # And lookahead predicate. We parse the expression against the
197    # input and return the parse result. No input is consumed.
198
199    method & {expression} {
200	$myparser i_loc_push
201
202	    $self {*}$expression
203
204	$myparser i_loc_pop_rewind
205	return
206    }
207
208    # Negated lookahead predicate. We parse the expression against the
209    # input and returns the negated parse result. No input is
210    # consumed.
211
212    method ! {expression} {
213	$myparser i_loc_push
214	$myparser i_ast_push
215
216	$self {*}$expression
217
218	$myparser i_ast_pop_discard/rewind ;# -- fail/ok
219	$myparser i_loc_pop_rewind
220	$myparser i_status_negate
221	return
222    }
223
224    # Parsing an optional expression. This tries to parse the sub
225    # expression. It will never fail, even if the sub expression
226    # itself is not succesful. Consumes only input if it could parse
227    # the sub expression. Like *, but without the repetition.
228
229    method ? {expression} {
230	$myparser i_loc_push
231	$myparser i_error_push
232
233	$self {*}$expression
234
235	$myparser i_error_pop_merge
236	$myparser i_loc_pop_rewind/discard ;# -- fail/ok
237	$myparser i_status_ok
238	return
239    }
240
241    # Parse zero or more repetitions of an expression (Kleene
242    # closure).  This consumes as much input as we were able to parse
243    # the sub expression. The expresion as a whole is always
244    # succesful, even if the sub expression fails (zero repetitions).
245
246    method * {expression} {
247	# do { ... } while ok.
248	while {1} {
249	    $myparser i_loc_push
250	    $myparser i_error_push
251
252	    $self {*}$expression
253
254	    $myparser i_error_pop_merge
255	    $myparser i_loc_pop_rewind/discard ;# -- fail/ok
256	    $myparser i:ok_continue
257	    break
258	}
259	$myparser i_status_ok
260	return
261    }
262
263    # Parse one or more repetitions of an expression (Positive kleene
264    # closure). This is similar to *, except for one round at the
265    # front which has to parse for success of the whole. This
266    # expression can fail. It will consume only as much input as it
267    # was able to parse.
268
269    method + {expression} {
270	$myparser i_loc_push
271
272	$self {*}$expression
273
274	$myparser i_loc_pop_rewind/discard ;# -- fail/ok
275	$myparser i:fail_return
276
277	$self * $expression
278	return
279    }
280
281    # Parsing a sequence of expressions. This parses each sub
282    # expression in turn, each consuming input. In the case of failure
283    # by one of the sequence's elements nothing is consumed at all.
284
285    method x {args} {
286	$myparser i_loc_push
287	$myparser i_ast_push
288	$myparser i_error_clear
289
290	foreach expression $args {
291	    $myparser i_error_push
292
293	    $self {*}$expression
294
295	    $myparser i_error_pop_merge
296	    # Branch failed, track back and report to caller.
297	    $myparser i:fail_ast_pop_rewind
298	    $myparser i:fail_loc_pop_rewind
299	    $myparser i:fail_return         ; # Stop trying on element failure
300	}
301
302	# All elements OK, squash backtracking state
303	$myparser i_loc_pop_discard
304	$myparser i_ast_pop_discard
305	return
306    }
307
308    # Parsing a series of alternatives (Choice). This parses each
309    # alternative in turn, always starting from the current
310    # location. Nothing is consumed if all alternatives fail. Consumes
311    # as much as was consumed by the succesful branch.
312
313    method / {args} {
314	$myparser i_error_clear
315
316	foreach expression $args {
317	    $myparser i_loc_push
318	    $myparser i_ast_push
319	    $myparser i_error_push
320
321	    $self {*}$expression
322
323	    $myparser i_error_pop_merge
324	    $myparser i_ast_pop_rewind/discard
325	    $myparser i_loc_pop_rewind/discard
326	    $myparser i:fail_continue
327	    return ; # Stop trying on finding a successful branch.
328	}
329
330	# All branches FAIL
331	$myparser i_status_fail
332	return
333    }
334
335    # ### ### ### ######### ######### #########
336
337    method Next {} {
338	# We are processing the outer method call into an atomic
339	# parsing expression for error messaging.
340	$myparser i_input_next [regsub {^.*Snit_method} [lreplace [info level -1] 1 4] {}]
341	return
342    }
343
344    method ASTFinalize {} {
345	if {$mycurrentmode ne "void"} {
346	    $myparser i:ok_ast_value_push
347	}
348	upvar 1 savemode savemode
349	set mycurrentmode $savemode
350	return
351    }
352
353    # ### ### ### ######### ######### #########
354    ## State Interpreter data structures.
355
356    variable myparser      {}    ; # Our PARAM instantiation.
357    variable myrhs  -array {}    ; # Dictionary mapping nonterminal
358				   # symbols to parsing expressions
359				   # describing their sentence
360				   # structure.
361    variable mymode -array {}    ; # Dictionary mapping nonterminal
362				   # symbols to semantic modes
363				   # (controlling AST generation).
364    variable mystart  epsilon    ; # The parsing expression to start
365				   # the parse process with.
366    variable mycurrentmode value ; # The currently active semantic mode.
367
368    # ### ### ### ######### ######### #########
369    ## Debugging helper. To activate
370    ## string map {{self {*}} {self TRACE {*}}}
371
372    method TRACE {args} {
373	puts |$args|enter
374	set res [$self {*}$args]
375	puts |$args|return
376	return $res
377    }
378
379    ##
380    # ### ### ### ######### ######### #########
381}
382
383# ### ### ### ######### ######### #########
384## Package Management
385
386package provide pt::peg::interp 1
387