1# -*- tcl -*-
2#
3# Copyright (c) 2009 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
4
5# # ## ### ##### ######## ############# #####################
6## Package description
7
8## Implementation of the PackRat Machine (PARAM), a virtual machine on
9## top of which parsers for Parsing Expression Grammars (PEGs) can be
10## realized. This implementation is tied to Tcl for control flow. We
11## (will) have alternate implementations written in TclOO, and critcl,
12## all exporting the same API.
13#
14## RD stands for Recursive Descent.
15
16# # ## ### ##### ######## ############# #####################
17## Requisites
18
19package require Tcl 8.5
20package require TclOO
21package require struct::stack 1.4 ; # Requiring get, trim methods
22package require pt::ast
23package require pt::pe
24
25# # ## ### ##### ######## ############# #####################
26## Implementation
27
28oo::class create ::pt::rde::oo {
29
30    # # ## ### ##### ######## ############# #####################
31    ## API - Lifecycle
32
33    constructor {} {
34	set selfns [info object namespace]
35
36	set mystackloc  [struct::stack ${selfns}::LOC]  ; # LS
37	set mystackerr  [struct::stack ${selfns}::ERR]  ; # ES
38	set mystackast  [struct::stack ${selfns}::AST]  ; # ARS/AS
39	set mystackmark [struct::stack ${selfns}::MARK] ; # s.a.
40
41	my reset
42	return
43    }
44
45    method reset {chan} {
46	set mychan    $chan      ; # IN
47	set myline    1          ; #
48	set mycolumn  0          ; #
49	set mycurrent {}         ; # CC
50	set myloc     -1         ; # CL
51	set myok      0          ; # ST
52	set msvalue   {}         ; # SV
53	set myerror   {}         ; # ER
54	set mytoken   {}         ; # TC
55	array unset   mysymbol * ; # NC
56
57	$mystackloc  clear
58	$mystackerr  clear
59	$mystackast  clear
60	$mystackmark clear
61	return
62    }
63
64    method complete {} {
65	if {$myok} {
66	    set n [$mystackast size]
67	    if {$n > 1} {
68		set  pos [$mystackloc peek]
69		incr pos
70		set children [lreverse [$mystackast peek [$mystackast size]]]     ; # SaveToMark
71		return [pt::ast new {} $pos $myloc {*}$children] ; # Reduce ALL
72	    } else {
73		return [$mystackast peek]
74	    }
75	} else {
76	    lassign $myerror loc messages
77	    return -code error [list pt::rde $loc [$self position $loc] $messages]
78	}
79    }
80
81    # # ## ### ##### ######## ############# #####################
82    ## API - State accessors
83
84    method chan   {} { return $mychan }
85    method line   {} { return $myline }
86    method column {} { return $mycolumn }
87
88    # - - -- --- ----- --------
89
90    method current  {} { return $mycurrent }
91    method location {} { return $myloc }
92    method lmarked  {} { return [lreverse [$mystackloc get]] }
93
94    # - - -- --- ----- --------
95
96    method ok      {} { return $myok      }
97    method value   {} { return $mysvalue  }
98    method error   {} { return $myerror   }
99    method emarked {} { return [lreverse [$mystackerr get]] }
100
101    # - - -- --- ----- --------
102
103    method tokens {{from {}} {to {}}} {
104	switch -exact [llength [info level 0]] {
105	    4 { return $mytoken }
106	    5 { return [lrange $mytoken $from $from] }
107	    6 { return [lrange $mytoken $from $to] }
108	}
109    }
110
111    method symbols {} {
112	return [array get mysymbol]
113    }
114
115    method scached {} {
116	return [array names mysymbol]
117    }
118
119    # - - -- --- ----- --------
120
121    method asts    {} { return [lreverse [$mystackast  get]] }
122    method amarked {} { return [lreverse [$mystackmark get]] }
123    method ast     {} { return [$mystackast peek] }
124
125    # - - -- --- ----- --------
126
127    method position {loc} {
128	return [lrange [lindex $mytoken $loc] 1 2]
129    }
130
131    # # ## ### ##### ######## ############# #####################
132    ## API - Instructions - Control flow
133
134    method i:ok_continue {} {
135	if {!$myok} return
136	return -code continue
137    }
138
139    method i:fail_continue {} {
140	if {$myok} return
141	return -code continue
142    }
143
144    method i:fail_return {} {
145	if {$myok} return
146	return -code return
147    }
148
149    method i:ok_return {} {
150	if {!$myok} return
151	return -code return
152    }
153
154    # # ## ### ##### ######## ############# #####################
155    ##  API - Instructions - Unconditional matching.
156
157    method i_status_ok {} {
158	set myok 1
159	return
160    }
161
162    method i_status_fail {} {
163	set myok 0
164	return
165    }
166
167    method i_status_negate {} {
168	set myok [expr {!$myok}]
169	return
170    }
171
172    # # ## ### ##### ######## ############# #####################
173    ##  API - Instructions - Error handling.
174
175    method i_error_clear {} {
176	set myerror {}
177	return
178    }
179
180    method i_error_push {} {
181	$mystackerr push $myerror
182	return
183    }
184
185    method i_error_pop_merge {} {
186	set olderror [$mystackerr pop]
187
188	# We have either old or new error data, keep it.
189
190	if {![llength $myerror]}  { set myerror $olderror ; return }
191	if {![llength $olderror]} return
192
193	# If one of the errors is further on in the input choose that as
194	# the information to propagate.
195
196	lassign $myerror  loe msgse
197	lassign $olderror lon msgsn
198
199	if {$lon > $loe} { set myerror $olderror ; return }
200	if {$loe > $lon} return
201
202	# Equal locations, merge the message lists.
203	#set myerror [list $loe [struct::set union $msgse $msgsn]]
204	set myerror [list $loe [lsort -uniq [list {*}$msgse {*}$msgsn]]]
205	return
206    }
207
208    method i_error_nonterminal {symbol} {
209	# Inlined: Errors, Expected.
210	if {![llength $myerror]} return
211	set pos [$mystackloc peek]
212	incr pos
213	lassign $myerror loc messages
214	if {$loc != $pos} return
215	set myerror [list $loc [list $symbol]]
216	return
217    }
218
219    # # ## ### ##### ######## ############# #####################
220    ##  API - Instructions - Basic input handling and tracking
221
222    method i_loc_pop_rewind/discard {} {
223	#$myparser i:fail_loc_pop_rewind
224	#$myparser i:ok_loc_pop_discard
225	#return
226	set last [$mystackloc pop]
227	if {!$myok} {
228	    set myloc $last
229	}
230	return
231    }
232
233    method i_loc_pop_discard {} {
234	$mystackloc pop
235	return
236    }
237
238    method i_loc_pop_rewind {} {
239	set myloc [$mystackloc pop]
240	return
241    }
242
243    method i:fail_loc_pop_rewind {} {
244	if {$myok} return
245	set myloc [$mystackloc pop]
246	return
247    }
248
249    method i_loc_push {} {
250	$mystackloc push $myloc
251	return
252    }
253
254    # # ## ### ##### ######## ############# #####################
255    ##  API - Instructions - AST stack handling
256
257    method i_ast_pop_rewind/discard {} {
258	#$myparser i:fail_ast_pop_rewind
259	#$myparser i:ok_ast_pop_discard
260	#return
261	set mark [$mystackmark pop]
262	if {$myok} return
263	$mystackast trim $mark
264	return
265    }
266
267    method i_ast_pop_discard/rewind {} {
268	#$myparser i:ok_ast_pop_rewind
269	#$myparser i:fail_ast_pop_discard
270	#return
271	set mark [$mystackmark pop]
272	if {!$myok} return
273	$mystackast trim $mark
274	return
275    }
276
277    method i_ast_pop_discard {} {
278	$mystackmark pop
279	return
280    }
281
282    method i_ast_pop_rewind {} {
283	$mystackast trim [$mystackmark pop]
284	return
285    }
286
287    method i:fail_ast_pop_rewind {} {
288	if {$myok} return
289	$mystackast trim [$mystackmark pop]
290	return
291    }
292
293    method i_ast_push {} {
294	$mystackmark push [$mystackast size]
295	return
296    }
297
298    method i:ok_ast_value_push {} {
299	if {!$myok} return
300	$mystackast push $mysvalue
301	return
302    }
303
304    # # ## ### ##### ######## ############# #####################
305    ## API - Instructions - Nonterminal cache
306
307    method i_symbol_restore {symbol} {
308	# Satisfy from cache if possible.
309	set k [list $myloc $symbol]
310	if {![info exists mysymbol($k)]} { return 0 }
311	lassign $mysymbol($k) myloc myok myerror mysvalue
312	# We go forward, as the nonterminal matches (or not).
313	return 1
314    }
315
316    method i_symbol_save {symbol} {
317	# Store not only the value, but also how far
318	# the match went (if it was a match).
319	set at [$mystackloc peek]
320	set k  [list $at $symbol]
321	set mysymbol($k) [list $myloc $myok $myerror $mysvalue]
322	return
323    }
324
325    # # ## ### ##### ######## ############# #####################
326    ##  API - Instructions - Semantic values.
327
328    method i_value_clear {} {
329	set mysvalue {}
330	return
331    }
332
333    method i_value_clear/leaf {symbol} {
334	# not quite value_lead (guarded, and clear on fail)
335	# Inlined clear, reduce, and optimized.
336	# Clear ; if {$ok} {Reduce $symbol}
337	set mysvalue {}
338	if {!$myok} return
339	set  pos [$mystackloc peek]
340	incr pos
341	set mysvalue [pt::ast new $symbol $pos $myloc]
342	return
343    }
344
345    method i_value_clear/reduce {symbol} {
346	set mysvalue {}
347	if {!$myok} return
348
349	set  mark [$mystackmark peek];# Old size of stack before current nt pushed more.
350	set  newa [expr {[$mystackast size] - $mark}]
351
352	set  pos  [$mystackloc  peek]
353	incr pos
354
355	if {!$newa} {
356	    set mysvalue {}
357	} elseif {$newa == 1} {
358	    # peek 1 => single element comes back
359	    set mysvalue [list [$mystackast peek]]     ; # SaveToMark
360	} else {
361	    # peek n > 1 => list of elements comes back
362	    set mysvalue [lreverse [$mystackast peek $newa]]     ; # SaveToMark
363	}
364
365	set mysvalue [pt::ast new $symbol $pos $myloc {*}$mysvalue] ; # Reduce $symbol
366	return
367    }
368
369    # # ## ### ##### ######## ############# #####################
370    ## API - Instructions - Terminal matching
371
372    method i_input_next {msg} {
373	# Inlined: Getch, Expected, ClearErrors
374	# Satisfy from input cache if possible.
375
376	incr myloc
377	if {$myloc < [llength $mytoken]} {
378	    set mycurrent [lindex $mytoken $myloc 0]
379	    set myok    1
380	    set myerror {}
381	    return
382	}
383
384	# Actually read from the input, and remember
385	# the information.
386	# Note: We are implicitly incrementing the location!
387
388	set token [my ReadChar]
389
390	if {![llength $token]} {
391	    set myok    0
392	    set myerror [list $myloc [list $msg]]
393	    return
394	}
395
396	lappend mytoken   $token
397	set     mycurrent [lindex $token 0]
398	set     myok      1
399	set     myerror   {}
400	return
401    }
402
403    method i_test_alnum {} {
404	set myok [string is alnum -strict $mycurrent]
405	my OkFail [pt::pe alnum]
406	return
407    }
408
409    method i_test_alpha {} {
410	set myok [string is alpha -strict $mycurrent]
411	my OkFail [pt::pe alpha]
412	return
413    }
414
415    method i_test_ascii {} {
416	set myok [string is ascii -strict $mycurrent]
417	my OkFail [pt::pe ascii]
418	return
419    }
420
421    method i_test_char {tok} {
422	set myok [expr {$tok eq $mycurrent}]
423	my OkFail [pt::pe terminal $tok]
424	return
425    }
426
427    method i_test_ddigit {} {
428	set myok [string match {[0-9]} $mycurrent]
429	my OkFail [pt::pe ddigit]
430	return
431    }
432
433    method i_test_digit {} {
434	set myok [string is digit -strict $mycurrent]
435	my OkFail [pt::pe digit]
436	return
437    }
438
439    method i_test_graph {} {
440	set myok [string is graph -strict $mycurrent]
441	my OkFail [pt::pe graph]
442	return
443    }
444
445    method i_test_lower {} {
446	set myok [string is lower -strict $mycurrent]
447	my OkFail [pt::pe lower]
448	return
449    }
450
451    method i_test_print {} {
452	set myok [string is print -strict $mycurrent]
453	my OkFail [pt::pe printable]
454	return
455    }
456
457    method i_test_punct {} {
458	set myok [string is punct -strict $mycurrent]
459	my OkFail [pt::pe punct]
460	return
461    }
462
463    method i_test_range {toks toke} {
464	set myok [expr {
465			([string compare $toks $mycurrent] <= 0) &&
466			([string compare $mycurrent $toke] <= 0)
467		    }] ; # {}
468	my OkFail [pt::pe range $toks $toke]
469	return
470    }
471
472    method i_test_space {} {
473	set myok [string is space -strict $mycurrent]
474	my OkFail [pt::pe space]
475	return
476    }
477
478    method i_test_upper {} {
479	set myok [string is upper -strict $mycurrent]
480	my OkFail [pt::pe upper]
481	return
482    }
483
484    method i_test_wordchar {} {
485	set myok [string is wordchar -strict $mycurrent]
486	my OkFail [pt::pe wordchar]
487	return
488    }
489
490    method i_test_xdigit {} {
491	set myok [string is xdigit -strict $mycurrent]
492	my OkFail [pt::pe xdigit]
493	return
494    }
495
496    # # ## ### ##### ######## ############# #####################
497    ## Internals
498
499    method ReadChar {} {
500	upvar 1 mychan mychan myline myline mycolumn mycolumn
501
502	if {[eof $mychan]} {return {}}
503
504	set ch [read $mychan 1]
505	if {$ch eq ""} {return {}}
506
507	set token [list $ch $myline $mycolumn]
508
509	if {$ch eq "\n"} {
510	    incr myline
511	    set  mycolumn 0
512	} else {
513	    incr mycolumn
514	}
515
516	return $token
517    }
518
519    method OkFail {msg} {
520	upvar 1 myok myok myerror myerror myloc myloc
521	# Inlined: Expected, Unget, ClearErrors
522	if {!$myok} {
523	    set myerror [list $myloc [list $msg]]
524	    incr myloc -1
525	} else {
526	    set myerror {}
527	}
528	return
529    }
530
531    # # ## ### ##### ######## ############# #####################
532    ## Data structures.
533    ## Mainly the architectural state of the instance's PARAM.
534
535    variable \
536	mychan myline mycolumn \
537	mycurrent myloc mystackloc \
538	myok mysvalue myerror mystackerr \
539	mytoken mysymbol \
540	mystackast mystackmark
541
542    # Parser Input (channel, location (line, column)) ...........
543    # Token, current parsing location, stack of locations .......
544    # Match state .  ........ ............. .....................
545    # Caches for tokens and nonterminals .. .....................
546    # Abstract syntax tree (AST) .......... .....................
547
548    # # ## ### ##### ######## ############# #####################
549}
550
551# # ## ### ##### ######## ############# #####################
552## Ready
553
554package provide pt::rde 1
555return
556