1# -*- tcl -*-
2# ### ### ### ######### ######### #########
3## Package description
4
5## Implementation of the ME virtual machine as a singleton, tied to
6## Tcl for control flow and stack handling (except the AST stack).
7
8# ### ### ### ######### ######### #########
9## Requisites
10
11# ### ### ### ######### ######### #########
12## Implementation
13
14namespace eval ::grammar::me::tcl {
15    namespace export \
16	init lc tok sv tokens ast \
17	astall ctok nc next ord \
18	\
19	isv_clear              ict_advance        inc_save    \
20	isv_terminal           ict_match_token    inc_restore \
21	isv_nonterminal_leaf   ict_match_tokrange icl_get     \
22	isv_nonterminal_range  ict_match_tokclass icl_rewind  \
23	isv_nonterminal_reduce iok_ok      \
24	ier_clear              iok_fail    \
25	ier_get                iok_negate  \
26	ier_expected           ias_push    \
27	ier_nonterminal        ias_mark    \
28	ier_merge              ias_pop2mark
29
30    variable ok
31}
32
33# ### ### ### ######### ######### #########
34## Implementation, API. Ensemble command.
35
36proc ::grammar::me::tcl {cmd args} {
37    # Dispatcher for the ensemble command.
38    variable tcl::cmds
39    return [uplevel 1 [linsert $args 0 $cmds($cmd)]]
40}
41
42namespace eval grammar::me::tcl {
43    variable cmds
44
45    # Mapping from cmd names to procedures for quick dispatch. The
46    # objects will shimmer into resolved command references.
47
48    array set cmds {
49	init   ::grammar::me::tcl::init
50	lc     ::grammar::me::tcl::lc
51	tok    ::grammar::me::tcl::tok
52	sv     ::grammar::me::tcl::sv
53	tokens ::grammar::me::tcl::tokens
54	ast    ::grammar::me::tcl::ast
55	astall ::grammar::me::tcl::astall
56	ctok   ::grammar::me::tcl::ctok
57	nc     ::grammar::me::tcl::nc
58	next   ::grammar::me::tcl::next
59	ord    ::grammar::me::tcl::ord
60    }
61}
62
63# ### ### ### ######### ######### #########
64## API Implementation.
65
66proc ::grammar::me::tcl::init {nxcmd {tokmap {}}} {
67    variable next  $nxcmd
68    variable as    {}
69    variable ok    0
70    variable error {}
71    variable sv    {}
72    variable loc  -1
73    variable ct    {}
74    variable tc    {}
75    variable nc
76    variable tokOrd
77    variable tokUseOrd 0
78
79    array unset nc     *
80    array unset tokOrd *
81
82    if {[llength $tokmap]} {
83	if {[llength $tokmap] % 2 == 1} {
84	    return -code error \
85		    "Bad token order map, not a dictionary"
86	}
87	array set tokOrd $tokmap
88	set tokUseOrd 1
89    }
90    return
91}
92
93proc ::grammar::me::tcl::lc {pos} {
94    variable tc
95    return [lrange [lindex $tc $pos] 2 3]
96}
97
98proc ::grammar::me::tcl::tok {from {to {}}} {
99    variable tc
100    if {$to == {}} {set to $from}
101    return [lrange $tc $from $to]
102}
103
104proc ::grammar::me::tcl::tokens {} {
105    variable tc
106    return [llength $tc]
107}
108
109proc ::grammar::me::tcl::sv {} {
110    variable sv
111    return  $sv
112}
113
114proc ::grammar::me::tcl::ast {} {
115    variable as
116    return [lindex $as end]
117}
118
119proc ::grammar::me::tcl::astall {} {
120    variable as
121    return $as
122}
123
124proc ::grammar::me::tcl::ctok {} {
125    variable ct
126    return  $ct
127}
128
129proc ::grammar::me::tcl::nc {} {
130    variable nc
131    return [array get nc]
132}
133
134proc ::grammar::me::tcl::next {} {
135    variable next
136    return  $next
137}
138
139proc ::grammar::me::tcl::ord {} {
140    variable tokOrd
141    return [array get tokOrd]
142}
143
144# ### ### ### ######### ######### #########
145## Terminal matching
146
147proc ::grammar::me::tcl::ict_advance {msg} {
148    # Inlined: Getch, Expected, ClearErrors
149
150    variable ok
151    variable error
152    # ------------------------
153    variable tc
154    variable loc
155    variable ct
156    # ------------------------
157    variable next
158    # ------------------------
159
160    # Satisfy from input cache if possible.
161    incr loc
162    if {$loc < [llength $tc]} {
163	set ct [lindex $tc $loc 0]
164	set ok 1
165	set error {}
166	return
167    }
168
169    # Actually read from the input, and remember
170    # the information.
171
172    # Read from buffer, and remember.
173    # Note: loc is the instance variable.
174    # This implicitly increments the location!
175
176    set tokdata [uplevel \#0 $next]
177    if {![llength $tokdata]} {
178	set ok 0
179	set error [list $loc [list $msg]]
180	return
181    } elseif {[llength $tokdata] != 4} {
182	return -code error "Bad callback result, expected 4 elements"
183    }
184
185    lappend tc $tokdata
186    set ct [lindex $tokdata 0]
187    set ok    1
188    set error {}
189    return
190}
191
192proc ::grammar::me::tcl::ict_match_token {tok msg} {
193    variable ct
194    variable ok
195
196    set ok [expr {$tok eq $ct}]
197
198    OkFail $msg
199    return
200}
201
202proc ::grammar::me::tcl::ict_match_tokrange {toks toke msg} {
203    variable ct
204    variable ok
205    variable tokUseOrd
206    variable tokOrd
207
208    if {$tokUseOrd} {
209	set ord $tokOrd($ct)
210	set ok [expr {
211	    ($toks <= $ord) &&
212	    ($ord <= $toke)
213	}] ; # {}
214    } else {
215	set ok [expr {
216	    ([string compare $toks   $ct] <= 0) &&
217	    ([string compare $ct   $toke] <= 0)
218	}] ; # {}
219    }
220
221    OkFail $msg
222    return
223}
224
225proc ::grammar::me::tcl::ict_match_tokclass {code msg} {
226    variable ct
227    variable ok
228
229    set ok [string is $code -strict $ct]
230
231    OkFail $msg
232    return
233}
234
235proc ::grammar::me::tcl::OkFail {msg} {
236    variable ok
237    variable error
238    variable loc
239
240    # Inlined: Expected, Unget, ClearErrors
241
242    if {!$ok} {
243	set error [list $loc [list $msg]]
244	incr loc -1
245    } else {
246	set error {}
247    }
248    return
249}
250
251# ### ### ### ######### ######### #########
252## Nonterminal cache
253
254proc ::grammar::me::tcl::inc_restore {symbol} {
255    variable loc
256    variable nc
257    variable ok
258    variable error
259    variable sv
260
261    # Satisfy from cache if possible.
262    if {[info exists nc($loc,$symbol)]} {
263	foreach {go ok error sv} $nc($loc,$symbol) break
264
265	# Go forward, as the nonterminal matches (or not).
266	set loc $go
267	return 1
268    }
269    return 0
270}
271
272proc ::grammar::me::tcl::inc_save {symbol at} {
273    variable loc
274    variable nc
275    variable ok
276    variable error
277    variable sv
278
279    if 0 {
280	if {[info exists nc($at,$symbol)]} {
281	    return -code error "Cannot overwrite\
282		    existing data @ ($at, $symbol)"
283	}
284    }
285
286    # FIXME - end location should be argument.
287
288    # Store not only the value, but also how far
289    # the match went (if it was a match).
290
291    set nc($at,$symbol) [list $loc $ok $error $sv]
292    return
293}
294
295# ### ### ### ######### ######### #########
296## Unconditional matching.
297
298proc ::grammar::me::tcl::iok_ok {} {
299    variable ok 1
300    return
301}
302
303proc ::grammar::me::tcl::iok_fail {} {
304    variable ok 0
305    return
306}
307
308proc ::grammar::me::tcl::iok_negate {} {
309    variable ok
310    set ok [expr {!$ok}]
311    return
312}
313
314# ### ### ### ######### ######### #########
315## Basic input handling and tracking
316
317proc ::grammar::me::tcl::icl_get {} {
318    variable loc
319    return  $loc
320}
321
322proc ::grammar::me::tcl::icl_rewind {oldloc} {
323    variable loc
324
325    if 0 {
326	if {($oldloc < -1) || ($oldloc > $loc)} {
327	    return -code error "Bad location \"$oldloc\" (vs $loc)"
328	}
329    }
330    set loc $oldloc
331    return
332}
333
334# ### ### ### ######### ######### #########
335## Error handling.
336
337proc ::grammar::me::tcl::ier_get {} {
338    variable error
339    return  $error
340}
341
342proc ::grammar::me::tcl::ier_clear {} {
343    variable error {}
344    return
345}
346
347proc ::grammar::me::tcl::ier_nonterminal {msg pos} {
348    # Inlined: Errors, Expected.
349
350    variable error
351
352    if {[llength $error]} {
353	foreach {l m} $error break
354	incr pos
355	if {$l == $pos} {
356	    set error [list $l [list $msg]]
357	}
358    }
359}
360
361proc ::grammar::me::tcl::ier_merge {new} {
362    variable error
363
364    # We have either old or new error data, keep it.
365
366    if {![llength $error]} {set error $new ; return}
367    if {![llength $new]}   {return}
368
369    # If one of the errors is further on in the input choose that as
370    # the information to propagate.
371
372    foreach {loe msgse} $error break
373    foreach {lon msgsn} $new   break
374
375    if {$lon > $loe} {set error $new ; return}
376    if {$loe > $lon} {return}
377
378    # Equal locations, merge the message lists.
379
380    foreach m $msgsn {lappend msgse $m}
381    set error [list $loe [lsort -uniq $msgse]]
382    return
383}
384
385# ### ### ### ######### ######### #########
386## Operations for the construction of the
387## abstract syntax tree (AST).
388
389proc ::grammar::me::tcl::isv_clear {} {
390    variable sv {}
391    return
392}
393
394proc ::grammar::me::tcl::isv_terminal {} {
395    variable loc
396    variable sv
397    variable as
398
399    set sv [list {} $loc $loc]
400    lappend as $sv
401    return
402}
403
404proc ::grammar::me::tcl::isv_nonterminal_leaf {nt pos} {
405    # Inlined clear, reduce, and optimized.
406    variable ok
407    variable loc
408    variable sv {}
409
410    # Clear ; if {$ok} {Reduce $nt}
411
412    if {$ok} {
413	incr pos
414	set sv [list $nt $pos $loc]
415    }
416    return
417}
418
419proc ::grammar::me::tcl::isv_nonterminal_range {nt pos} {
420    variable ok
421    variable loc
422    variable sv {}
423
424    if {$ok} {
425	# TerminalString $pos
426	# Get all characters after 'pos' to current location as terminal data.
427
428	incr pos
429	set sv [list $nt $pos $loc [list {} $pos $loc]]
430
431	#set sv [linsert $sv 0 $nt] ;#Reduce $nt
432    }
433    return
434}
435
436proc ::grammar::me::tcl::isv_nonterminal_reduce {nt pos {mrk 0}} {
437    variable ok
438    variable as
439    variable loc
440    variable sv {}
441
442    if {$ok} {
443	incr pos
444	set sv [lrange $as $mrk end]         ;#SaveToMark $mrk
445	set sv [linsert $sv 0 $nt $pos $loc] ;#Reduce $nt
446    }
447    return
448}
449
450# ### ### ### ######### ######### #########
451## AST stack handling
452
453proc ::grammar::me::tcl::ias_push {} {
454    variable as
455    variable sv
456    lappend as $sv
457    return
458}
459
460proc ::grammar::me::tcl::ias_mark {} {
461    variable as
462    return [llength $as]
463}
464
465proc ::grammar::me::tcl::ias_pop2mark {mark} {
466    variable as
467    if {[llength $as] <= $mark} return
468    incr mark -1
469    set as [lrange $as 0 $mark]
470    return
471}
472
473# ### ### ### ######### ######### #########
474## Data structures.
475
476namespace eval ::grammar::me::tcl {
477    # ### ### ### ######### ######### #########
478    ## Public State of MVM (Matching Virtual Machine)
479
480    variable ok   0  ; # Boolean: Ok/Fail of last match operation.
481
482    # ### ### ### ######### ######### #########
483    ## Internal state.
484
485    variable ct   {}  ; # Current token.
486    variable loc  0   ; # Location of 'ct' as offset in input.
487
488    variable error {} ; # Error data for last match.
489    #                 ; # == List (loc, list of strings)
490    #                 ; # or empty list
491    variable sv   {}  ; # Semantic value for last match.
492
493    # ### ### ### ######### ######### #########
494    ## Data structures for AST construction
495
496    variable as {} ; # Stack of values for AST
497
498    # ### ### ### ######### ######### #########
499    ## Memo data structures for tokens and match results.
500
501    variable tc {}
502    variable nc ; array set nc {}
503
504    # ### ### ### ######### ######### #########
505    ## Input buffer, location of next character to read.
506    ## ASSERT (loc <= cloc)
507
508    variable next   ; # Callback to get next character.
509
510    # Token ordering for range checks. Optional
511
512    variable tokOrd ; array set tokOrd {}
513    variable tokUseOrd 0
514
515    # ### ### ### ######### ######### #########
516}
517
518# ### ### ### ######### ######### #########
519## Package Management
520
521package provide grammar::me::tcl 0.1
522