1# -*- tcl -*-
2# (C) 2005-2006 Andreas Kupries <andreas_kupries@users.sourceforge.net>
3# ### ### ### ######### ######### #########
4## Package description
5
6## Implementation of ME virtual machines based on state values
7## manipulated by the commands according to the match
8## instructions. Allows for implementation in C.
9
10# ### ### ### ######### ######### #########
11## Requisites
12
13namespace eval ::grammar::me::cpu::core {}
14
15# ### ### ### ######### ######### #########
16## Implementation, API. Ensemble command.
17
18proc ::grammar::me::cpu::core {cmd args} {
19    # Dispatcher for the ensemble command.
20    variable core::cmds
21    return [uplevel 1 [linsert $args 0 $cmds($cmd)]]
22}
23
24namespace eval grammar::me::cpu::core {
25    variable cmds
26
27    # Mapping from cmd names to procedures for quick dispatch. The
28    # objects will shimmer into resolved command references.
29
30    array set cmds {
31	disasm ::grammar::me::cpu::core::disasm
32	asm    ::grammar::me::cpu::core::asm
33	new    ::grammar::me::cpu::core::new
34	lc     ::grammar::me::cpu::core::lc
35	tok    ::grammar::me::cpu::core::tok
36	pc     ::grammar::me::cpu::core::pc
37	iseof  ::grammar::me::cpu::core::iseof
38	at     ::grammar::me::cpu::core::at
39	cc     ::grammar::me::cpu::core::cc
40	sv     ::grammar::me::cpu::core::sv
41	ok     ::grammar::me::cpu::core::ok
42	error  ::grammar::me::cpu::core::error
43	lstk   ::grammar::me::cpu::core::lstk
44	astk   ::grammar::me::cpu::core::astk
45	mstk   ::grammar::me::cpu::core::mstk
46	estk   ::grammar::me::cpu::core::estk
47	rstk   ::grammar::me::cpu::core::rstk
48	nc     ::grammar::me::cpu::core::nc
49	ast    ::grammar::me::cpu::core::ast
50	halted ::grammar::me::cpu::core::halted
51	code   ::grammar::me::cpu::core::code
52	eof    ::grammar::me::cpu::core::eof
53	put    ::grammar::me::cpu::core::put
54	run    ::grammar::me::cpu::core::run
55    }
56}
57
58# ### ### ### ######### ######### #########
59## Ensemble implementation
60
61proc ::grammar::me::cpu::core::disasm {code} {
62    variable iname
63    variable tclass
64    variable anum
65
66    Validate $code ord dst jmp
67
68    set label 0
69    foreach k [array names jmp] {
70	set jmp($k) bra$label
71	incr label
72    }
73    foreach k [array names dst] {
74	if {![info exists jmp($k)]} {
75	    set jmp($k) {}
76	}
77    }
78
79    set result {}
80    foreach {asm pool tokmap} $code break
81
82    set pc    0
83    set pcend [llength $asm]
84
85    while {$pc < $pcend} {
86	set base $pc
87	set insn [lindex $asm $pc] ; incr pc
88	set an   [lindex $anum $insn]
89
90	if {$an == 1} {
91	    set a [lindex $asm $pc] ; incr pc
92	} elseif {$an == 2} {
93	    set a [lindex $asm $pc] ; incr pc
94	    set b [lindex $asm $pc] ; incr pc
95	} elseif {$an == 3} {
96	    set a [lindex $asm $pc] ; incr pc
97	    set b [lindex $asm $pc] ; incr pc
98	    set c [lindex $asm $pc] ; incr pc
99	}
100
101	set     instruction {}
102	lappend instruction $jmp($base)
103	lappend instruction $iname($insn)
104
105	switch -exact $insn {
106	    0 - 5 - 20 - 24 - 25 - 26 -
107	    a/string {
108		lappend instruction [lindex $pool $a]
109	    }
110	    1 {
111		# a/tok b/string
112		if {![llength $tokmap]} {
113		    lappend instruction [lindex $pool $a]
114		} else {
115		    lappend instruction ${a}:$ord($a)
116		}
117		lappend instruction [lindex $pool $b]
118	    }
119	    2 {
120		# a/tokstart b/tokend c/string
121		if {![llength $tokmap]} {
122		    lappend instruction [lindex $pool $a]
123		    lappend instruction [lindex $pool $b]
124		} else {
125		    # tokmap defined: a = b = order rank.
126		    lappend instruction ${a}:$ord($a)
127		    lappend instruction ${b}:$ord($b)
128		}
129		lappend instruction [lindex $pool $c]
130	    }
131	    3 {
132		# a/class(0-5) b/string
133		lappend instruction [lindex $tclass $a]
134		lappend instruction [lindex $pool $b]
135	    }
136	    4 {
137		# a/branch b/string
138		lappend instruction $jmp($a)
139		lappend instruction [lindex $pool $b]
140	    }
141	    6 - 11 - 12 - 13 -
142	    a/branch {
143		lappend instruction $jmp($a)
144	    }
145	    default {}
146	}
147
148	lappend result $instruction
149    }
150
151    return $result
152}
153
154proc ::grammar::me::cpu::core::asm {code} {
155    variable iname
156    variable anum
157    variable tccode
158
159    # code = list(insn), insn = list (label insn-name ...)
160
161    # I. Indices for the labels, based on instruction sizes.
162
163    array set jmp {}
164    set off 0
165    foreach insn $code {
166	foreach {label name} $insn break
167	# Ignore embedded comments, except for labels
168	if {$label ne ""} {
169	    set jmp($label) $off
170	}
171	if {$name eq ".C"} continue
172	if {![info exists iname($name)]} {
173	    return -code error "Bad instruction \"$insn\", unknown command \"$name\""
174	}
175	set an [lindex $anum $iname($name)]
176	if {[llength $insn] != ($an+2)} {
177	    return -code error "Bad instruction \"$insn\", expected $an argument[expr {$an == 1 ? "" : "s"}]"
178	}
179	incr off
180	incr off [lindex $anum $iname($name)]
181    }
182
183    set asm          {}
184    set pool         {}
185    array set poolh  {}
186    array set tokmap {}
187    array set ord    {}
188    set plain        0
189
190    foreach insn $code {
191	foreach {label name} $insn break
192	# Ignore embedded comments
193	if {$name eq ".C"} continue
194	set an [lindex $anum $iname($name)]
195
196	# Instruction code to assembly ...
197	lappend asm $iname($name)
198
199	# Encode arguments ...
200	switch -exact -- $name {
201	    ict_advance            -
202	    inc_save               -
203	    ier_nonterminal        -
204	    isv_nonterminal_leaf   -
205	    isv_nonterminal_range  -
206	    isv_nonterminal_reduce {
207		lappend asm [Str [lindex $insn 2]]
208	    }
209	    ict_match_token {
210		lappend asm [Tok [lindex $insn 2]]
211		lappend asm [Str [lindex $insn 3]]
212	    }
213	    ict_match_tokrange {
214		lappend asm [Tok [lindex $insn 2]]
215		lappend asm [Tok [lindex $insn 3]]
216		lappend asm [Str [lindex $insn 4]]
217	    }
218	    ict_match_tokclass {
219		set ccode [lindex $insn 2]
220		if {![info exists tccode($ccode)]} {
221		    return -code error "Bad instruction \"$insn\", unknown class code \"$ccode\""
222		}
223		lappend asm $tccode($ccode)
224		lappend asm [Str [lindex $insn 3]]
225
226	    }
227	    inc_restore {
228		set jmpto [lindex $insn 2]
229		if {![info exists jmp($jmpto)]} {
230		    return -code error "Bad instruction \"$insn\", unknown branch destination \"$jmpto\""
231		}
232		lappend asm $jmp($jmpto)
233		lappend asm [Str [lindex $insn 3]]
234	    }
235	    icf_ntcall  -
236	    icf_jalways -
237	    icf_jok     -
238	    icf_jfail   {
239		set jmpto [lindex $insn 2]
240		if {![info exists jmp($jmpto)]} {
241		    return -code error "Bad instruction \"$insn\", unknown branch destination \"$jmpto\""
242		}
243		lappend asm $jmp($jmpto)
244	    }
245	}
246    }
247
248    return [list $asm $pool [array get tokmap]]
249}
250
251proc ::grammar::me::cpu::core::new {code} {
252    # The code generating the state is drawn out to integrate a
253    # specification of how the machine state is mapped to Tcl as well.
254
255    Validate $code
256
257    set     state {}   ; # The state is representend as a Tcl list.
258    # ### ### ### ######### ######### #########
259    lappend state $code ; # [_0] code  - list  - code to run (-)
260    lappend state 0     ; # [_1] pc    - int   - Program counter
261    lappend state 0     ; # [_2] halt  - bool  - Flag, set (internal) when machine was halted (icf_halt).
262    lappend state 0     ; # [_3] eof   - bool  - Flag, set (external) when where will be no more input.
263    lappend state {}    ; # [_4] tc    - list  - Terminal cache, pending and processed tokens.
264    lappend state -1    ; # [_5] cl    - int   - Current Location
265    lappend state {}    ; # [_6] ct    - token - Current Character
266    lappend state 0     ; # [_7] ok    - bool  - Match Status
267    lappend state {}    ; # [_8] sv    - any   - Semantic Value
268    lappend state {}    ; # [_9] er    - list  - Error status (*)
269    lappend state {}    ; # [10] ls    - list  - Location Stack (x)
270    lappend state {}    ; # [11] as    - list  - Ast Stack
271    lappend state {}    ; # [12] ms    - list  - Ast Marker Stack
272    lappend state {}    ; # [13] es    - list  - Error Stack
273    lappend state {}    ; # [14] rs    - list  - Return Stack
274    lappend state {}    ; # [15] nc    - dict  - Nonterminal Cache (backtracking)
275    # ### ### ### ######### ######### #########
276
277    # tc    = list(token)
278    # token = list(str lexeme line col)
279
280
281    # (-) See manpage of this package for the representation.
282
283    # (*) 2 elements, first is error location, second is list of
284    # ... strings, the error messages. The strings are actually
285    # ... represented by references into the pool element of the code.
286
287    # (x) Regarding the various stacks maintained in the state, their
288    #     top element is always at the right end, i.e. the last
289    #     element in the list representing it.
290
291    return $state
292}
293
294proc ::grammar::me::cpu::core::ntok {state} {
295    return [llength [lindex $state 4]]
296}
297
298proc ::grammar::me::cpu::core::lc {state loc} {
299    set tc  [lindex $state 4]
300    set loc [INDEX $tc $loc "Illegal location"]
301    return [lrange [lindex $tc $loc] 2 3]
302    # result = list(line col)
303}
304
305proc ::grammar::me::cpu::core::tok {state args} {
306    if {[llength $args] > 2} {
307	return -code error {wrong # args: should be "grammar::me::cpu::core::tok state ?from ?to??"}
308    }
309    set tc [lindex $state 4]
310    if {[llength $args] == 0} {
311	return $tc
312    } elseif {[llength $args] == 1} {
313	set at [INDEX $tc [lindex $args 0] "Illegal location"]
314	return [lrange $tc $at $at]
315    } else {
316	set from [INDEX $tc [lindex $args 0] "Illegal start location"]
317	set to   [INDEX $tc [lindex $args 1] "Illegal end location"]
318	if {$from > $to} {
319	    return -code error "Illegal empty location range $from .. $to"
320	}
321	return [lrange $tc $from $to]
322    }
323    # result = list(token), token = list(str lex line col)
324}
325
326proc ::grammar::me::cpu::core::pc {state} {
327    return [lindex $state 1]
328}
329
330proc ::grammar::me::cpu::core::iseof {state} {
331    return [lindex $state 3]
332}
333
334proc ::grammar::me::cpu::core::at {state} {
335    return [lindex $state 5]
336}
337
338proc ::grammar::me::cpu::core::cc {state} {
339    return [lindex $state 6]
340}
341
342proc ::grammar::me::cpu::core::sv {state} {
343    return [lindex $state 8]
344}
345
346proc ::grammar::me::cpu::core::ok {state} {
347    return [lindex $state 7]
348}
349
350proc ::grammar::me::cpu::core::error {state} {
351    set er [lindex $state 9]
352    if {[llength $er]} {
353	foreach {l m} $er break
354
355	set pool [lindex $state 0 1] ; # state ->/0 code ->/1 pool
356	set mx   {}
357	foreach id $m {
358	    lappend mx [lindex $pool $id]
359	}
360	set er [list $l $mx]
361    }
362    return $er
363}
364
365proc ::grammar::me::cpu::core::lstk {state} {
366    return [lindex $state 10]
367}
368
369proc ::grammar::me::cpu::core::astk {state} {
370    return [lindex $state 11]
371}
372
373proc ::grammar::me::cpu::core::mstk {state} {
374    return [lindex $state 12]
375}
376
377proc ::grammar::me::cpu::core::estk {state} {
378    return [lindex $state 13]
379}
380
381proc ::grammar::me::cpu::core::rstk {state} {
382    return [lindex $state 14]
383}
384
385proc ::grammar::me::cpu::core::nc {state} {
386    return [lindex $state 15]
387}
388
389proc ::grammar::me::cpu::core::ast {state} {
390    return [lindex $state 11 end]
391}
392
393proc ::grammar::me::cpu::core::halted {state} {
394    return [lindex $state 2]
395}
396
397proc ::grammar::me::cpu::core::code {state} {
398    return [lindex $state 0]
399}
400
401proc ::grammar::me::cpu::core::eof {statevar} {
402    upvar 1 $statevar state
403    lset state 3 1
404    return
405}
406
407proc ::grammar::me::cpu::core::put {statevar tok lex line col} {
408    upvar 1 $statevar state
409    if {[lindex $state 3]} {
410	return -code error "Cannot add input data after eof"
411    }
412    set     tc [K [lindex $state 4] [lset state 4 {}]]
413    lappend tc [list $tok $lex $line $col]
414    lset state 4 $tc
415    return
416}
417
418proc ::grammar::me::cpu::core::run {statevar {steps -1}} {
419    # Execution loop. Should be instrumented for statistics about
420    # dynamic instruction frequency. I.e. which instructions are
421    # executed the most => put them at the front of the if/switch for
422    # quicker selection. I.e. frequency coding of the branches for
423    # speed.
424
425    # A C implementation can shimmer the state into a directly
426    # accessible data structure. And the asm instructions can shimmer
427    # into an integer index upon which we can switch fast.
428
429    variable anum
430    variable tclass
431    upvar 1 $statevar state
432    variable iname ; # For debug output
433
434    # Do nothing for a stopped machine (halt flag set).
435    if {[lindex $state 2]} {return $state}
436
437    # Fail if there are no instruction to execute
438    if {![llength [lindex $state 0 0]]} {
439	# No instructions to execute
440	return -code error "No instructions to execute"
441    }
442
443    # Unpack state into locally accessible variables
444    #        0    1  2    3   4  5  6  7  8  9  10 11 12 13 14 15 16 17 18  19  20
445    foreach {code pc halt eof tc cl ct ok sv er ls as ms es rs nc} $state break
446
447    # Unpack match program for easy access as well.
448    #        0   1    2
449    foreach {asm pool tokmap} $code break
450
451    if 0 {
452	puts ________________________
453	puts [join [disasm $code] \n]
454	puts ________________________
455    }
456
457    # Ensure that the unpacked information is not shared
458    unset state
459
460    # Internal flags for optimal handling of the nonterminal
461    # cache. Avoid multiple unpacking of the dictionary, and avoid
462    # repacking if it was not modified.
463
464    set ncunpacked 0
465    set ncmodified 0
466    set tmunpacked 0
467
468    while {1} {
469	# Stop execution if the specified number of instructions have
470	# been executed. Ignore if infinity was specified.
471	if {$steps == 0} break
472	if {$steps > 0} {incr steps -1}
473
474	# Get current instruction ...
475
476	if 0 {puts .$pc:\t$iname([lindex $asm $pc])}
477	if 0 {puts -nonewline .$pc:\t$iname([lindex $asm $pc])}
478
479	set insn [lindex $asm $pc] ; incr pc
480
481	# And its arguments ...
482
483	set an [lindex $anum $insn]
484	if {$an == 1} {
485	    set a [lindex $asm $pc] ; incr pc
486	    if 0 {puts \t<$a>}
487	} elseif {$an == 2} {
488	    set a [lindex $asm $pc] ; incr pc
489	    set b [lindex $asm $pc] ; incr pc
490	    if 0 {puts \t<$a|$b>}
491	} elseif {$an == 3} {
492	    set a [lindex $asm $pc] ; incr pc
493	    set b [lindex $asm $pc] ; incr pc
494	    set c [lindex $asm $pc] ; incr pc
495	    if 0 {puts \t<$a|$b|$c>}
496	} ;# else {puts ""}
497
498	# Dispatch to implementation of the instruction ...
499
500	# Separate if commands are used for easier ordering of the
501	# dispatch. The order of the branches should be frequency
502	# coded to have the most frequently used instructions first.
503
504	# ict_advance <a:message>
505	if {$insn == 0} {
506	    if 0 {puts \t\[$cl|[llength $tc]|$eof\]}
507	    incr cl
508	    if {$cl < [llength $tc]} {
509		if 0 {puts \tConsume}
510
511		set ct [lindex $tc $cl 0]
512		set ok 1
513		set er {}
514	    } elseif {$eof} {
515		if 0 {puts \tFail<Eof>}
516
517		# We have no input, and there won't be more coming in
518		# either. Fail the advance. We do _not_ stop the match
519		# loop, the program has to complete. The failure might
520		# be no such, revealed during backtracking. The current
521		# location is not rewound automatically, this is the
522		# responsibility of any backtracking.
523
524		set er  [list $cl [list $a]]
525		set ok  0
526	    } else {
527		if 0 {puts \tSuspend&Wait}
528
529		# We have no input, stop matching and wait for
530		# more. We reset the machine into a state
531		# which will restart this instruction when
532		# execution resumes.
533
534		incr cl -1
535		incr pc -2 ; # code and message argument
536		break
537	    }
538	    if 0 {puts .Next}
539	    continue
540	}
541
542	# ict_match_token <a:token> <b:message>
543	if {$insn == 1} {
544	    if {[llength $tokmap]} {
545		if {!$tmunpacked} {
546		    array set tm $tokmap
547		    set tmunpacked 1
548		}
549		set ok [expr {$a == $tm($ct)}]
550	    } else {
551		set xch [lindex $pool $a]
552		set ok  [expr {$xch eq $ct}]
553	    }
554	    if {!$ok} {
555		set er [list $cl [list $b]]
556	    } else {
557		set er {}
558	    }
559	    continue
560	}
561
562	# ict_match_tokrange <a:tokstart> <b:tokend> <c:message>
563	if {$insn == 2} {
564	    if {[llength $tokmap]} {
565		if {!$tmunpacked} {
566		    array set tm $tokmap
567		    set tmunpacked 1
568		}
569		set x $tm($ct)
570		set ok [expr {($a <= $x) && ($x <= $b)}]
571	    } else {
572		set a [lindex $pool $a]
573		set b [lindex $pool $b]
574		set ok [expr {
575		    ([string compare $a $ct] <= 0) &&
576		    ([string compare $ct $b] <= 0)
577		}] ; # {}
578	    }
579	    if {!$ok} {
580		set er [list $cl [list $c]]
581	    } else {
582		set er {}
583	    }
584	    continue
585	}
586
587	# ict_match_tokclass <a:code> <b:message>
588	if {$insn == 3} {
589	    set strcode [lindex $tclass $a]
590	    set ok   [string is $strcode -strict $ct]
591	    if {!$ok} {
592		set er [list $cl [list $b]]
593	    } else {
594		set er {}
595	    }
596	    continue
597	}
598
599	# inc_restore <a:branchtarget> <b:nonterminal>
600	if {$insn == 4} {
601	    set sym [lindex $pool $b]
602
603	    # Unpack the cache dict, only here.
604	    # 8.5 - Use dict operations instead.
605
606	    if {!$ncunpacked} {
607		array set ncc $nc
608		set ncunpacked 1
609	    }
610
611	    if {[info exists ncc($cl,$sym)]} {
612		foreach {go ok error sv} $ncc($cl,$sym) break
613
614		# Go forward, as the nonterminal matches (or not).
615		set cl $go
616		set pc $a
617	    }
618	    continue
619	}
620
621	# inc_save <a:nonterminal>
622	if {$insn == 5} {
623	    set sym [lindex $pool $a]
624	    set at  [lindex $ls end]
625	    set ls  [lrange $ls 0 end-1]
626
627	    # Unpack, modify, only here.
628	    # 8.5 - Use dict operations instead.
629
630	    if {!$ncunpacked} {
631		array set ncc $nc
632		set ncunpacked 1
633	    }
634
635	    set ncc($at,$sym) [list $cl $ok $er $sv]
636	    set ncmodified 1
637	    continue
638	}
639
640	# icf_ntcall <a:branchtarget>
641	if {$insn == 6} {
642	    lappend rs $pc
643	    set     pc $a
644	    continue
645	}
646
647	# icf_ntreturn
648	if {$insn == 7} {
649	    set pc [lindex $rs end]
650	    set rs [lrange $rs 0 end-1]
651	    continue
652	}
653
654	# iok_ok
655	if {$insn == 8} {
656	    set ok 1
657	    continue
658	}
659
660	# iok_fail
661	if {$insn == 9} {
662	    set ok 0
663	    continue
664	}
665
666	# iok_negate
667	if {$insn == 10} {
668	    set ok [expr {!$ok}]
669	    continue
670	}
671
672	# icf_jalways <a:branchtarget>
673	if {$insn == 11} {
674	    set pc $a
675	    continue
676	}
677
678	# icf_jok <a:branchtarget>
679	if {$insn == 12} {
680	    if {$ok} {set pc $a}
681	    # !ok => pc is already on next instruction.
682	    continue
683	}
684
685	# icf_jfail <a:branchtarget>
686	if {$insn == 13} {
687	    if {!$ok} {set pc $a}
688	    # ok => pc is already on next instruction.
689	    continue
690	}
691
692	# icf_halt
693	if {$insn == 14} {
694	    set halt 1
695	    break
696	}
697
698	# icl_push
699	if {$insn == 15} {
700	    lappend ls $cl
701	    continue
702	}
703
704	# icl_rewind
705	if {$insn == 16} {
706	    set cl [lindex $ls end]
707	    set ls [lrange $ls 0 end-1]
708	    continue
709	}
710
711	# icl_pop
712	if {$insn == 17} {
713	    set ls [lrange $ls 0 end-1]
714	    continue
715	}
716
717	# ier_push
718	if {$insn == 18} {
719	    lappend es $er
720	    continue
721	}
722
723	# ier_clear
724	if {$insn == 19} {
725	    set er {}
726	    continue
727	}
728
729	# ier_nonterminal <a:nonterminal>
730	if {$insn == 20} {
731	    if {[llength $er]} {
732		set  pos [lindex $ls end]
733		incr pos
734		set eloc [lindex $er 0]
735		if {$eloc == $pos} {
736		    set er [list $eloc [list $a]]
737		}
738	    }
739	    continue
740	}
741
742	# ier_merge
743	if {$insn == 21} {
744	    set old [lindex $es end]
745	    set es  [lrange $es 0 end-1]
746
747	    # We have either old or current error data, keep it.
748
749	    if {![llength $er]} {
750		# No current data, keep old
751		set er $old
752	    } elseif {[llength $old]} {
753		# If one of the errors is further on in the input
754		# choose that as the information to propagate.
755
756		foreach {loe msgse} $er  break
757		foreach {lon msgsn} $old break
758
759		if {$lon > $loe} {
760		    set er $old
761		} elseif {$loe == $lon} {
762		    # Equal locations, merge the message lists.
763
764		    foreach m $msgsn {lappend msgse $m}
765		    set er [list $loe [lsort -uniq $msgse]]
766		}
767		# else lon < loe - er is better - nothing
768	    }
769	    # else - !old, but er - nothing
770
771	    continue
772	}
773
774	# isv_clear
775	if {$insn == 22} {
776	    set sv {}
777	    continue
778	}
779
780	# isv_terminal (implied ias_push)
781	if {$insn == 23} {
782	    set sv [list {} $cl $cl]
783	    lappend as $sv
784	    continue
785	}
786
787	# isv_nonterminal_leaf <a:nonterminal>
788	if {$insn == 24} {
789	    set pos [lindex $ls end]
790	    set sv  [list $a $pos $cl]
791	    continue
792	}
793
794	# isv_nonterminal_range <a:nonterminal>
795	if {$insn == 25} {
796	    set pos [lindex $ls end]
797	    set sv  [list $a $pos $cl [list {} $pos $cl]]
798	    continue
799	}
800
801	# isv_nonterminal_reduce <a:nonterminal>
802	if {$insn == 26} {
803	    set pos [lindex $ls end]
804	    if {[llength $ms]} {
805		set  mrk [lindex $ms end]
806		incr mrk
807	    } else {
808		set mrk 0
809	    }
810	    set sv [lrange $as $mrk end]
811	    set sv [linsert $sv 0 $a $pos $cl]
812	    continue
813	}
814
815	# ias_push
816	if {$insn == 27} {
817	    lappend as $sv
818	    continue
819	}
820
821	# ias_mark
822	if {$insn == 28} {
823	    set  mark [llength $as]
824	    incr mark -1
825	    lappend ms $mark
826	    continue
827	}
828
829	# ias_mrewind
830	if {$insn == 29} {
831	    set mark [lindex $ms end]
832	    set ms   [lrange $ms 0 end-1]
833	    set as   [lrange $as 0 $mark]
834	    continue
835	}
836
837	# ias_mpop
838	if {$insn == 30} {
839	    set ms [lrange $ms 0 end-1]
840	    continue
841	}
842
843	return -code error "Illegal instruction $insn"
844    }
845
846    # Repack a modified cache dictionary, then repack and store the
847    # updated state value.
848
849    if 0 {puts .Repackage\ state}
850
851    if {$ncmodified} {set nc [array get ncc]}
852    set state [list $code $pc $halt $eof $tc $cl $ct $ok $sv $er $ls $as $ms $es $rs $nc]
853    return
854}
855
856namespace eval grammar::me::cpu::core {
857    # Map between class codes and names
858    variable tclass {}
859    variable tccode
860
861    foreach {x code} {
862	0 alnum
863	1 alpha
864	2 digit
865	3 xdigit
866	4 punct
867	5 space
868    } {
869	lappend tclass $code
870	set tccode($code) $x
871    }
872
873    # Number of arguments per ME instruction.
874    # Indexed by instruction code.
875    variable anum {}
876
877    # Mapping between instruction codes and names.
878    variable iname
879
880    foreach {z insn x notes} {
881	0  ict_advance            1	{-- TESTED}
882	1  ict_match_token        2	{-- TESTED}
883	2  ict_match_tokrange     3	{-- TESTED}
884	3  ict_match_tokclass     2	{-- TESTED}
885	4  inc_restore            2	{-- TESTED}
886	5  inc_save               1	{-- TESTED}
887	6  icf_ntcall             1	{-- TESTED}
888	7  icf_ntreturn           0	{-- TESTED}
889	8  iok_ok                 0	{-- TESTED}
890	9  iok_fail               0	{-- TESTED}
891	10 iok_negate             0	{-- TESTED}
892	11 icf_jalways            1	{-- TESTED}
893	12 icf_jok                1	{-- TESTED}
894	13 icf_jfail              1	{-- TESTED}
895	14 icf_halt               0	{-- TESTED}
896	15 icl_push               0	{-- TESTED}
897	16 icl_rewind             0	{-- TESTED}
898	17 icl_pop                0	{-- TESTED}
899	18 ier_push               0	{-- TESTED}
900	19 ier_clear              0	{-- TESTED}
901	20 ier_nonterminal        1	{-- TESTED}
902	21 ier_merge              0	{-- TESTED}
903	22 isv_clear              0	{-- TESTED}
904	23 isv_terminal           0	{-- TESTED}
905	24 isv_nonterminal_leaf   1	{-- TESTED}
906	25 isv_nonterminal_range  1	{-- TESTED}
907	26 isv_nonterminal_reduce 1	{-- TESTED}
908	27 ias_push               0	{-- TESTED}
909	28 ias_mark               0	{-- TESTED}
910	29 ias_mrewind            0	{-- TESTED}
911	30 ias_mpop               0	{-- TESTED}
912    } {
913	lappend anum $x
914	set iname($z) $insn
915	set iname($insn) $z
916    }
917}
918
919# ### ### ### ######### ######### #########
920## Helper commands ((Dis)Assembler, runtime).
921
922proc ::grammar::me::cpu::core::INDEX {list i label} {
923    if {$i eq "end"} {
924	set i [expr {[llength $list] - 1}]
925    } elseif {[regexp {^end-([0-9]+)$} $i -> n]} {
926	set i [expr {[llength $list] - $n -1}]
927    }
928    if {
929	![string is integer -strict $i] ||
930	($i < 0) ||
931	($i >= [llength $list])
932    } {
933	return -code error "$label $i"
934    }
935    return $i
936}
937
938proc ::grammar::me::cpu::core::K {x y} {set x}
939
940proc ::grammar::me::cpu::core::Str {str} {
941    upvar 1 pool pool poolh poolh
942    if {![info exists poolh($str)]} {
943	set poolh($str) [llength $pool]
944	lappend pool $str
945    }
946    return $poolh($str)
947}
948
949proc ::grammar::me::cpu::core::Tok {str} {
950    upvar 1 tokmap tokmap ord ord plain plain
951
952    if {[regexp {^([^:]+):(.+)$} $str -> id name]} {
953	if {$plain} {
954	    return -code error "Bad assembly, mixing plain and ranked tokens"
955	}
956	if {[info exists ord($id)]} {
957	    return -code error "Bad assembly, non-total ordering for $name and $ord($id), at rank $id"
958	}
959	set ord($id) $name
960	set tokmap($name) $id
961
962	return $id
963    } else {
964	if {[array size ord]} {
965	    return -code error "Bad assembly, mixing plain and ranked tokens"
966	}
967	set plain 1
968	return [uplevel 1 [list Str $str]]
969    }
970}
971
972proc ::grammar::me::cpu::core::Validate {code {ovar {}} {tvar {}} {jvar {}}} {
973    variable anum
974    variable iname
975
976    # Basic validation of structure ...
977
978    if {[llength $code] != 3} {
979	return -code error "Bad length"
980    }
981
982    foreach {asm pool tokmap} $code break
983
984    if {[llength $tokmap] % 2 == 1} {
985	return -code error "Bad tokmap, expected a dictionary"
986    }
987
988    array set ord {}
989    if {[llength $tokmap] > 0} {
990	foreach {tok rank} $tokmap {
991	    if {[info exists ord($rank)]} {
992		return -code error "Bad tokmap, non-total ordering for $tok and $ord($rank), at rank $rank"
993	    }
994	    set ord($rank) $tok
995	}
996    }
997
998    # Basic validation of ME code: Valid instructions, collect valid
999    # branch target indices
1000
1001    array set target {}
1002
1003    set pc 0
1004    set pcend   [llength $asm]
1005    set poolend [llength $pool]
1006
1007    while {$pc < $pcend} {
1008	set target($pc) .
1009
1010	set insn [lindex $asm $pc]
1011	if {($insn < 0) || ($insn > 30)} {
1012	    return -code error "Invalid instruction $insn at PC $pc"
1013	}
1014
1015	incr pc
1016	incr pc [lindex $anum $insn]
1017    }
1018
1019    if {$pc > $pcend} {
1020	return -code error "Bad program, last instruction $insn ($iname($insn)) is truncated"
1021    }
1022
1023    # Validation of ME instruction arguments (pool references, branch
1024    # targets, ...)
1025
1026    if {$jvar ne ""} {
1027	upvar 1 $jvar jmp
1028    }
1029    array set jmp {}
1030
1031    set pc 0
1032    while {$pc < $pcend} {
1033	set base $pc
1034	set insn [lindex $asm $pc] ; incr pc
1035	set an   [lindex $anum $insn]
1036
1037	if {$an == 1} {
1038	    set a [lindex $asm $pc] ; incr pc
1039	} elseif {$an == 2} {
1040	    set a [lindex $asm $pc] ; incr pc
1041	    set b [lindex $asm $pc] ; incr pc
1042	} elseif {$an == 3} {
1043	    set a [lindex $asm $pc] ; incr pc
1044	    set b [lindex $asm $pc] ; incr pc
1045	    set c [lindex $asm $pc] ; incr pc
1046	}
1047
1048	switch -exact $insn {
1049	    0 - 5 - 20 - 24 - 25 - 26 -
1050	    a/string {
1051		if {($a < 0) || ($a >= $poolend)} {
1052		    return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
1053		}
1054	    }
1055	    1 {
1056		# a/tok b/string
1057		if {![llength $tokmap]} {
1058		    if {($a < 0) || ($a >= $poolend)} {
1059			return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
1060		    }
1061		} else {
1062		    if {![info exists ord($a)]} {
1063			return -code error "Invalid token rank $a for instruction $insn ($iname($insn)) at $base"
1064		    }
1065		}
1066		if {($b < 0) || ($b >= $poolend)} {
1067		    return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
1068		}
1069	    }
1070	    2 {
1071		# a/tokstart b/tokend c/string
1072
1073		if {![llength $tokmap]} {
1074		    # a = b = string references.
1075		    if {($a < 0) || ($a >= $poolend)} {
1076			return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
1077		    }
1078		    if {($b < 0) || ($b >= $poolend)} {
1079			return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
1080		    }
1081		    if {$a == $b} {
1082			return -code error "Invalid single-token range for instruction $insn ($iname($insn)) at $base"
1083		    }
1084		    if {[string compare [lindex $pool $a] [lindex $pool $b]] > 0} {
1085			return -code error "Invalid empty range for instruction $insn ($iname($insn)) at $base"
1086		    }
1087		} else {
1088		    # tokmap defined: a = b = order rank.
1089		    if {![info exists ord($a)]} {
1090			return -code error "Invalid token rank $a for instruction $insn ($iname($insn)) at $base"
1091		    }
1092		    if {![info exists ord($b)]} {
1093			return -code error "Invalid token rank $b for instruction $insn ($iname($insn)) at $base"
1094		    }
1095		    if {$a == $b} {
1096			return -code error "Invalid single-token range for instruction $insn ($iname($insn)) at $base"
1097		    }
1098		    if {$a > $b} {
1099			return -code error "Invalid empty range for instruction $insn ($iname($insn)) at $base"
1100		    }
1101		}
1102		if {($c < 0) || ($c >= $poolend)} {
1103		    return -code error "Invalid string reference $c for instruction $insn ($iname($insn)) at $base"
1104		}
1105	    }
1106	    3 {
1107		# a/class(0-5) b/string
1108		if {($a < 0) || ($a > 5)} {
1109		    return -code error "Invalid token-class $a for instruction $insn ($iname($insn)) at $base"
1110		}
1111		if {($b < 0) || ($b >= $poolend)} {
1112		    return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
1113		}
1114	    }
1115	    4 {
1116		# a/branch b/string
1117		if {![info exists target($a)]} {
1118		    return -code error "Invalid branch target $a for instruction $insn ($iname($insn)) at $base"
1119		} else {
1120		    set jmp($a) .
1121		}
1122		if {($b < 0) || ($b >= $poolend)} {
1123		    return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
1124		}
1125	    }
1126	    6 - 11 - 12 - 13 -
1127	    a/branch {
1128		if {![info exists target($a)]} {
1129		    return -code error "Invalid branch target $a for instruction $insn ($iname($insn)) at $base"
1130		} else {
1131		    set jmp($base) $a
1132		}
1133	    }
1134	    default {}
1135	}
1136    }
1137
1138    # All checks passed, code is deemed good enough.
1139    # Caller may have asked for some of the collected
1140    # information.
1141
1142    if {$ovar ne ""} {
1143	upvar 1 $ovar o
1144	array set o [array get ord]
1145    }
1146    if {$tvar ne ""} {
1147	upvar 1 $tvar t
1148	array set t [array get target]
1149    }
1150    return
1151}
1152
1153# ### ### ### ######### ######### #########
1154## Ready
1155
1156package provide grammar::me::cpu::core 0.2
1157