1# -*- tcl -*-
2# (c) 2004-2009 Andreas Kupries
3# Grammar / Finite Automatons / Container
4
5# ### ### ### ######### ######### #########
6## Package description
7
8## A class whose instances hold all the information describing a
9## single finite automaton (states, symbols, start state, set of
10## accepting states, transition function), and operations to define,
11## manipulate, and query this information.
12
13# ### ### ### ######### ######### #########
14## Requisites
15
16package require grammar::fa::op ; # Heavy FA operations.
17package require snit 1.3        ; # OO system in use (Using hierarchical methods)
18package require struct::list    ; # Extended list operations.
19package require struct::set     ; # Extended set operations.
20
21# ### ### ### ######### ######### #########
22## Implementation
23
24snit::type ::grammar::fa {
25    # ### ### ### ######### ######### #########
26    ## Type API. A number of operations on FAs
27
28    # ### ### ### ######### ######### #########
29    ## Instance API
30
31    #constructor {args} {}
32    #destructor  {}
33
34    method =   {b} {}
35    method --> {b} {}
36
37    method serialize {} {}
38    method deserialize {value} {}
39    method deserialize_merge {value} {}
40
41    method states {} {}
42    #method state {cmd s args} {}
43
44    method startstates {} {}
45    method start?      {s} {}
46    method start?set   {states} {}
47    #method start       {cmd args} {}
48
49    method finalstates {} {}
50    method final?      {s} {}
51    method final?set   {states} {}
52    #method final       {cmd args} {}
53
54    method symbols     {} {}
55    method symbols@    {state} {}
56    method symbols@set {states} {}
57    #method symbol      {cmd sym} {}
58
59    method next  {s sym args} {}
60    method !next {s sym args} {}
61    method nextset {states sym} {}
62
63    method is {cmd} {}
64
65    method reachable_states   {} {}
66    method unreachable_states {} {}
67    method reachable          {s} {}
68
69    method useful_states   {} {}
70    method unuseful_states {} {}
71    method useful          {s} {}
72
73    method epsilon_closure {s} {}
74
75    method clear {} {}
76
77    # ### ### ### ######### ######### #########
78    ## Instance API. Complex FA operations.
79    ## The heavy lifting is done by the operations package.
80
81    method reverse    {}                          {op::reverse    $self}
82    method complete   {{sink {}}}                 {op::complete   $self $sink}
83    method remove_eps {}                          {op::remove_eps $self}
84    method trim       {{what !reachable|!useful}} {op::trim       $self $what}
85    method complement {}                          {op::complement $self}
86    method kleene     {}                          {op::kleene     $self}
87    method optional   {}                          {op::optional   $self}
88    method fromRegex  {regex {over {}}}           {op::fromRegex  $self $regex $over}
89
90    method determinize {{mapvar {}}} {
91	if {$mapvar ne ""} {upvar 1 $mapvar map}
92	op::determinize $self map
93    }
94
95    method minimize {{mapvar {}}} {
96	if {$mapvar ne ""} {upvar 1 $mapvar map}
97	op::minimize $self map
98    }
99
100    method union {fa {mapvar {}}} {
101	if {$mapvar ne ""} {upvar 1 $mapvar map}
102	op::union $self $fa map
103    }
104
105    method intersect {fa {mapvar {}}} {
106	if {$mapvar ne ""} {upvar 1 $mapvar map}
107	op::intersect $self $fa map
108    }
109
110    method difference {fa {mapvar {}}} {
111	if {$mapvar ne ""} {upvar 1 $mapvar map}
112	op::difference $self $fa map
113    }
114
115    method concatenate {fa {mapvar {}}} {
116	if {$mapvar ne ""} {upvar 1 $mapvar map}
117	op::concatenate $self $fa map
118    }
119
120    # ### ### ### ######### ######### #########
121    ## Internal data structures.
122
123    ## State information:
124    ## - Order    : Defined for all states, values provide creation order.
125    ## - Start    : Defined for states which are "start" (Input processing begins in).
126    ## - Final    : Defined for states which are "final" ("accept" input).
127    ## - Transinv : Inverse transitions. Per state the set of (state,sym)'s
128    ##              which have transitions into the state. Defined only for
129    ##              states which have inbound transitions.
130    ##
131    ## Transinv is maintained to make state deletion easier: Direct
132    ## access to the states and transitions which are inbound, for
133    ## their deletion.
134
135    variable order        ; # Map : State -> Order of creation
136    variable final        ; # Map : State -> .   Exists <=> Is a final State
137    variable start        ; # Map : State -> .   Exists <=> Is a start State
138    variable transinv     ; # Map : State -> {(State, Sym)}
139
140    ## Global information:
141    ## - Scount     : Counter for creation order of states.
142
143    variable scount     0  ; # Counter for orderering states.
144
145    ## Symbol information:
146    ## - Symbol : Defined for all symbols, values irrelevant.
147
148    variable symbol       ; # Map : Symbol -> . Exists = Symbol declared.
149
150    ## Transition data:
151    ## - TransN  : Dynamically created instance variables. Transition tables
152    ##             for single states. Defined only for states which have
153    ##             transitions.
154    ## - Transym : List of states having transitions on that symbol.
155
156    ## Transym is maintained for symbol deletion. Direct access to the transitions
157    ## we have to delete as well.
158
159    ## selfns::trans_$order(state) : Per state map : symbol -> list of destinations.
160    variable transym      ; # Map : Sym -> {State}
161
162    ## Derived information:
163    ## - Reach       : Cache for set of states reachable from start.
164    ## - Reachvalid  : Boolean flag. True iff the reach cache contains valid data
165    ## - Useful      : Cache for set of states able to reach final.
166    ## - Usefulvalid : Boolean flag. True iff the useful cache contains valid data
167    ## - Nondete     : Set of states which are non-deterministic, because they have
168    #                  epsilon-transitions.
169    # -  EC          : Cache of epsilon-closures
170
171    variable reach      {} ; # Set of states reachable from 'start'.
172    variable reachvalid 0  ; # Boolean flag, if 'reach' is valid.
173
174    variable useful      {} ; # Set of states able to reach 'final'.
175    variable usefulvalid 0  ; # Boolean flag, if 'useful' is valid.
176
177    variable nondete    {} ; # Set of non-deterministic states, by epsilon/non-epsilon.
178    variable nondets       ; # Per non-det state the set of symbols it is non-det in.
179
180    variable ec            ; # Cache of epsilon-closures for states.
181
182
183    # ### ### ### ######### ######### #########
184    ## Instance API Implementation.
185
186    constructor {args} {
187	set alen [llength $args]
188	if {($alen != 2) && ($alen != 0) && ($alen != 3)} {
189	    return -code error "wrong#args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"
190	}
191
192	array set order    {} ; set nondete     {}
193	array set start    {} ; set scount      0
194	array set final    {} ; set reach       {}
195	array set symbol   {} ; set reachvalid  0
196	array set transym  {} ; set useful      {}
197	array set transinv {} ; set usefulvalid 0
198	array set nondets  {}
199	array set ec       {}
200
201	if {$alen == 0} return
202
203	foreach {cmd object} $args break
204	switch -exact -- $cmd {
205	    = - := - <-- - as {
206		if {$alen != 2} {
207		    return -code error "wrong#args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"
208		}
209		$self = $object
210	    }
211	    deserialize {
212		if {$alen != 2} {
213		    return -code error "wrong#args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"
214		}
215		# Object is actually a value, the deserialization to use.
216		$self deserialize $object
217	    }
218	    fromRegex {
219		# Object is actually a value, the regular expression to use.
220		if {$alen == 2} {
221		    $self fromRegex $object
222		} else {
223		    $self fromRegex $object [lindex $args 2]
224		}
225	    }
226	    default {
227		return -code error "bad assignment: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"
228	    }
229	}
230	return
231    }
232
233    # destructor {}
234
235    # --- --- --- --------- --------- ---------
236
237    method = {b} {
238	$self deserialize [$b serialize]
239    }
240
241    method --> {b} {
242	$b deserialize [$self serialize]
243    }
244
245    # --- --- --- --------- --------- ---------
246
247    method serialize {} {
248	set ord {}
249	foreach {s n} [array get order] {
250	    lappend ord [list $s $n]
251	}
252	set states {} ; # Dictionary
253	foreach item [lsort -index 1 -integer -increasing $ord] {
254	    set s [lindex $item 0]
255	    set sdata {}
256
257	    # Dict data per state :
258
259	    lappend sdata [info exists start($s)]
260	    lappend sdata [info exists final($s)]
261
262	    # Transitions from the state.
263
264	    upvar #0 ${selfns}::trans_$order($s) jump
265
266	    if {![info exists jump]} {
267		lappend sdata {}
268	    } else {
269		lappend sdata [array get jump]
270	    }
271
272	    # ----------------------
273	    lappend states $s $sdata
274	}
275
276	return [::list \
277		grammar::fa \
278		[array names symbol] \
279		$states \
280		]
281    }
282
283    method deserialize {value} {
284	$self CheckSerialization $value st states acc tr newsymbols
285	$self clear
286
287	foreach s   $states     {set order($s)    [incr scount]}
288	foreach sym $newsymbols {set symbol($sym) .}
289	foreach s   $acc        {set final($s)    .}
290	foreach s   $st         {set start($s)    .}
291
292	foreach {sa sym se} $tr {$self Next $sa $sym $se}
293	return
294    }
295
296    method deserialize_merge {value} {
297	$self CheckSerialization $value st states acc tr newsymbols
298
299	foreach s   $states     {set order($s)    [incr scount]}
300	foreach sym $newsymbols {set symbol($sym) .}
301	foreach s   $acc        {set final($s)    .}
302	foreach s   $st         {set start($s)    .}
303
304	foreach {sa sym se} $tr {$self Next $sa $sym $se}
305	return
306    }
307
308    # --- --- --- --------- --------- ---------
309
310    method states {} {
311	return [array names order]
312    }
313
314    method {state add} {s args} {
315	set args [linsert $args 0 $s]
316	foreach s $args {
317	    if {[info exists order($s)]} {
318		return -code error "State \"$s\" is already known"
319	    }
320	}
321	foreach s $args {set order($s) [incr scount]}
322	return
323    }
324
325    method {state delete} {s args} {
326	set args [linsert $args 0 $s]
327	$self StateCheckSet $args
328
329	foreach s $args {
330	    unset -nocomplain start($s)                   ; # Start/Initial indicator
331	    unset -nocomplain final($s)                   ; # Final/Accept indicator
332
333	    # Remove all inbound transitions.
334	    if {[info exists transinv($s)]} {
335		set src $transinv($s)
336		unset    transinv($s)
337
338		foreach srcitem $src {
339		    struct::list assign $srcitem sin sym
340		    $self !Next $sin $sym $s
341		}
342	    }
343
344	    # We remove transition data only after the inbound
345	    # ones. Otherwise we screw up the removal of
346	    # looping transitions. We have to consider the
347	    # backpointers to us in transinv as well.
348
349	    upvar #0  ${selfns}::trans_$order($s) jump
350	    if {[info exists jump]} {
351		foreach sym [array names jump] {
352		    $self !Transym $s $sym
353		    foreach nexts $jump($sym) {
354			$self !Transinv $s $sym $nexts
355		    }
356		}
357
358		unset ${selfns}::trans_$order($s) ; # Transitions from s
359	    }
360	    unset order($s)                               ; # State ordering
361
362	    # Removal of a state may break the automaton into
363	    # disconnected pieces. This means that the set of
364	    # reachable and useful states may change, and the
365	    # cache cannot be used from now on.
366
367	    $self InvalidateReach
368	    $self InvalidateUseful
369	}
370	return
371    }
372
373    method {state rename} {s snew} {
374	$self StateCheck $s
375	if {[info exists order($snew)]} {
376	    return -code error "State \"$snew\" is already known"
377	}
378
379	set o $order($s)
380	unset order($s)                               ; # State ordering
381	set   order($snew) $o
382
383	# Start/Initial indicator
384	if {[info exists start($s)]} {
385	    set   start($snew) $start($s)
386	    unset start($s)
387	}
388	# Final/Accept indicator
389	if {[info exists final($s)]} {
390	    set   final($snew) $final($s)
391	    unset final($s)
392	}
393	# Update all inbound transitions.
394	if {[info exists transinv($s)]} {
395	    set   transinv($snew) $transinv($s)
396	    unset transinv($s)
397
398	    # We have to perform a bit more here. We have to
399	    # go through the inbound transitions and change the
400	    # listed destination state to the new name.
401
402	    foreach srcitem $transinv($snew) {
403		struct::list assign $srcitem sin sym
404		# For loops access the 'order' array under the
405		# new name, the old entry is already gone. See
406		# above. See bug SF 2595296.
407		if {$sin eq $s} {
408		    set sin $snew
409		}
410		upvar #0 ${selfns}::trans_$order($sin) jump
411		upvar 0 jump($sym) destinations
412		set pos [lsearch -exact $destinations $s]
413		set destinations [lreplace $destinations $pos $pos $snew]
414	    }
415	}
416
417	# Another place to change are the back pointers from
418	# all the states we have transitions to, i.e. transinv
419	# for all outbound states.
420
421	upvar #0 ${selfns}::trans_$o jump
422	if {[info exists jump]} {
423	    foreach sym [array names jump] {
424		foreach sout $jump($sym) {
425		    upvar 0 transinv($sout) backpointer
426		    set pos [lsearch -exact $backpointer [list $s $sym]]
427		    set backpointer [lreplace $backpointer $pos $pos [list $snew $sym]]
428		}
429
430		# And also to update: Transym information for the symbol.
431		upvar 0 transym($sym) users
432		set pos [lsearch -exact $users $s]
433		set users [lreplace $users $pos $pos $snew]
434	    }
435	}
436
437	# Changing the name of a state does not change the
438	# reachables / useful states per se. We just may have
439	# to replace the name in the caches as well.
440
441	# - Invalidation will do the same, at the expense of a
442	# - larger computation later.
443
444	$self InvalidateReach
445	$self InvalidateUseful
446	return
447    }
448
449    method {state exists} {s} {
450	return [info exists order($s)]
451    }
452
453    # --- --- --- --------- --------- ---------
454
455    method startstates {} {
456	return [array names start]
457    }
458
459    method start? {s} {
460	$self StateCheck $s
461	return [info exists start($s)]
462    }
463
464    method start?set {states} {
465	$self StateCheckSet $states
466	foreach s $states {
467	    if {[info exists start($s)]} {return 1}
468	}
469	return 0
470    }
471
472    # Note: Adding or removing start states does not change
473    # usefulness, only reachability
474
475    method {start add} {state args} {
476	set args [linsert $args 0 $state]
477	$self StateCheckSet $args
478	foreach s $args {set start($s) .}
479	$self InvalidateReach
480	return
481    }
482
483    method {start set} {states} {
484	$self StateCheckSet $states
485	array unset start
486	foreach s $states {set start($s) .}
487	$self InvalidateReach
488	return
489    }
490
491    method {start remove} {state args} {
492	set args [linsert $args 0 $state]
493	$self StateCheckSet $args
494	foreach s $args {
495	    unset -nocomplain start($s)
496	}
497	$self InvalidateReach
498	return
499    }
500
501    method {start clear} {} {
502	array unset start
503	$self InvalidateReach
504	return
505    }
506
507    # --- --- --- --------- --------- ---------
508
509    method finalstates {} {
510	return [array names final]
511    }
512
513    method final? {s} {
514	$self StateCheck $s
515	return [info exists final($s)]
516    }
517
518    method final?set {states} {
519	$self StateCheckSet $states
520	foreach s $states {
521	    if {[info exists final($s)]} {return 1}
522	}
523	return 0
524    }
525
526    # Note: Adding or removing final states does not change
527    # reachability, only usefulness
528
529    method {final add} {state args} {
530	set args [linsert $args 0 $state]
531	$self StateCheckSet $args
532	foreach s $args {set final($s) .}
533	$self InvalidateUseful
534	return
535    }
536
537    method {final set} {states} {
538	$self StateCheckSet $states
539	array unset final
540	foreach s $states {set final($s) .}
541	$self InvalidateReach
542	return
543    }
544
545    method {final remove} {state args} {
546	set args [linsert $args 0 $state]
547	$self StateCheckSet $args
548	foreach s $args {
549	    unset -nocomplain final($s)
550	}
551	$self InvalidateUseful
552	return
553    }
554
555    method {final clear} {} {
556	array unset final
557	$self InvalidateReach
558	return
559    }
560
561    # --- --- --- --------- --------- ---------
562
563    method symbols {} {
564	return [array names symbol]
565    }
566
567    method symbols@ {s {t {}}} {
568	$self StateCheck $s
569	if {$t ne ""} {	$self StateCheck $t}
570	upvar #0 ${selfns}::trans_$order($s) jump
571	if {![info exists jump]} {return {}}
572	if {$t eq ""} {
573	    # No destination, all symbols.
574	    return [array names jump]
575	}
576	# Specific destination, locate the symbols going there.
577	set result {}
578	foreach sym [array names jump] {
579	    if {[lsearch -exact $jump($sym) $t] < 0} continue
580	    lappend result $sym
581	}
582	return [lsort -uniq $result]
583    }
584
585    method symbols@set {states} {
586	# Union (fa symbol@ s, f.a. s in states)
587
588	$self StateCheckSet $states
589	set result {}
590	foreach s $states {
591	    upvar #0 ${selfns}::trans_$order($s) jump
592	    if {![info exists jump]} continue
593	    foreach sym [array names jump] {
594		lappend result $sym
595	    }
596	}
597	return [lsort -uniq $result]
598    }
599
600    method {symbol add} {sym args} {
601	set args [linsert $args 0 $sym]
602	foreach sym $args {
603	    if {$sym eq ""} {
604		return -code error "Cannot add illegal empty symbol \"\""
605	    }
606	    if {[info exists symbol($sym)]} {
607		return -code error "Symbol \"$sym\" is already known"
608	    }
609	}
610	foreach sym $args {set symbol($sym) .}
611	return
612    }
613
614    method {symbol delete} {sym args} {
615	set args [linsert $args 0 $sym]
616	$self SymbolCheckSetNE $args
617	foreach sym $args {
618	    unset symbol($sym)
619
620	    # Delete all transitions using the removed symbol.
621
622	    if {[info exists transym($sym)]} {
623		foreach s $transym($sym) {
624		    $self !Next $s $sym
625		}
626	    }
627	}
628	return
629    }
630
631    method {symbol rename} {sym newsym} {
632	$self SymbolCheckNE $sym
633	if {$newsym eq ""} {
634	    return -code error "Cannot add illegal empty symbol \"\""
635	}
636	if {[info exists symbol($newsym)]} {
637	    return -code error "Symbol \"$newsym\" is already known"
638	}
639
640	unset symbol($sym)
641	set symbol($newsym) .
642
643	if {[info exists transym($sym)]} {
644	    set   transym($newsym) [set states $transym($sym)]
645	    unset transym($sym)
646
647	    foreach s $states {
648		# Update the jump tables for each of the states
649		# using this symbol, and the reverse tables as
650		# well.
651
652		upvar #0 ${selfns}::trans_$order($s) jump
653		set   jump($newsym) [set destinations $jump($sym)]
654		unset jump($sym)
655
656		foreach sd $destinations {
657		    upvar 0 transinv($sd) backpointer
658		    set pos [lsearch -exact $backpointer [list $s $sym]]
659		    set backpointer [lreplace $backpointer $pos $pos [list $s $newsym]]
660		}
661	    }
662	}
663	return
664    }
665
666    method {symbol exists} {sym} {
667	return [info exists symbol($sym)]
668    }
669
670    # --- --- --- --------- --------- ---------
671
672    method next {s sym args} {
673	## Split into checking and functionality ...
674
675	set alen [llength $args]
676	if {($alen != 2) && ($alen != 0)} {
677	    return -code error "wrong#args: [list $self] next s sym ?--> s'?"
678	}
679	$self StateCheck  $s
680	$self SymbolCheck $sym
681
682	if {($alen == 2) && [set cmd [lindex $args 0]] ne "-->"} {
683	    return -code error "Expected -->, got \"$cmd\""
684	}
685
686	if {$alen == 0} {
687	    # Query transition table.
688	    upvar #0 ${selfns}::trans_$order($s) jump
689	    if {![info exists jump($sym)]} {return {}}
690	    return $jump($sym)
691	}
692
693	set nexts [lindex $args 1]
694	$self StateCheck $nexts
695
696	upvar #0 ${selfns}::trans_$order($s) jump
697	if {[info exists jump($sym)] && [struct::set contains $jump($sym) $nexts]} {
698	    return -code error "Transition \"($s, ($sym)) --> $nexts\" is already known"
699	}
700
701	$self Next $s $sym $nexts
702	return
703    }
704
705    method !next {s sym args} {
706	set alen [llength $args]
707	if {($alen != 2) && ($alen != 0)} {
708	    return -code error "wrong#args: [list $self] !next s sym ?--> s'?"
709	}
710	$self StateCheck  $s
711	$self SymbolCheck $sym
712
713	if {$alen == 2} {
714	    if {[lindex $args 0] ne "-->"} {
715		return -code error "Expected -->, got \"[lindex $args 0]\""
716	    }
717	    set nexts [lindex $args 1]
718	    $self StateCheck $nexts
719	    $self !Next $s $sym $nexts
720	} else {
721	    $self !Next $s $sym
722	}
723    }
724
725    method nextset {states sym} {
726	$self SymbolCheck   $sym
727	$self StateCheckSet $states
728
729	set result {}
730	foreach s $states {
731	    upvar #0 ${selfns}::trans_$order($s) jump
732	    if {![info exists jump($sym)]} continue
733	    struct::set add result $jump($sym)
734	}
735	return $result
736    }
737
738    # --- --- --- --------- --------- ---------
739
740    method is {cmd} {
741	switch -exact -- $cmd {
742	    complete {
743		# The FA is complete if Trans(State, Sym) != {} for all
744		# states and symbols (Not counting epsilon transitions).
745		# Without symbols the FA is deemed complete. Note:
746		# States with epsilon transitions can use symbols
747		# indirectly! Need their closures for exact
748		# computation.
749
750		set nsymbols [llength [array names symbol]]
751		if {$nsymbols == 0} {return 1}
752		foreach s [array names order] {
753		    upvar #0 ${selfns}::trans_$order($s) jump
754		    if {![info exists jump]} {return 0}
755		    set njsym [array size jump]
756		    if {[info exists jump()]} {
757			set  njsym [llength [$self symbols@set [$self epsilon_closure $s]]]
758			incr njsym -1
759		    }
760		    if {$njsym != $nsymbols}  {return 0}
761		}
762		return 1
763	    }
764	    deterministic {
765		# The FA is deterministic if it has on start state, no
766		# epsilon transitions, and the transition function is
767		# State x Symbol -> State, and not
768		# State x Symbol -> P(State).
769
770		return [expr {
771		    ([array size start] == 1) &&
772		    ![llength $nondete] &&
773		    ![array size nondets]
774		}] ;#{}
775	    }
776	    epsilon-free {
777		# FA is epsion-free if there are no states having epsilon transitions.
778		return [expr {![llength $nondete]}]
779	    }
780	    useful {
781		# The FA is useful if and only if we have states and
782		# all states are reachable and useful.
783
784		set states [$self states]
785		return [expr {
786		    [struct::set size $states] &&
787		    [struct::set equal $states [$self reachable_states]] &&
788		    [struct::set equal $states [$self useful_states]]
789		}] ;# {}
790	    }
791	}
792	return -code error "Expected complete, deterministic, epsilon-free, or useful, got \"$cmd\""
793    }
794
795    # --- --- --- --------- --------- ---------
796
797    method reachable_states {} {
798	if {$reachvalid} {return $reach}
799	if {![array size start]} {
800	    set reach {}
801	} else {
802	    # Basic algorithm like for epsilon_closure, except that we
803	    # process all transitions, not only epsilons, and that
804	    # the initial state is fixed to start.
805
806	    set reach   [array names start]
807	    set pending $reach
808	    array set visited {}
809	    while {[llength $pending]} {
810		set s [struct::list shift pending]
811		if {[info exists visited($s)]} continue
812		set visited($s) .
813		upvar #0 ${selfns}::trans_$order($s) jump
814		if {![info exists jump]} continue
815		if {![array size  jump]} continue
816		foreach sym [array names jump] {
817		    struct::set add reach   $jump($sym)
818		    struct::set add pending $jump($sym)
819		}
820	    }
821	}
822	set reachvalid 1
823	return $reach
824    }
825
826    method unreachable_states {} {
827	# unreachable = states - reachables
828	return [struct::set difference \
829		[$self states] [$self reachable_states]]
830    }
831
832    method reachable {s} {
833	$self StateCheck $s
834	return [struct::set contains [$self reachable_states] $s]
835    }
836
837    # --- --- --- --------- --------- ---------
838
839    method useful_states {} {
840	if {$usefulvalid} {return $useful}
841
842	# A state is useful if a final state
843	# can be reached from it.
844
845	if {![array size final]} {
846	    set useful {}
847	} else {
848	    # Basic algorithm like for epsilon_closure, except that we
849	    # process all transitions, not only epsilons, and that
850	    # the initial set of states is fixed to final.
851
852	    set useful      [array names final]
853	    array set known [array get final]
854	    set pending $useful
855	    array set visited {}
856	    while {[llength $pending]} {
857		set s [struct::list shift pending]
858		if {[info exists visited($s)]} continue
859		set visited($s) .
860
861		# All predecessors are useful, and have to be visited as well.
862		# We get the predecessors from the transinv structure.
863
864		if {![info exists transinv($s)]} continue
865		foreach before $transinv($s) {
866		    set before [lindex $before 0]
867		    if {[info exists visited($before)]} continue
868		    lappend pending $before
869		    if {[info exists known($before)]} continue
870		    lappend useful $before
871		    set known($before) .
872		}
873	    }
874	}
875	set usefulvalid 1
876	return $useful
877    }
878
879    method unuseful_states {} {
880	# unuseful = states - useful
881	return [struct::set difference \
882		[$self states] [$self useful_states]]
883    }
884
885    method useful {s} {
886	$self StateCheck $s
887	return [struct::set contains [$self useful_states] $s]
888    }
889
890    # --- --- --- --------- --------- ---------
891
892    method epsilon_closure {s} {
893	# Iterative graph traversal. Keeps a set of states to look at,
894	# and adds to them everything it can reach from the current
895	# state via epsilon-transitions. Loops are handled through the
896	# visited array to weed out all the states already processed.
897
898	$self StateCheck $s
899
900	# Prefer cached information
901	if {[info exists ec($s)]} {
902	    return $ec($s)
903	}
904
905	set closure [list $s]
906	set pending [list $s]
907	array set visited {}
908	while {[llength $pending]} {
909	    set t [struct::list shift pending]
910	    if {[info exists visited($t)]} continue
911	    set visited($t) .
912	    upvar #0 ${selfns}::trans_$order($t) jump
913	    if {![info exists jump()]} continue
914	    struct::set add closure $jump()
915	    struct::set add pending $jump()
916	}
917	set ec($s) $closure
918	return $closure
919    }
920
921    # --- --- --- --------- --------- ---------
922
923    method clear {} {
924	array unset order    ; set nondete     {}
925	array unset start    ; set scount      0
926	array unset final    ; set reach       {}
927	array unset symbol   ; set reachvalid  0
928	array unset transym  ; set useful      {}
929	array unset transinv ; set usefulvalid 0
930	array unset nondets
931	array unset ec
932
933	# Locate all 'trans_' arrays and remove them as well.
934
935	foreach v [info vars ${selfns}::trans_*] {
936	    unset $v
937	}
938	return
939    }
940
941    # ### ### ### ######### ######### #########
942    ## Instance Internals.
943
944    method StateCheck {s} {
945	if {![info exists order($s)]} {
946	    return -code error "Illegal state \"$s\""
947	}
948    }
949
950    method StateCheckSet {states} {
951	foreach s $states {
952	    if {![info exists order($s)]} {
953		return -code error "Illegal state \"$s\""
954	    }
955	}
956    }
957
958    method SymbolCheck {sym} {
959	if {$sym eq ""} return
960	if {![info exists symbol($sym)]} {
961	    return -code error "Illegal symbol \"$sym\""
962	}
963    }
964
965    method SymbolCheckNE {sym} {
966	if {($sym eq "") || ![info exists symbol($sym)]} {
967	    return -code error "Illegal symbol \"$sym\""
968	}
969    }
970
971    if 0 {
972	# Unused. Activate when needed.
973	method SymbolCheckSet {symbols} {
974	    foreach sym $symbols {
975		if {$sym eq ""} continue
976		if {![info exists symbol($sym)]} {
977		    return -code error "Illegal symbol \"$sym\""
978		}
979	    }
980	}
981    }
982
983    method SymbolCheckSetNE {symbols} {
984	foreach sym $symbols {
985	    if {($sym eq "") || ![info exists symbol($sym)]} {
986		return -code error "Illegal symbol \"$sym\""
987	    }
988	}
989    }
990
991    method Next {s sym nexts} {
992	# Modify transition table. May update the set of
993	# non-deterministic states. Invalidates reachable
994	# cache, as states may become reachable. Updates
995	# the transym and transinv mappings.
996
997	upvar #0 ${selfns}::trans_$order($s) jump
998
999	$self InvalidateReach
1000	$self InvalidateUseful
1001	# Clear closure cache when epsilons change.
1002	if {$sym eq ""} {array unset ec}
1003
1004	if {[info exists transym($sym)]} {
1005	    struct::set include transym($sym) $s
1006	} else {
1007	    set transym($sym) [list $s]
1008	}
1009
1010	if {[info exists transinv($nexts)]} {
1011	    struct::set include transinv($nexts) [list $s $sym]
1012	} else {
1013	    set transinv($nexts) [list [list $s $sym]]
1014	}
1015
1016	if {![info exists jump($sym)]} {
1017	    set jump($sym) [list $nexts]
1018	} else {
1019	    struct::set include jump($sym) $nexts
1020	}
1021	$self NonDeterministic $s $sym
1022	return
1023    }
1024
1025    method !Next {s sym args} {
1026	upvar #0 ${selfns}::trans_$order($s) jump
1027	# Anything to do at all ?
1028	if {![info exists jump($sym)]} return
1029	$self InvalidateReach
1030	$self InvalidateUseful
1031	# Clear closure cache when epsilons change.
1032	if {$sym eq ""} {array unset ec}
1033
1034	if {![llength $args]} {
1035	    # Unset all transitions for (s, sym)
1036	    # Update transym and transinv mappings as well, if existing.
1037
1038	    $self !Transym $s $sym
1039	    foreach nexts $jump($sym) {
1040		$self !Transinv $s $sym $nexts
1041	    }
1042
1043	    unset jump($sym)
1044	} else {
1045	    # Remove the single transition (s, sym) -> nexts
1046	    set nexts [lindex $args 0]
1047
1048	    struct::set exclude jump($sym) $nexts
1049	    $self !Transinv $s $sym $nexts
1050
1051	    if {![struct::set size $jump($sym)]} {
1052		$self !Transym $s $sym
1053		unset jump($sym)
1054		if {![array size jump]} {
1055		    unset jump
1056		}
1057	    }
1058	}
1059
1060	$self NonDeterministic $s $sym
1061	return
1062    }
1063
1064    method !Transym {s sym} {
1065	struct::set exclude transym($sym) $s
1066	if {![struct::set size $transym($sym)]} {
1067	    unset transym($sym)
1068	}
1069    }
1070
1071    method !Transinv {s sym nexts} {
1072	if {[info exists transinv($nexts)]} {
1073	    struct::set exclude transinv($nexts) [list $s $sym]
1074	    if {![struct::set size $transinv($nexts)]} {
1075		unset transinv($nexts)
1076	    }
1077	}
1078    }
1079
1080    method InvalidateReach {} {
1081	set reachvalid 0
1082	set reach      {}
1083	return
1084    }
1085
1086    method InvalidateUseful {} {
1087	set usefulvalid 0
1088	set useful     {}
1089	return
1090    }
1091
1092    method NonDeterministic {s sym} {
1093	upvar #0 ${selfns}::trans_$order($s) jump
1094
1095	# Epsilon rule, whole state check. Epslion present <=> Not a DFA.
1096
1097	if {[info exists jump()]} {
1098	    struct::set include nondete $s
1099	} else {
1100	    struct::set exclude nondete $s
1101	}
1102
1103	# Non-determinism over a symbol.
1104
1105	upvar #0 ${selfns}::trans_$order($s) jump
1106
1107	if {[info exists jump($sym)] && [struct::set size $jump($sym)] > 1} {
1108	    if {![info exists nondets($s)]} {
1109		set nondets($s) [list $sym]
1110	    } else {
1111		struct::set include nondets($s) $sym
1112	    }
1113	    return
1114	} else {
1115	    if {![info exists nondets($s)]} return
1116	    struct::set exclude nondets($s) $sym
1117	    if {![struct::set size $nondets($s)]} {
1118		unset nondets($s)
1119	    }
1120	}
1121	return
1122    }
1123
1124    method CheckSerialization {value startst states acc trans syms} {
1125	# value is list/3 ('grammar::fa' symbols states)
1126	# !("" in symbols)
1127	# states is ordered dict (key is state, value is statedata)
1128	# statedata is list/3 (start final trans|"")
1129	# start is boolean
1130	# final is boolean
1131	# trans is dict (key in symbols, value is destinations)
1132	# destinations is set of states
1133
1134	upvar 1 $startst startstates \
1135		$states  sts \
1136		$acc     a \
1137		$trans   t \
1138		$syms    symbols
1139
1140	set prefix "error in serialization:"
1141	if {[llength $value] != 3} {
1142	    return -code error "$prefix list length not 3"
1143	}
1144
1145	struct::list assign $value   stype symbols statedata
1146
1147	if {$stype ne "grammar::fa"} {
1148	    return -code error "$prefix unknown type \"$stype\""
1149	}
1150	if {[struct::set contains $symbols ""]} {
1151	    return -code error "$prefix empty symbol is not legal"
1152	}
1153
1154	if {[llength $statedata] % 2 == 1} {
1155	    return -code error "$prefix state data is not a dictionary"
1156	}
1157	array set _states $statedata
1158	if {[llength $statedata] != (2*[array size _states])} {
1159	    return -code error "$prefix state data contains duplicate states"
1160	}
1161	set startstates {}
1162	set sts {}
1163	set p   {}
1164	set a   {}
1165	set e   {}
1166	set l   {}
1167	set m   {}
1168	set t   {}
1169	foreach {k v} $statedata {
1170	    lappend sts $k
1171	    if {[llength $v] != 3} {
1172		return -code error "$prefix state list length not 3"
1173	    }
1174
1175	    struct::list assign $v begin accept trans
1176
1177	    if {![string is boolean -strict $begin]} {
1178		return -code error "$prefix expected boolean for start, got \"$begin\""
1179	    }
1180	    if {$begin} {lappend startstates $k}
1181	    if {![string is boolean -strict $accept]} {
1182		return -code error "$prefix expected boolean for final, got \"$accept\""
1183	    }
1184	    if {$accept} {lappend a $k}
1185
1186	    if {[llength $trans] % 2 == 1} {
1187		return -code error "$prefix transition data is not a dictionary"
1188	    }
1189	    array set _trans $trans
1190	    if {[llength $trans] != (2*[array size _trans])} {
1191		return -code error "$prefix transition data contains duplicate symbols"
1192	    }
1193	    unset _trans
1194
1195	    foreach {sym destinations} $trans {
1196		# destinations = list of state
1197		if {($sym ne "") && ![struct::set contains $symbols $sym]} {
1198		    return -code error "$prefix illegal symbol \"$sym\" in transition"
1199		}
1200		foreach dest $destinations {
1201		    if {![info exists _states($dest)]} {
1202			return -code error "$prefix illegal destination state \"$dest\""
1203		    }
1204		    lappend t $k $sym $dest
1205		}
1206	    }
1207	}
1208	return
1209    }
1210
1211    # ### ### ### ######### ######### #########
1212    ## Type API implementation.
1213
1214    # ### ### ### ######### ######### #########
1215    ## Type Internals.
1216
1217    # ### ### ### ######### ######### #########
1218}
1219
1220# ### ### ### ######### ######### #########
1221## Initialization. Specify the container constructor command to use by
1222## the operations package.
1223
1224::grammar::fa::op::constructor ::grammar::fa
1225
1226# ### ### ### ######### ######### #########
1227## Package Management
1228
1229package provide grammar::fa 0.4
1230