1# -*- tcl -*-
2# Grammar / FA / Operations
3
4# ### ### ### ######### ######### #########
5## Package description
6
7# ### ### ### ######### ######### #########
8## Requisites
9
10package require struct::list ; # Extended list operations.
11package require struct::set  ; # Extended set operations.
12
13# ### ### ### ######### ######### #########
14## Implementation
15
16namespace eval ::grammar::fa::op {
17
18    # ### ### ### ######### ######### #########
19    ## API. Structure / Language / Compilation
20
21    proc reverse     {fa} {}
22    proc complete    {fa {sink {}}} {}
23    proc remove_eps  {fa} {}
24    proc trim        {fa {what !reachable|!useful}} {}
25    proc determinize {fa {mapvar {}} {idstart 0}} {}
26    proc minimize    {fa {mapvar {}}} {}
27
28    proc complement  {fa} {}
29    proc kleene      {fa} {}
30    proc optional    {fa} {}
31    proc union       {fa fb {mapvar {}}} {}
32    proc intersect   {fa fb {mapvar {}} {idstart 0}} {}
33    proc difference  {fa fb {mapvar {}}} {}
34    proc concatenate {fa fb {mapvar {}}} {}
35
36    proc fromRegex   {fa regex {over {}}} {}
37
38    proc toRegexp    {fa} {}
39    proc toRegexp2   {fa} {}
40
41    proc simplifyRegexp {rex} {}
42    proc toTclRegexp    {rex symdict} {}
43
44    # ### ### ### ######### ######### #########
45
46    namespace export reverse complete remove_eps trim \
47	    determinize minimize complement kleene \
48	    optional union intersect difference \
49	    concatenate fromRegex toRegexp toRegexp2 \
50	    simplifyRegexp toTclRegexp
51
52    # ### ### ### ######### ######### #########
53    ## Internal data structures.
54
55    variable cons {}
56
57    # ### ### ### ######### ######### #########
58}
59
60# ### ### ### ######### ######### #########
61## API implementation. Structure
62
63proc ::grammar::fa::op::reverse {fa} {
64    # Reversal means that all transitions change their direction
65    # and start and final states are swapped.
66
67    # Note that reversed FA might not be deterministic, even if the FA
68    # itself was.
69
70    # One loop is not enough for this. If we reverse the
71    # transitions for a state immediately we may modify a state
72    # which has not been processed yet. And when we come to this
73    # state we reverse already reversed transitions, creating a
74    # complete mess. Thus two loops, one to collect the current
75    # transitions (and also remove them), and a second to insert
76    # the reversed transitions.
77
78    set tmp [$fa finalstates]
79    $fa final set [$fa startstates]
80    $fa start set $tmp
81
82    # FUTURE : Method to retrieve all transitions
83    # FUTURE : Method to delete all transitions
84
85    set trans {}
86    foreach s [$fa states] {
87	foreach sym [$fa symbols@ $s] {
88	    lappend trans $s $sym [$fa next $s $sym]
89	    $fa !next $s $sym
90	}
91    }
92    foreach {s sym destinations} $trans {
93	foreach d $destinations {
94	    $fa next $d $sym --> $s
95	}
96    }
97    return
98}
99
100# --- --- --- --------- --------- ---------
101
102proc ::grammar::fa::op::complete {fa {sink {}}} {
103    if {[$fa is complete]} return
104
105    # We have an incomplete FA.
106
107    if {$sink eq ""} {
108	set sink [FindNewState $fa sink]
109    } elseif {[$fa state exists $sink]} {
110	return -code error "The chosen sink state exists already"
111    }
112    $fa state add $sink
113
114    # Add transitions to it from all states which are not
115    # complete. The sink state itself loops on all inputs. IOW it is a
116    # non-useful state.
117
118    set symbols [$fa symbols]
119    foreach sym $symbols {
120	$fa next $sink $sym --> $sink
121    }
122
123    if {[$fa is epsilon-free]} {
124	foreach s [$fa states] {
125	    foreach missing [struct::set difference \
126		    $symbols \
127		    [$fa symbols@ $s]] {
128		$fa next $s $missing --> $sink
129	    }
130	}
131    } else {
132	# For an FA with epsilon-transitions we cannot simply look at
133	# the direct transitions to find the used symbols. We have to
134	# determine this for the epsilon-closure of the state in
135	# question. Oh, and we have to defer actually adding the
136	# transitions after we have picked them all, or otherwise the
137	# newly added transitions throw the symbol calculations for
138	# epsilon closures off.
139
140	set new {}
141	foreach s [$fa states] {
142	    foreach missing [struct::set difference \
143		    $symbols \
144		    [$fa symbols@set [$fa epsilon_closure $s]]] {
145		lappend new $s $missing
146	    }
147	}
148
149	foreach {s missing} $new {
150	    $fa next $s $missing --> $sink
151	}
152    }
153    return
154}
155
156# --- --- --- --------- --------- ---------
157
158proc ::grammar::fa::op::remove_eps {fa} {
159    # We eliminate all epsilon transitions by duplicating a number
160    # of regular transitions, which we get through the epsilon
161    # closure of the states having epsilon transitions. We do
162    # nothing if the FA is epsilon free to begin with.
163
164    if {[$fa is epsilon-free]} return
165
166    # Note: Epsilon transitions touching start and final states
167    # propagate the start markers forward and final markers
168    # backward. We do this first by propagating start markers twice,
169    # once with a reversed FA. This also gives us some
170    # epsilon-closures as well.
171
172    foreach n {1 2} {
173	foreach s [$fa startstates] {
174	    foreach e [$fa epsilon_closure $s] {
175		$fa start add $e
176	    }
177	}
178	reverse $fa
179    }
180
181    # Now duplicate all transitions which are followed or preceeded by
182    # epsilon transitions of any number greater than zero.
183
184    # Note: The closure computations done by the FA are cached in the
185    # FA, so doing it multiple times is no big penalty.
186
187    # FUTURE : Retrieve all transitions on one command.
188
189    # FUTURE : Different algorithm ...
190    # Retrieve non-eps transitions for all states ...
191    # Iterate this list. Compute e-closures for endpoints, cache
192    # them. Duplicate the transition if needed, in that case add it to
193    # the end of the list, for possible more duplication (may touch
194    # different e-closures). Stop when the list is empty again.
195
196    set changed 1
197    while {$changed} {
198	set changed 0
199	foreach s [$fa states] {
200	    foreach sym [$fa symbols@ $s] {
201		set dest [$fa next $s $sym]
202		if {$sym eq ""} {
203		    # Epsilon transitions.
204
205		    # Get the closure, and duplicate all transitions for all
206		    # non-empty symbols as transitions of the original state.
207		    # This may lead to parallel transitions between states, hence
208		    # the catch. It prevents the generated error from stopping the
209		    # action, and no actual parallel transitions are created.
210
211		    set clos [$fa epsilon_closure $s]
212		    foreach csym [$fa symbols@set $clos] {
213			if {$csym eq ""} continue
214			foreach d [$fa nextset $clos $csym] {
215			    if {![catch {$fa next $s $csym --> $d} msg]} {
216				set changed 1
217			    }
218			}
219		    }
220		} else {
221		    # Regular transition. Go through all destination
222		    # states, compute their closures and replicate the
223		    # transition if the closure contains more than the
224		    # destination itself, to all states in the closure.
225
226		    foreach d $dest {
227			set clos [$fa epsilon_closure $d]
228			if {[llength $clos] > 1} {
229			    foreach e $clos {
230				if {![catch {$fa next $s $sym --> $e}]} {
231				    set changed 1
232				}
233			    }
234			}
235		    }
236		}
237	    }
238	}
239    }
240
241    # At last, drop the epsilons for all states. Only now is this
242    # possible because otherwise we might compute bad epsilon
243    # closures in the previous loop.
244
245    foreach s [$fa states] {
246	$fa !next $s ""
247    }
248    return
249}
250
251# --- --- --- --------- --------- ---------
252
253proc ::grammar::fa::op::trim {fa {what !reachable|!useful}} {
254    # Remove various unwanted pices from the FA.
255
256    switch -exact -- $what {
257	!reachable {
258	    set remove [$fa unreachable_states]
259	}
260	!useful {
261	    set remove [$fa unuseful_states]
262	}
263	!reachable&!useful -
264	!(reachable|useful) {
265	    set remove [struct::set intersect [$fa unreachable_states] [$fa unuseful_states]]
266	}
267	!reachable|!useful -
268	!(reachable&useful) {
269	    set remove [struct::set union [$fa unreachable_states] [$fa unuseful_states]]
270	}
271	default {
272	    return -code error "Expected !reachable, !useful, !reachable&!useful, !(reachable|useful), !reachable|!useful, or !(reachable&useful), got \"$what\""
273	}
274    }
275
276    foreach s $remove {
277	$fa state delete $s
278    }
279    return
280}
281
282# --- --- --- --------- --------- ---------
283
284proc ::grammar::fa::op::determinize {fa {mapvar {}} {idstart 0}} {
285    # We do the operation in several stages instead of jumping
286    # directly in the subset construction. Basically we try the less
287    # expensive operations first to see if they are enough. It does
288    # help that they will us also bring nearer to the ultimate goal
289    # even if they are not enough.
290
291    set hasmap 0
292    if {$mapvar ne ""} {
293	upvar 1 $mapvar map ; set hasmap 1
294    }
295
296    # First, is the input already deterministic ?
297    # There is nothing to do in that case.
298
299    if {[$fa is deterministic]} {
300	if {$hasmap} {set map {}}
301	return
302    }
303
304    # Second, trim unreachable and unuseables. We are done if only
305    # they carried the non-determinism. Otherwise we might have made
306    # the FA smaller and was less time consuming to convert.
307
308    if {[llength [$fa startstates]]} {trim $fa !reachable}
309    if {[llength [$fa finalstates]]} {trim $fa !useful}
310    if {[$fa is deterministic]} {
311	if {$hasmap} {set map {}}
312	return
313    }
314
315    # Third, remove any epsilon transitions, and stop if that was
316    # enough. Of course, weed out again states which have become
317    # irrelevant. The removal of the epsilons will at least ensure
318    # that the subset construction won't have to deal with
319    # closures. I.e. simpler.
320
321    remove_eps $fa
322    if {[llength [$fa startstates]]} {trim $fa !reachable}
323    if {[llength [$fa finalstates]]} {trim $fa !useful}
324    if {[$fa is deterministic]} {
325	if {$hasmap} {set map {}}
326	return
327    }
328
329    # Fourth. There is no way to avoid the subset construction.
330    # Dive in. This is the only part of the algorithm which requires
331    # us to keep a map. We construct the dfa in a transient container
332    # and copy the result back to fa when completed.
333
334    array set subsets {}
335    set id      $idstart
336    set pending {}
337    set dfa [[cons] %AUTO%]
338    # FUTURE : $dfa symbol set [$fa symbols]
339    foreach sym [$fa symbols] {$dfa symbol add $sym}
340
341    # If we have start states we can initialize the algorithm with
342    # their set. Otherwise we have to the single-element sets of all
343    # states as the beginning.
344
345    set starts [$fa startstates]
346    if {[llength $starts] > 0} {
347	# Make the set of start states the initial stae of the result.
348
349	set starts [lsort $starts] ; # Sort to get canonical form.
350	$dfa state add $id
351	$dfa start add $id
352
353	# The start may also be a final state
354	if {[$fa final?set $starts]} {
355	    $dfa final add $id
356	}
357
358	set subsets(dfa,$starts) $id
359	set subsets(nfa,$id) $starts
360
361	lappend pending $id
362	incr id
363    } else {
364	# Convert all states of the input into sets (of one element)
365	# in the output. Do not forget to mark all final states we
366	# come by. No start states, otherwise we wouldn't be here.
367
368	foreach s [$fa states] {
369	    set nfaset [list $s]
370
371	    $dfa state add $id
372	    if {[$fa final? $s]} {
373		$dfa final add $id
374	    }
375
376	    set subsets(dfa,$nfaset) $id
377	    set subsets(nfa,$id) $nfaset
378	    lappend pending $id
379	    incr id
380	}
381    }
382
383    while {[llength $pending]} {
384	set dfastate [struct::list shift pending]
385
386	# We have to compute the transition function for this dfa state.
387
388	set nfaset $subsets(nfa,$dfastate)
389
390	foreach sym [$fa symbols@set $nfaset] {
391	    set nfanext [lsort [$fa nextset $nfaset $sym]]
392
393	    if {![info exists subsets(dfa,$nfanext)]} {
394		# Unknown destination. Add it as a new state.
395
396		$dfa state add $id
397		if {[$fa final?set $nfanext]} {
398		    $dfa final add $id
399		}
400
401		set subsets(dfa,$nfanext) $id
402		set subsets(nfa,$id) $nfanext
403
404		# Schedule the calculation of the transition function
405		# of the new state.
406
407		lappend pending $id
408		incr id
409	    }
410
411	    # Add the transition
412	    $dfa next $dfastate $sym --> $subsets(dfa,$nfanext)
413	}
414    }
415
416    if {[llength [$fa startstates]]} {trim $fa !reachable}
417    if {[llength [$fa finalstates]]} {trim $fa !useful}
418
419    if {$hasmap} {
420	# The map is from new dfa states to the sets of nfa states.
421
422	set map {}
423	foreach s [$dfa states] {
424	    lappend map $s $subsets(nfa,$s)
425	}
426    }
427
428    $fa = $dfa
429    $dfa destroy
430
431    # ASSERT : $fa is deterministic
432    return
433}
434
435# --- --- --- --------- --------- ---------
436
437proc ::grammar::fa::op::minimize {fa {mapvar {}}} {
438    # Brzozowski's method:
439    # Reverse, determinize, reverse again, determinize again.
440
441    reverse     $fa
442    determinize $fa mapa
443    reverse     $fa
444    determinize $fa mapb
445
446    if {$mapvar ne ""} {
447	upvar 1 $mapvar map
448
449	if {![llength $mapa] && ![llength $mapb]} {
450	    # No state reorganizations, signal up
451	    set map {}
452	} elseif {[llength $mapa] && ![llength $mapb]} {
453	    # Only one reorg, this is the combined reorg as well.
454	    set map $mapa
455	} elseif {![llength $mapa] && [llength $mapb]} {
456	    # Only one reorg, this is the combined reorg as well.
457	    set map $mapb
458	} else {
459	    # Two reorgs. Compose the maps into the final map signaled
460	    # up.
461
462	    # mapb : final state -> set of states in mapa -> sets of original states.
463
464	    set map {}
465	    array set tmp $mapa
466	    foreach {b aset} $mapb {
467		set compose {}
468		foreach a $aset {foreach o $tmp($a) {lappend compose $o}}
469		lappend map $b [lsort -uniq $compose]
470	    }
471	}
472    }
473
474    # The FA is implicitly trimmed by the determinize's.
475    return
476}
477
478# ### ### ### ######### ######### #########
479## API implementation. Language.
480
481proc ::grammar::fa::op::complement {fa} {
482    # Complementing is possible if and only if the FA is complete,
483    # and accomplished by swapping the final and non-final states.
484
485    if {![$fa is complete]} {
486	return -code error "Unable to complement incomplete FA"
487    }
488    if {![$fa is deterministic]} {
489	return -code error "Unable to complement non-deterministic FA"
490    }
491
492    set newfinal [struct::set difference [$fa states] [$fa finalstates]]
493    $fa final set $newfinal
494    return
495}
496
497# --- --- --- --------- --------- ---------
498
499proc ::grammar::fa::op::kleene {fa} {
500    # The Kleene Closure of the FA makes no sense if we don't have
501    # start and final states we can work from.
502
503    set start [$fa startstates]
504    set final [$fa finalstates]
505
506    if {![llength $start] || ![llength $final]} {
507	return -code error "Unable to add Kleene's closure to a FA without start/final states"
508    }
509
510    # FUTURE :: If final states have no outgoing transitions, and start
511    # FUTURE :: states have no input transitions, then place the new
512    # FUTURE :: transitions directly between start and final
513    # FUTURE :: states. In that case we don't need new states.
514
515    # We need new start/final states, like for optional (see below)
516
517    set ns [NewState $fa s]
518    set nf [NewState $fa f]
519
520    foreach s $start {$fa next $ns "" --> $s}
521    foreach f $final {$fa next $f  "" --> $nf}
522
523    $fa start clear ; $fa start add $ns
524    $fa final clear ; $fa final add $nf
525
526    $fa next $ns "" --> $nf ; # Optionality
527    $fa next $nf "" --> $ns ; # Loop for closure
528    return
529}
530
531# --- --- --- --------- --------- ---------
532
533proc ::grammar::fa::op::optional {fa} {
534    # The Optionality of the FA makes no sense if we don't have
535    # start and final states we can work from.
536
537    set start [$fa startstates]
538    set final [$fa finalstates]
539
540    if {![llength $start] || ![llength $final]} {
541	return -code error "Unable to make a FA without start/final states optional"
542    }
543
544    # We have to introduce new start and final states to ensure
545    # that we do not get additional recognized words from the FA
546    # due to epsilon transitions. IOW just placing epsilons from
547    # all start to all final states is wrong. Consider unreachable
548    # final states, they become reachable. Or final states able to
549    # reach final states from. Again the epsilons would extend the
550    # language. We have to detach our optional epsilon from anything
551    # in the existing start/final states. Hence the new start/final.
552
553    # FUTURE : Recognize if there are no problems with placing direct
554    # FUTURE : epsilons from start to final.
555
556    set ns [NewState $fa s]
557    set nf [NewState $fa f]
558
559    foreach s $start {$fa next $ns "" --> $s}
560    foreach f $final {$fa next $f  "" --> $nf}
561
562    $fa start clear ; $fa start add $ns
563    $fa final clear ; $fa final add $nf
564
565    $fa next $ns "" --> $nf ; # This is the transition which creates the optionality.
566    return
567}
568
569# --- --- --- --------- --------- ---------
570
571proc ::grammar::fa::op::union {fa fb {mapvar {}}} {
572    # We union the input symbols, then add the states and
573    # transitions of the second FA to the first, adding in
574    # epsilons for the start and final states as well. When
575    # adding states we make sure that the new states do not
576    # intersect with the existing states.
577
578    struct::list assign \
579	    [MergePrepare $fa $fb union smap] \
580	    astart afinal bstart bfinal
581
582    if {$mapvar ne ""} {
583	upvar 1 $mapvar map
584	set map $smap
585    }
586
587    # And now the new start & final states
588
589    set ns [NewState $fa s]
590    set nf [NewState $fa f]
591
592    eLink1N $fa $ns $astart
593    eLink1N $fa $ns $bstart
594
595    eLinkN1 $fa $afinal $nf
596    eLinkN1 $fa $bfinal $nf
597
598    $fa start clear ; $fa start add $ns
599    $fa final clear ; $fa final add $nf
600    return
601}
602
603# --- --- --- --------- --------- ---------
604
605proc ::grammar::fa::op::intersect {fa fb {mapvar {}} {idstart 0}} {
606    # Intersection has to run the two automata in parallel, using
607    # paired states. If we have start states we begin the
608    # construction with them. This leads to a smaller result as we
609    # do not have create a full cross-crossproduct. The latter is
610    # unfortunately required if there are no start states.
611
612    struct::list assign [CrossPrepare $fa $fb intersection] tmp res
613
614    # The start states of the new FA consist of the cross-product of
615    # the start states of fa with fb. These are also the states used
616    # to seed DoCross.
617
618    set id $idstart
619    set smap {}
620    set bstart [$tmp startstates]
621    foreach a [$fa startstates] {
622	foreach b $bstart {
623	    set pair [list $a $b]
624	    lappend smap    $id $pair
625	    lappend pending $pair $id
626	    $res state add $id
627	    $res start add $id
628	    incr id
629	}
630    }
631
632    set cp [DoCross $fa $tmp $res $id $pending smap]
633
634    foreach {id pair} $smap {
635	struct::list assign $pair a b
636	if {[$fa final? $a] && [$tmp final? $b]} {
637	    $res final add $id
638	}
639    }
640
641    # Remove excess states (generated because of the sinks).
642    trim $res
643    if {$mapvar ne ""} {
644	upvar 1 $mapvar map
645	# The loop is required to filter out the mappings for all
646	# states which were trimmed off.
647	set map {}
648	foreach {id pair} $smap {
649	    if {![$res state exists $id]} continue
650	    lappend map $id $pair
651	}
652    }
653
654    # Copy result into permanent storage and delete all intermediaries
655    $fa = $res
656    $res destroy
657    if {$tmp ne $fb} {$tmp destroy}
658    return
659}
660
661# --- --- --- --------- --------- ---------
662
663proc ::grammar::fa::op::difference {fa fb {mapvar {}}} {
664    # Difference has to run the two automata in parallel, using
665    # paired states. Only the final states are defined differently
666    # than for intersection. It has to be final in fa and _not_ final
667    # in fb to be a final state of the result. <=> Accepted by A, but
668    # not B, to be in the difference.
669
670    struct::list assign [CrossPrepare $fa $fb difference] tmp res
671
672    # The start states of the new FA consist of the cross-product of
673    # the start states of fa with fb. These are also the states used
674    # to seed DoCross.
675
676    set id 0
677    set smap {}
678    set bstart [$tmp startstates]
679    foreach a [$fa startstates] {
680	foreach b $bstart {
681	    set pair [list $a $b]
682	    lappend smap    $id $pair
683	    lappend pending $pair $id
684	    $res state add $id
685	    $res start add $id
686	    incr id
687	}
688    }
689
690    set cp [DoCross $fa $tmp $res $id $pending smap]
691
692    foreach {id pair} $smap {
693	struct::list assign $pair a b
694	if {[$fa final? $a] && ![$tmp final? $b]} {
695	    $res final add $id
696	}
697    }
698
699    # Remove excess states (generated because of the sinks).
700    trim $res
701    if {$mapvar ne ""} {
702	upvar 1 $mapvar map
703	# The loop is required to filter out the mappings for all
704	# states which were trimmed off.
705	set map {}
706	foreach {id pair} $smap {
707	    if {![$res state exists $id]} continue
708	    lappend map $id $pair
709	}
710    }
711
712    # Copy result into permanent storage and delete all intermediaries
713    $fa = $res
714    $res destroy
715    if {$tmp ne $fb} {$tmp destroy}
716    return
717}
718
719# --- --- --- --------- --------- ---------
720
721proc ::grammar::fa::op::concatenate {fa fb {mapvar {}}} {
722    # Like union, only the interconnect between existing and new FA is different.
723
724    struct::list assign \
725	    [MergePrepare $fa $fb concatenate smap] \
726	    astart afinal bstart bfinal
727
728    if {$mapvar ne ""} {
729	upvar 1 $mapvar map
730	set map $smap
731    }
732
733    set ns [NewState $fa s]
734    set nm [NewState $fa m] ;# Midpoint.
735    set nf [NewState $fa f]
736
737    eLink1N $fa $ns $astart
738    eLinkN1 $fa $afinal $nm
739
740    eLink1N $fa $nm $bstart
741    eLinkN1 $fa $bfinal $nf
742
743    $fa start clear ; $fa start add $ns
744    $fa final clear ; $fa final add $nf
745    return
746}
747
748# ### ### ### ######### ######### #########
749## API implementation. Compilation (regexp -> FA).
750
751proc ::grammar::fa::op::fromRegex {fa regex {over {}}} {
752    # Convert a regular expression into a FA. The regex is given as
753    # parse tree in the form of a nested list.
754
755    # {. A B ...} ... Concatenation (accepts zero|one arguments).
756    # {| A B ...} ... Alternatives  (accepts zero|one arguments).
757    # {? A}       ... Optional.
758    # {* A}       ... Kleene.
759    # {+ A}       ... Pos.Kleene.
760    # {! A}       ... Complement/Negation.
761    # {S Symbol}  ... Atom, Symbol
762    #
763    # Recursive descent with a helper ...
764
765    if {![llength $regex]} {
766	$fa clear
767	return
768    }
769
770    set tmp [[cons] %AUTO%]
771
772    if {![llength $over]} {
773	set over [lsort -uniq [RESymbols $regex]]
774    }
775    foreach sym $over {
776	$tmp symbol add $sym
777    }
778
779    set id 0
780    struct::list assign [Regex $tmp $regex id] s f
781    $tmp start set [list $s]
782    $tmp final set [list $f]
783
784    $fa = $tmp
785    $tmp destroy
786    return
787}
788
789# ### ### ### ######### ######### #########
790## Internal helpers.
791
792proc ::grammar::fa::op::RESymbols {regex} {
793    set cmd [lindex $regex 0]
794    switch -exact -- $cmd {
795	? - * - ! - + {
796	    return [RESymbols [lindex $regex 1]]
797	}
798	. - | - & {
799	    set res {}
800	    foreach sub [lrange $regex 1 end] {
801		foreach sym [RESymbols $sub] {lappend res $sym}
802	    }
803	    return $res
804	}
805	S {
806	    return [list [lindex $regex 1]]
807	}
808	default {
809	    return -code error "Expected . ! ? * | &, or S, got \"$cmd\""
810	}
811    }
812}
813
814proc ::grammar::fa::op::Regex {fa regex idvar} {
815    upvar 1 $idvar id
816    set cmd [lindex $regex 0]
817    switch -exact -- $cmd {
818	? {
819	    # Optional
820	    set a $id ; incr id ; $fa state add $a
821	    set b $id ; incr id ; $fa state add $b
822
823	    struct::list assign [Regex $fa [lindex $regex 1] id] s f
824	    $fa next $a "" --> $s
825	    $fa next $f "" --> $b
826	    $fa next $a "" --> $b
827	}
828	* {
829	    # Kleene
830	    set a $id ; incr id ; $fa state add $a
831	    set b $a
832
833	    struct::list assign [Regex $fa [lindex $regex 1] id] s f
834	    $fa next $a "" --> $s
835	    $fa next $f "" --> $a ;# == b
836	}
837	+ {
838	    # Pos. Kleene
839	    set a $id ; incr id ; $fa state add $a
840	    set b $id ; incr id ; $fa state add $b
841
842	    struct::list assign [Regex $fa [lindex $regex 1] id] s f
843	    $fa next $a "" --> $s
844	    $fa next $f "" --> $b
845	    $fa next $b "" --> $a
846	}
847	! {
848	    # Complement.
849	    # Build up in a temp FA, complement, and
850	    # merge nack into the current
851
852	    set a $id ; incr id ; $fa state add $a
853	    set b $id ; incr id ; $fa state add $b
854
855	    set tmp [[cons] %AUTO%]
856	    foreach sym [$fa symbols] {$tmp symbol add $sym}
857	    struct::list assign [Regex $tmp [lindex $regex 1] id] s f
858	    $tmp start add $s
859	    $tmp final add $f
860
861	    determinize $tmp {} $id
862	    incr id [llength [$tmp states]]
863	    if {![$tmp is complete]} {
864		complete    $tmp $id
865		incr id
866	    }
867	    complement  $tmp
868
869	    # Merge and link.
870	    $fa deserialize_merge [$tmp serialize]
871
872	    eLink1N $fa $a [$tmp startstates]
873	    eLinkN1 $fa [$tmp finalstates] $b
874	    $tmp destroy
875	}
876	& {
877	    # Intersection ... /And
878
879	    if {[llength $regex] < 3} {
880		# Optimized path. Intersection of one sub-expression
881		# is the sub-expression itself.
882
883		struct::list assign [Regex $fa [lindex $regex 1] id] a b
884	    } else {
885		set a $id ; incr id ; $fa state add $a
886		set b $id ; incr id ; $fa state add $b
887
888		set tmp [[cons] %AUTO%]
889		foreach sym [$fa symbols] {$tmp symbol add $sym}
890		set idsub 0
891		struct::list assign [Regex $tmp [lindex $regex 1] idsub] s f
892		$tmp start add $s
893		$tmp final add $f
894
895		set beta [[cons] %AUTO%]
896		foreach sub [lrange $regex 2 end] {
897		    foreach sym [$fa symbols] {$beta symbol add $sym}
898		    struct::list assign [Regex $beta $sub idsub] s f
899		    $beta start add $s
900		    $beta final add $f
901		    intersect $tmp $beta {} $id
902		}
903		$beta destroy
904		determinize $tmp {} $id
905		incr id [llength [$tmp states]]
906
907		# Merge and link.
908		$fa deserialize_merge [$tmp serialize]
909
910		eLink1N $fa $a [$tmp startstates]
911		eLinkN1 $fa [$tmp finalstates] $b
912		$tmp destroy
913	    }
914	}
915	. {
916	    # Concatenation ...
917
918	    if {[llength $regex] == 1} {
919		# Optimized path. No sub-expressions. This represents
920		# language containing only the empty string, aka
921		# epsilon.
922
923		set a $id ; incr id ; $fa state add $a
924		set b $id ; incr id ; $fa state add $b
925		$fa next $a "" --> $b
926
927	    } elseif {[llength $regex] == 2} {
928		# Optimized path. Concatenation of one sub-expression
929		# is the sub-expression itself.
930
931		struct::list assign [Regex $fa [lindex $regex 1] id] a b
932	    } else {
933		set first 1
934		set last {}
935		foreach sub [lrange $regex 1 end] {
936		    struct::list assign [Regex $fa $sub id] s f
937		    if {$first} {set first 0 ; set a $s}
938		    if {$last != {}} {
939			$fa next $last "" --> $s
940		    }
941		    set last $f
942		}
943		set b $f
944	    }
945	}
946	| {
947	    # Alternatives ... (Union)
948
949	    if {[llength $regex] == 1} {
950		# Optimized path. No sub-expressions. This represents
951		# the empty language, i.e. the language without words.
952
953		set a $id ; incr id ; $fa state add $a
954		set b $id ; incr id ; $fa state add $b
955
956	    } elseif {[llength $regex] == 2} {
957		# Optimized path. Choice/Union of one sub-expression
958		# is the sub-expression itself.
959
960		struct::list assign [Regex $fa [lindex $regex 1] id] a b
961	    } else {
962		set a $id ; incr id ; $fa state add $a
963		set b $id ; incr id ; $fa state add $b
964		foreach sub [lrange $regex 1 end] {
965		    struct::list assign [Regex $fa $sub id] s f
966		    $fa next $a "" --> $s
967		    $fa next $f "" --> $b
968		}
969	    }
970	}
971	S {
972	    # Atom, base transition.
973	    set sym [lindex $regex 1]
974	    set a $id ; incr id ; $fa state add $a
975	    set b $id ; incr id ; $fa state add $b
976	    $fa next $a $sym --> $b
977	}
978	default {
979	    return -code error "Expected . ! ? * | &, or S, got \"$cmd\""
980	}
981    }
982    return [list $a $b]
983}
984
985# --- --- --- --------- --------- ---------
986
987proc ::grammar::fa::op::CrossPrepare {fa fb label} {
988    set starta [$fa startstates]
989    set finala [$fa finalstates]
990    set startb [$fb startstates]
991    set finalb [$fb finalstates]
992    if {
993	![llength $starta] || ![llength $finala] ||
994	![llength $startb] || ![llength $finalb]
995    } {
996	return -code error "Unable to perform the $label of two FAs without start/final states"
997    }
998
999    # The inputs are made complete over the union of their symbol
1000    # sets. A temp. container is used for the second input if necessary.
1001
1002    set totals [struct::set union [$fa symbols] [$fb symbols]]
1003    foreach sym [struct::set difference $totals [$fa symbols]] {
1004	$fa symbol add $sym
1005    }
1006    if {![$fa is epsilon-free]} {
1007	remove_eps $fa
1008	trim       $fa
1009    }
1010    if {![$fa is complete]} {
1011	complete $fa
1012    }
1013    set tmp $fb
1014    set bnew [struct::set difference $totals [$fb symbols]]
1015    if {[llength $bnew]} {
1016	set tmp [[cons] %AUTO% = $fb]
1017	foreach sym $bnew {
1018	    $tmp symbol add $sym
1019	}
1020    }
1021    if {![$fb is epsilon-free]} {
1022	if {$tmp eq $fb} {set tmp [[cons] %AUTO% = $fb]}
1023	remove_eps $tmp
1024	trim       $tmp
1025    }
1026    if {![$fb is complete]} {
1027	if {$tmp eq $fb} {set tmp [[cons] %AUTO% = $fb]}
1028	complete $tmp
1029    }
1030
1031    set res [[cons] %AUTO%]
1032    foreach sym $totals {
1033	$res symbol add $sym
1034    }
1035
1036    return [list $tmp $res]
1037}
1038
1039# --- --- --- --------- --------- ---------
1040
1041proc ::grammar::fa::op::DoCross {fa fb res id seed smapvar} {
1042    upvar 1 $smapvar smap
1043
1044    set symbols [$fa symbols]
1045    array set tmp $seed
1046
1047    set pending $seed
1048    while {[llength $pending]} {
1049	set cpair [struct::list shift pending]
1050	set cid   [struct::list shift pending]
1051
1052	struct::list assign $cpair a b
1053
1054	# ASSERT: /res state exists /cid
1055
1056	# Generate the transitions for the pair, add the resulting
1057	# destinations to the FA, and schedule them for a visit if
1058	# they are new.
1059
1060	foreach sym $symbols {
1061	    set adestinations [$fa next $a $sym]
1062	    set bdestinations [$fb next $b $sym]
1063
1064	    foreach ad $adestinations {
1065		foreach bd $bdestinations {
1066		    set dest [list $ad $bd]
1067
1068		    if {![info exists tmp($dest)]} {
1069			$res state add $id
1070			lappend smap $id $dest
1071			lappend pending $dest $id
1072			set tmp($dest) $id
1073			incr id
1074		    }
1075		    $res next $cid $sym --> $tmp($dest)
1076		}
1077	    }
1078	}
1079    }
1080    return
1081}
1082
1083# --- --- --- --------- --------- ---------
1084
1085proc ::grammar::fa::op::MergePrepare {fa fb label mapvar} {
1086    upvar 1 $mapvar map
1087
1088    set starta [$fa startstates]
1089    set finala [$fa finalstates]
1090    set startb [$fb startstates]
1091    set finalb [$fb finalstates]
1092    if {
1093	![llength $starta] || ![llength $finala] ||
1094	![llength $startb] || ![llength $finalb]
1095    } {
1096	return -code error "Unable to $label FAs without start/final states"
1097    }
1098
1099    # FUTURE: add {*}[symbols], ignore dup's
1100    foreach sym [$fb symbols] {catch {$fa symbol add $sym}}
1101
1102    set dup [struct::set intersect [$fa states] [$fb states]]
1103    if {![llength $dup]} {
1104	# The states do not overlap. A plain merge of fb is enough to
1105	# copy the information.
1106
1107	$fa deserialize_merge [$fb serialize]
1108	set map {}
1109    } else {
1110	# We have duplicate states, therefore we have to remap fb to
1111	# prevent interference between the two.
1112
1113	set map {}
1114	set tmp [[cons] %AUTO% = $fb]
1115	set id 0
1116	foreach s $dup {
1117	    # The renaming process has to ensure that the new name is
1118	    # in neither fa, nor already in fb as well.
1119	    while {
1120		[$fa  state exists $id] ||
1121		[$tmp state exists $id]
1122	    } {incr id}
1123	    $tmp state rename $s $id
1124	    lappend map $id $s
1125	    incr id
1126	}
1127
1128	set startb [$tmp startstates]
1129	set finalb [$tmp finalstates]
1130
1131	$fa deserialize_merge [$tmp serialize]
1132	$tmp destroy
1133    }
1134
1135    return [list $starta $finala $startb $finalb]
1136}
1137
1138# --- --- --- --------- --------- ---------
1139
1140proc ::grammar::fa::op::eLink1N {fa from states} {
1141    foreach s $states {
1142	$fa next $from "" --> $s
1143    }
1144    return
1145}
1146
1147# --- --- --- --------- --------- ---------
1148
1149proc ::grammar::fa::op::eLinkN1 {fa states to} {
1150    foreach s $states {
1151	$fa next $s "" --> $to
1152    }
1153    return
1154}
1155
1156# --- --- --- --------- --------- ---------
1157
1158proc ::grammar::fa::op::NewState {fa prefix} {
1159    set newstate [FindNewState $fa $prefix]
1160    $fa state add $newstate
1161    return $newstate
1162}
1163
1164# --- --- --- --------- --------- ---------
1165
1166proc ::grammar::fa::op::FindNewState {fa prefix} {
1167    #if {![$fa state exists $prefix]} {return $prefix}
1168    set n 0
1169    while {[$fa state exists ${prefix}.$n]} {incr n}
1170    return ${prefix}.$n
1171}
1172
1173# ### ### ### ######### ######### #########
1174## API implementation. Decompilation (FA -> regexp).
1175
1176proc ::grammar::fa::op::toRegexp {fa} {
1177    # NOTE: FUTURE - Do not go through the serialization, nor through
1178    # a matrix. The algorithm can be expressed more directly as
1179    # operations on the automaton (states and transitions).
1180
1181    set ET [ser_to_ematrix [$fa serialize]]
1182    while {[llength $ET] > 2} {
1183	set ET [matrix_drop_state $ET]
1184    }
1185    return [lindex $ET 0 1]
1186}
1187
1188proc ::grammar::fa::op::toRegexp2 {fa} {
1189    # NOTE: FUTURE - See above.
1190    set ET [ser_to_ematrix [$fa serialize]]
1191    while {[llength $ET] > 2} {
1192	set ET [matrix_drop_state $ET re2]
1193    }
1194    return [lindex $ET 0 1]
1195}
1196
1197# ### ### ### ######### ######### #########
1198## Internal helpers.
1199
1200proc ::grammar::fa::op::ser_to_ematrix {ser} {
1201    if {[lindex $ser 0] ne "grammar::fa"} then {
1202	error "Expected grammar::fa automaton serialisation"
1203    }
1204    set stateL {}
1205    set n 2; foreach {state des} [lindex $ser 2] {
1206	lappend stateL $state
1207	set N($state) $n
1208	incr n
1209    }
1210    set row0 {}
1211    for {set k 0} {$k<$n} {incr k} {lappend row0 [list |]}
1212    set res [list $row0 $row0]
1213    foreach {from des} [lindex $ser 2] {
1214	set row [lrange $row0 0 1]
1215	if {[lindex $des 0]} then {lset res 0 $N($from) [list .]}
1216	if {[lindex $des 1]} then {lset row 1 [list .]}
1217	foreach to $stateL {set S($to) [list |]}
1218	foreach {symbol targetL} [lindex $des 2] {
1219	    if {$symbol eq ""} then {
1220		set atom [list .]
1221	    } else {
1222		set atom [list S $symbol]
1223	    }
1224	    foreach to $targetL {lappend S($to) $atom}
1225	}
1226	foreach to $stateL {
1227	    if {[llength $S($to)] == 2} then {
1228		lappend row [lindex $S($to) 1]
1229	    } else {
1230		lappend row $S($to)
1231	    }
1232	}
1233	lappend res $row
1234    }
1235    return $res
1236}
1237
1238proc ::grammar::fa::op::matrix_drop_state {T_in {ns re1}} {
1239    set sumcmd ${ns}::|
1240    set prodcmd ${ns}::.
1241    set T1 {}
1242    set lastcol {}
1243    foreach row $T_in {
1244	lappend T1 [lreplace $row end end]
1245	lappend lastcol [lindex $row end]
1246    }
1247    set lastrow [lindex $T1 end]
1248    set T1 [lreplace $T1 end end]
1249    set b [${ns}::* [lindex $lastcol end]]
1250    set lastcol [lreplace $lastcol end end]
1251    set res {}
1252    foreach row $T1 a $lastcol {
1253	set newrow {}
1254	foreach pos $row c $lastrow {
1255	    lappend newrow [$sumcmd $pos [$prodcmd $a $b $c]]
1256	}
1257	lappend res $newrow
1258    }
1259    return $res
1260}
1261
1262# ### ### ### ######### ######### #########
1263## Internal helpers. Regexp simplification I.
1264
1265namespace eval ::grammar::fa::op::re1 {
1266    namespace export | . {\*}
1267}
1268
1269proc ::grammar::fa::op::re1::| {args} {
1270    set L {}
1271
1272    # | = Choices.
1273    # Sub-choices are lifted into the top expression (foreach).
1274    # Identical choices are reduced to a single term (lsort -uniq).
1275
1276    foreach re $args {
1277	switch -- [lindex $re 0] "|" {
1278	    foreach term [lrange $re 1 end] {lappend L $term}
1279	} default {
1280	    lappend L $re
1281	}
1282    }
1283    set L [lsort -unique $L]
1284    if {[llength $L] == 1} then {
1285	return [lindex $L 0]
1286    } else {
1287	return [linsert $L 0 |]
1288    }
1289}
1290
1291proc ::grammar::fa::op::re1::. {args} {
1292    set L {}
1293
1294    # . = Sequence.
1295    # One element sub-choices are lifted into the top expression.
1296    # Sub-sequences are lifted into the top expression.
1297
1298    foreach re $args {
1299	switch -- [lindex $re 0] "." {
1300	    foreach term [lrange $re 1 end] {lappend L $term}
1301	} "|" {
1302	    if {[llength $re] == 1} then {return $re}
1303	    lappend L $re
1304	} default {
1305	    lappend L $re
1306	}
1307    }
1308    if {[llength $L] == 1} then {
1309	return [lindex $L 0]
1310    } else {
1311	return [linsert $L 0 .]
1312    }
1313}
1314
1315proc ::grammar::fa::op::re1::* {re} {
1316    # * = Kleene closure.
1317    # Sub-closures are lifted into the top expression.
1318    # One-element sub-(choices,sequences) are lifted into the top expression.
1319
1320    switch -- [lindex $re 0] "|" - "." {
1321	if {[llength $re] == 1} then {
1322	    return [list .]
1323	} else {
1324	    return [list * $re]
1325	}
1326    } "*" {
1327	return $re
1328    } default {
1329	return [list * $re]
1330    }
1331}
1332
1333# ### ### ### ######### ######### #########
1334## Internal helpers. Regexp simplification II.
1335
1336namespace eval ::grammar::fa::op::re2 {
1337    # Inherit choices and kleene-closure from the basic simplifier.
1338
1339    namespace import [namespace parent]::re1::|
1340    namespace import [namespace parent]::re1::\\*
1341}
1342
1343proc ::grammar::fa::op::re2::. {args} {
1344
1345    # . = Sequences
1346    # Sub-sequences are lifted into the top expression.
1347    # Sub-choices are multiplied out.
1348    # <Example a(b|c) => ab|ac >
1349
1350    set L {}
1351    set n -1
1352    foreach re $args {
1353	incr n
1354	switch -- [lindex $re 0] "." {
1355	    foreach term [lrange $re 1 end] {lappend L $term}
1356	} "|" {
1357	    set res [list |]
1358	    set L2 [lreplace $args 0 $n]
1359	    foreach term [lrange $re 1 end] {
1360		lappend res [eval [list .] $L [list $term] $L2]
1361	    }
1362	    return [eval $res]
1363	} default {
1364	    lappend L $re
1365	}
1366    }
1367    if {[llength $L] == 1} then {
1368	return [lindex $L 0]
1369    } else {
1370	return [linsert $L 0 .]
1371    }
1372}
1373
1374# ### ### ### ######### ######### #########
1375## API. Simplification of regular expressions.
1376
1377proc ::grammar::fa::op::simplifyRegexp {RE0} {
1378    set RE1 [namespace inscope nonnull $RE0]
1379    if {[lindex $RE1 0] eq "S" || $RE1 eq "." || $RE1 eq "|"} then {
1380	return $RE1
1381    }
1382    set tmp [grammar::fa %AUTO% fromRegex $RE1]
1383    $tmp minimize
1384    set RE1 [toRegexp $tmp]
1385    $tmp destroy
1386    if {[string length $RE1] < [string length $RE0]} then {
1387	set RE0 $RE1
1388    }
1389    if {[lindex $RE0 0] eq "S"} then {return $RE0}
1390    set res [lrange $RE0 0 0]
1391    foreach branch [lrange $RE0 1 end] {
1392	lappend res [simplifyRegexp $branch]
1393    }
1394    return $res
1395}
1396
1397# ### ### ### ######### ######### #########
1398## Internal helpers.
1399
1400namespace eval ::grammar::fa::op::nonnull {}
1401
1402proc ::grammar::fa::op::nonnull::| {args} {
1403    set also_empty false
1404    set res [list |]
1405    foreach branch $args {
1406	set RE [eval $branch]
1407	if {[lindex $RE 0] eq "?"} then {
1408	    set also_empty true
1409	    set RE [lindex $RE 1]
1410	}
1411	switch -- [lindex $RE 0] "|" {
1412	    eval [lreplace $RE 0 0 lappend res]
1413	} "." {
1414	    if {[llength $RE] == 1} then {
1415		set also_empty true
1416	    } else {
1417		lappend res $RE
1418	    }
1419	} default {
1420	    lappend res $RE
1421	}
1422    }
1423    if {!$also_empty} then {return $res}
1424    foreach branch [lrange $res 1 end] {
1425	if {[lindex $branch 0] eq "*"} then {return $res}
1426    }
1427    if {[llength $res] == 1} then {
1428	return [list .]
1429    } elseif {[llength $res] == 2} then {
1430	return [lreplace $res 0 0 ?]
1431    } else {
1432	return [list ? $res]
1433    }
1434}
1435
1436proc ::grammar::fa::op::nonnull::. {args} {
1437    set res [list .]
1438    foreach branch $args {
1439	set RE [eval $branch]
1440	switch -- [lindex $RE 0] "|" {
1441	    if {[llength $RE] == 1} then {return $RE}
1442	    lappend res $RE
1443	} "." {
1444	    eval [lreplace $RE 0 0 lappend res]
1445	} default {
1446	    lappend res $RE
1447	}
1448    }
1449    return $res
1450}
1451
1452proc ::grammar::fa::op::nonnull::* {sub} {
1453    set RE [eval $sub]
1454    switch -- [lindex $RE 0] "*" - "?" - "+" {
1455	return [lreplace $RE 0 0 *]
1456    } default {
1457	return [list * $RE]
1458    }
1459}
1460
1461proc ::grammar::fa::op::nonnull::+ {sub} {
1462    set RE [eval $sub]
1463    switch -- [lindex $RE 0] "+" {
1464	return $RE
1465    } "*" - "?" {
1466	return [lreplace $RE 0 0 *]
1467    } default {
1468	return [list * $RE]
1469    }
1470}
1471
1472proc ::grammar::fa::op::nonnull::? {sub} {
1473    set RE [eval $sub]
1474    switch -- [lindex $RE 0] "?" - "*" {
1475	return $RE
1476    } "+" {
1477	return [lreplace $RE 0 0 *]
1478    } default {
1479	return [list ? $RE]
1480    }
1481}
1482
1483proc ::grammar::fa::op::nonnull::S {name} {
1484    return [list S $name]
1485}
1486
1487# ### ### ### ######### ######### #########
1488## API. Translate RE of this package to Tcl REs
1489
1490proc ::grammar::fa::op::toTclRegexp {re symdict} {
1491    return [lindex [namespace inscope tclre $re $symdict] 1]
1492}
1493
1494# ### ### ### ######### ######### #########
1495## Internal helpers.
1496
1497namespace eval ::grammar::fa::op::tclre {}
1498
1499proc ::grammar::fa::op::tclre::S {name dict} {
1500    array set A $dict
1501    if {[info exists A($name)]} then {
1502	return $A($name)
1503    } elseif {[string length $name] == 1} then {
1504	if {[regexp {[\\\[\]{}.()*+?^$]} $name]} then {
1505	    return [list char \\$name]
1506	} else {
1507	    return [list char $name]
1508	}
1509    } else {
1510	return [list class "\[\[:${name}:\]\]"]
1511    }
1512}
1513
1514proc ::grammar::fa::op::tclre::. {args} {
1515    set suffix [lrange $args end end]
1516    set L {}
1517    foreach factor [lrange $args 0 end-1] {
1518	set pair [eval $factor $suffix]
1519	switch -- [lindex $pair 0] "sum" {
1520	    lappend L ([lindex $pair 1])
1521	} default {
1522	    lappend L [lindex $pair 1]
1523	}
1524    }
1525    return [list prod [join $L ""]]
1526}
1527
1528proc ::grammar::fa::op::tclre::* {re dict} {
1529    set pair [eval $re [list $dict]]
1530    switch -- [lindex $pair 0] "sum" - "prod" {
1531	return [list prod "([lindex $pair 1])*"]
1532    } default {
1533	return [list prod "[lindex $pair 1]*"]
1534    }
1535}
1536
1537proc ::grammar::fa::op::tclre::+ {re dict} {
1538    set pair [eval $re [list $dict]]
1539    switch -- [lindex $pair 0] "sum" - "prod" {
1540	return [list prod "([lindex $pair 1])+"]
1541    } default {
1542	return [list prod "[lindex $pair 1]+"]
1543    }
1544}
1545
1546proc ::grammar::fa::op::tclre::? {re dict} {
1547    set pair [eval $re [list $dict]]
1548    switch -- [lindex $pair 0] "sum" - "prod" {
1549	return [list prod "([lindex $pair 1])?"]
1550    } default {
1551	return [list prod "[lindex $pair 1]?"]
1552    }
1553}
1554
1555proc ::grammar::fa::op::tclre::| {args} {
1556    set suffix [lrange $args end end]
1557    set charL {}
1558    set classL {}
1559    set prodL {}
1560    foreach factor [lrange $args 0 end-1] {
1561	set pair [eval $factor $suffix]
1562	switch -- [lindex $pair 0] "char" {
1563	    lappend charL [lindex $pair 1]
1564	} "class" {
1565	    lappend classL [string range [lindex $pair 1] 1 end-1]
1566	} default {
1567	    lappend prodL [lindex $pair 1]
1568	}
1569    }
1570    if {[llength $charL]>1 || [llength $classL]>0} then {
1571	while {[set n [lsearch $charL -]] >= 0} {
1572	    lset charL $n {\-}
1573	}
1574	set bracket "\[[join $charL ""][join $classL ""]\]"
1575	if {![llength $prodL]} then {
1576	    return [list atom $bracket]
1577	}
1578	lappend prodL $bracket
1579    } else {
1580	eval [list lappend prodL] $charL
1581    }
1582    return [list sum [join $prodL |]]
1583}
1584
1585proc ::grammar::fa::op::tclre::& {args} {
1586    error "Cannot express language intersection in Tcl-RE's"
1587
1588    # Note: This can be translated by constructing an automaton for
1589    # the intersection, and then translating its conversion to a
1590    # regular expression.
1591}
1592
1593proc ::grammar::fa::op::tclre::! {args} {
1594    error "Cannot express language complementation in Tcl-RE's"
1595
1596    # Note: This can be translated by constructing an automaton for
1597    # the complement, and then translating its conversion to a regular
1598    # expression. This however requires knowledge regarding the set of
1599    # symbols. Large (utf-8) for Tcl regexes.
1600}
1601
1602# ### ### ### ######### ######### #########
1603
1604proc ::grammar::fa::op::constructor {cmd} {
1605    variable cons $cmd
1606    return
1607}
1608
1609proc ::grammar::fa::op::cons {} {
1610    variable cons
1611    if {$cons ne ""} {return $cons}
1612    return -code error "No constructor for FA container was established."
1613}
1614
1615# ### ### ### ######### ######### #########
1616## Package Management
1617
1618package provide grammar::fa::op 0.4.1
1619