1# -*- tcl -*- 2# (c) 2004-2009 Andreas Kupries 3# Grammar / Finite Automatons / Container 4 5# ### ### ### ######### ######### ######### 6## Package description 7 8## A class whose instances hold all the information describing a 9## single finite automaton (states, symbols, start state, set of 10## accepting states, transition function), and operations to define, 11## manipulate, and query this information. 12 13# ### ### ### ######### ######### ######### 14## Requisites 15 16package require grammar::fa::op ; # Heavy FA operations. 17package require snit 1.3 ; # OO system in use (Using hierarchical methods) 18package require struct::list ; # Extended list operations. 19package require struct::set ; # Extended set operations. 20 21# ### ### ### ######### ######### ######### 22## Implementation 23 24snit::type ::grammar::fa { 25 # ### ### ### ######### ######### ######### 26 ## Type API. A number of operations on FAs 27 28 # ### ### ### ######### ######### ######### 29 ## Instance API 30 31 #constructor {args} {} 32 #destructor {} 33 34 method = {b} {} 35 method --> {b} {} 36 37 method serialize {} {} 38 method deserialize {value} {} 39 method deserialize_merge {value} {} 40 41 method states {} {} 42 #method state {cmd s args} {} 43 44 method startstates {} {} 45 method start? {s} {} 46 method start?set {states} {} 47 #method start {cmd args} {} 48 49 method finalstates {} {} 50 method final? {s} {} 51 method final?set {states} {} 52 #method final {cmd args} {} 53 54 method symbols {} {} 55 method symbols@ {state} {} 56 method symbols@set {states} {} 57 #method symbol {cmd sym} {} 58 59 method next {s sym args} {} 60 method !next {s sym args} {} 61 method nextset {states sym} {} 62 63 method is {cmd} {} 64 65 method reachable_states {} {} 66 method unreachable_states {} {} 67 method reachable {s} {} 68 69 method useful_states {} {} 70 method unuseful_states {} {} 71 method useful {s} {} 72 73 method epsilon_closure {s} {} 74 75 method clear {} {} 76 77 # ### ### ### ######### ######### ######### 78 ## Instance API. Complex FA operations. 79 ## The heavy lifting is done by the operations package. 80 81 method reverse {} {op::reverse $self} 82 method complete {{sink {}}} {op::complete $self $sink} 83 method remove_eps {} {op::remove_eps $self} 84 method trim {{what !reachable|!useful}} {op::trim $self $what} 85 method complement {} {op::complement $self} 86 method kleene {} {op::kleene $self} 87 method optional {} {op::optional $self} 88 method fromRegex {regex {over {}}} {op::fromRegex $self $regex $over} 89 90 method determinize {{mapvar {}}} { 91 if {$mapvar ne ""} {upvar 1 $mapvar map} 92 op::determinize $self map 93 } 94 95 method minimize {{mapvar {}}} { 96 if {$mapvar ne ""} {upvar 1 $mapvar map} 97 op::minimize $self map 98 } 99 100 method union {fa {mapvar {}}} { 101 if {$mapvar ne ""} {upvar 1 $mapvar map} 102 op::union $self $fa map 103 } 104 105 method intersect {fa {mapvar {}}} { 106 if {$mapvar ne ""} {upvar 1 $mapvar map} 107 op::intersect $self $fa map 108 } 109 110 method difference {fa {mapvar {}}} { 111 if {$mapvar ne ""} {upvar 1 $mapvar map} 112 op::difference $self $fa map 113 } 114 115 method concatenate {fa {mapvar {}}} { 116 if {$mapvar ne ""} {upvar 1 $mapvar map} 117 op::concatenate $self $fa map 118 } 119 120 # ### ### ### ######### ######### ######### 121 ## Internal data structures. 122 123 ## State information: 124 ## - Order : Defined for all states, values provide creation order. 125 ## - Start : Defined for states which are "start" (Input processing begins in). 126 ## - Final : Defined for states which are "final" ("accept" input). 127 ## - Transinv : Inverse transitions. Per state the set of (state,sym)'s 128 ## which have transitions into the state. Defined only for 129 ## states which have inbound transitions. 130 ## 131 ## Transinv is maintained to make state deletion easier: Direct 132 ## access to the states and transitions which are inbound, for 133 ## their deletion. 134 135 variable order ; # Map : State -> Order of creation 136 variable final ; # Map : State -> . Exists <=> Is a final State 137 variable start ; # Map : State -> . Exists <=> Is a start State 138 variable transinv ; # Map : State -> {(State, Sym)} 139 140 ## Global information: 141 ## - Scount : Counter for creation order of states. 142 143 variable scount 0 ; # Counter for orderering states. 144 145 ## Symbol information: 146 ## - Symbol : Defined for all symbols, values irrelevant. 147 148 variable symbol ; # Map : Symbol -> . Exists = Symbol declared. 149 150 ## Transition data: 151 ## - TransN : Dynamically created instance variables. Transition tables 152 ## for single states. Defined only for states which have 153 ## transitions. 154 ## - Transym : List of states having transitions on that symbol. 155 156 ## Transym is maintained for symbol deletion. Direct access to the transitions 157 ## we have to delete as well. 158 159 ## selfns::trans_$order(state) : Per state map : symbol -> list of destinations. 160 variable transym ; # Map : Sym -> {State} 161 162 ## Derived information: 163 ## - Reach : Cache for set of states reachable from start. 164 ## - Reachvalid : Boolean flag. True iff the reach cache contains valid data 165 ## - Useful : Cache for set of states able to reach final. 166 ## - Usefulvalid : Boolean flag. True iff the useful cache contains valid data 167 ## - Nondete : Set of states which are non-deterministic, because they have 168 # epsilon-transitions. 169 # - EC : Cache of epsilon-closures 170 171 variable reach {} ; # Set of states reachable from 'start'. 172 variable reachvalid 0 ; # Boolean flag, if 'reach' is valid. 173 174 variable useful {} ; # Set of states able to reach 'final'. 175 variable usefulvalid 0 ; # Boolean flag, if 'useful' is valid. 176 177 variable nondete {} ; # Set of non-deterministic states, by epsilon/non-epsilon. 178 variable nondets ; # Per non-det state the set of symbols it is non-det in. 179 180 variable ec ; # Cache of epsilon-closures for states. 181 182 183 # ### ### ### ######### ######### ######### 184 ## Instance API Implementation. 185 186 constructor {args} { 187 set alen [llength $args] 188 if {($alen != 2) && ($alen != 0) && ($alen != 3)} { 189 return -code error "wrong#args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??" 190 } 191 192 array set order {} ; set nondete {} 193 array set start {} ; set scount 0 194 array set final {} ; set reach {} 195 array set symbol {} ; set reachvalid 0 196 array set transym {} ; set useful {} 197 array set transinv {} ; set usefulvalid 0 198 array set nondets {} 199 array set ec {} 200 201 if {$alen == 0} return 202 203 foreach {cmd object} $args break 204 switch -exact -- $cmd { 205 = - := - <-- - as { 206 if {$alen != 2} { 207 return -code error "wrong#args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??" 208 } 209 $self = $object 210 } 211 deserialize { 212 if {$alen != 2} { 213 return -code error "wrong#args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??" 214 } 215 # Object is actually a value, the deserialization to use. 216 $self deserialize $object 217 } 218 fromRegex { 219 # Object is actually a value, the regular expression to use. 220 if {$alen == 2} { 221 $self fromRegex $object 222 } else { 223 $self fromRegex $object [lindex $args 2] 224 } 225 } 226 default { 227 return -code error "bad assignment: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??" 228 } 229 } 230 return 231 } 232 233 # destructor {} 234 235 # --- --- --- --------- --------- --------- 236 237 method = {b} { 238 $self deserialize [$b serialize] 239 } 240 241 method --> {b} { 242 $b deserialize [$self serialize] 243 } 244 245 # --- --- --- --------- --------- --------- 246 247 method serialize {} { 248 set ord {} 249 foreach {s n} [array get order] { 250 lappend ord [list $s $n] 251 } 252 set states {} ; # Dictionary 253 foreach item [lsort -index 1 -integer -increasing $ord] { 254 set s [lindex $item 0] 255 set sdata {} 256 257 # Dict data per state : 258 259 lappend sdata [info exists start($s)] 260 lappend sdata [info exists final($s)] 261 262 # Transitions from the state. 263 264 upvar #0 ${selfns}::trans_$order($s) jump 265 266 if {![info exists jump]} { 267 lappend sdata {} 268 } else { 269 lappend sdata [array get jump] 270 } 271 272 # ---------------------- 273 lappend states $s $sdata 274 } 275 276 return [::list \ 277 grammar::fa \ 278 [array names symbol] \ 279 $states \ 280 ] 281 } 282 283 method deserialize {value} { 284 $self CheckSerialization $value st states acc tr newsymbols 285 $self clear 286 287 foreach s $states {set order($s) [incr scount]} 288 foreach sym $newsymbols {set symbol($sym) .} 289 foreach s $acc {set final($s) .} 290 foreach s $st {set start($s) .} 291 292 foreach {sa sym se} $tr {$self Next $sa $sym $se} 293 return 294 } 295 296 method deserialize_merge {value} { 297 $self CheckSerialization $value st states acc tr newsymbols 298 299 foreach s $states {set order($s) [incr scount]} 300 foreach sym $newsymbols {set symbol($sym) .} 301 foreach s $acc {set final($s) .} 302 foreach s $st {set start($s) .} 303 304 foreach {sa sym se} $tr {$self Next $sa $sym $se} 305 return 306 } 307 308 # --- --- --- --------- --------- --------- 309 310 method states {} { 311 return [array names order] 312 } 313 314 method {state add} {s args} { 315 set args [linsert $args 0 $s] 316 foreach s $args { 317 if {[info exists order($s)]} { 318 return -code error "State \"$s\" is already known" 319 } 320 } 321 foreach s $args {set order($s) [incr scount]} 322 return 323 } 324 325 method {state delete} {s args} { 326 set args [linsert $args 0 $s] 327 $self StateCheckSet $args 328 329 foreach s $args { 330 unset -nocomplain start($s) ; # Start/Initial indicator 331 unset -nocomplain final($s) ; # Final/Accept indicator 332 333 # Remove all inbound transitions. 334 if {[info exists transinv($s)]} { 335 set src $transinv($s) 336 unset transinv($s) 337 338 foreach srcitem $src { 339 struct::list assign $srcitem sin sym 340 $self !Next $sin $sym $s 341 } 342 } 343 344 # We remove transition data only after the inbound 345 # ones. Otherwise we screw up the removal of 346 # looping transitions. We have to consider the 347 # backpointers to us in transinv as well. 348 349 upvar #0 ${selfns}::trans_$order($s) jump 350 if {[info exists jump]} { 351 foreach sym [array names jump] { 352 $self !Transym $s $sym 353 foreach nexts $jump($sym) { 354 $self !Transinv $s $sym $nexts 355 } 356 } 357 358 unset ${selfns}::trans_$order($s) ; # Transitions from s 359 } 360 unset order($s) ; # State ordering 361 362 # Removal of a state may break the automaton into 363 # disconnected pieces. This means that the set of 364 # reachable and useful states may change, and the 365 # cache cannot be used from now on. 366 367 $self InvalidateReach 368 $self InvalidateUseful 369 } 370 return 371 } 372 373 method {state rename} {s snew} { 374 $self StateCheck $s 375 if {[info exists order($snew)]} { 376 return -code error "State \"$snew\" is already known" 377 } 378 379 set o $order($s) 380 unset order($s) ; # State ordering 381 set order($snew) $o 382 383 # Start/Initial indicator 384 if {[info exists start($s)]} { 385 set start($snew) $start($s) 386 unset start($s) 387 } 388 # Final/Accept indicator 389 if {[info exists final($s)]} { 390 set final($snew) $final($s) 391 unset final($s) 392 } 393 # Update all inbound transitions. 394 if {[info exists transinv($s)]} { 395 set transinv($snew) $transinv($s) 396 unset transinv($s) 397 398 # We have to perform a bit more here. We have to 399 # go through the inbound transitions and change the 400 # listed destination state to the new name. 401 402 foreach srcitem $transinv($snew) { 403 struct::list assign $srcitem sin sym 404 # For loops access the 'order' array under the 405 # new name, the old entry is already gone. See 406 # above. See bug SF 2595296. 407 if {$sin eq $s} { 408 set sin $snew 409 } 410 upvar #0 ${selfns}::trans_$order($sin) jump 411 upvar 0 jump($sym) destinations 412 set pos [lsearch -exact $destinations $s] 413 set destinations [lreplace $destinations $pos $pos $snew] 414 } 415 } 416 417 # Another place to change are the back pointers from 418 # all the states we have transitions to, i.e. transinv 419 # for all outbound states. 420 421 upvar #0 ${selfns}::trans_$o jump 422 if {[info exists jump]} { 423 foreach sym [array names jump] { 424 foreach sout $jump($sym) { 425 upvar 0 transinv($sout) backpointer 426 set pos [lsearch -exact $backpointer [list $s $sym]] 427 set backpointer [lreplace $backpointer $pos $pos [list $snew $sym]] 428 } 429 430 # And also to update: Transym information for the symbol. 431 upvar 0 transym($sym) users 432 set pos [lsearch -exact $users $s] 433 set users [lreplace $users $pos $pos $snew] 434 } 435 } 436 437 # Changing the name of a state does not change the 438 # reachables / useful states per se. We just may have 439 # to replace the name in the caches as well. 440 441 # - Invalidation will do the same, at the expense of a 442 # - larger computation later. 443 444 $self InvalidateReach 445 $self InvalidateUseful 446 return 447 } 448 449 method {state exists} {s} { 450 return [info exists order($s)] 451 } 452 453 # --- --- --- --------- --------- --------- 454 455 method startstates {} { 456 return [array names start] 457 } 458 459 method start? {s} { 460 $self StateCheck $s 461 return [info exists start($s)] 462 } 463 464 method start?set {states} { 465 $self StateCheckSet $states 466 foreach s $states { 467 if {[info exists start($s)]} {return 1} 468 } 469 return 0 470 } 471 472 # Note: Adding or removing start states does not change 473 # usefulness, only reachability 474 475 method {start add} {state args} { 476 set args [linsert $args 0 $state] 477 $self StateCheckSet $args 478 foreach s $args {set start($s) .} 479 $self InvalidateReach 480 return 481 } 482 483 method {start set} {states} { 484 $self StateCheckSet $states 485 array unset start 486 foreach s $states {set start($s) .} 487 $self InvalidateReach 488 return 489 } 490 491 method {start remove} {state args} { 492 set args [linsert $args 0 $state] 493 $self StateCheckSet $args 494 foreach s $args { 495 unset -nocomplain start($s) 496 } 497 $self InvalidateReach 498 return 499 } 500 501 method {start clear} {} { 502 array unset start 503 $self InvalidateReach 504 return 505 } 506 507 # --- --- --- --------- --------- --------- 508 509 method finalstates {} { 510 return [array names final] 511 } 512 513 method final? {s} { 514 $self StateCheck $s 515 return [info exists final($s)] 516 } 517 518 method final?set {states} { 519 $self StateCheckSet $states 520 foreach s $states { 521 if {[info exists final($s)]} {return 1} 522 } 523 return 0 524 } 525 526 # Note: Adding or removing final states does not change 527 # reachability, only usefulness 528 529 method {final add} {state args} { 530 set args [linsert $args 0 $state] 531 $self StateCheckSet $args 532 foreach s $args {set final($s) .} 533 $self InvalidateUseful 534 return 535 } 536 537 method {final set} {states} { 538 $self StateCheckSet $states 539 array unset final 540 foreach s $states {set final($s) .} 541 $self InvalidateReach 542 return 543 } 544 545 method {final remove} {state args} { 546 set args [linsert $args 0 $state] 547 $self StateCheckSet $args 548 foreach s $args { 549 unset -nocomplain final($s) 550 } 551 $self InvalidateUseful 552 return 553 } 554 555 method {final clear} {} { 556 array unset final 557 $self InvalidateReach 558 return 559 } 560 561 # --- --- --- --------- --------- --------- 562 563 method symbols {} { 564 return [array names symbol] 565 } 566 567 method symbols@ {s {t {}}} { 568 $self StateCheck $s 569 if {$t ne ""} { $self StateCheck $t} 570 upvar #0 ${selfns}::trans_$order($s) jump 571 if {![info exists jump]} {return {}} 572 if {$t eq ""} { 573 # No destination, all symbols. 574 return [array names jump] 575 } 576 # Specific destination, locate the symbols going there. 577 set result {} 578 foreach sym [array names jump] { 579 if {[lsearch -exact $jump($sym) $t] < 0} continue 580 lappend result $sym 581 } 582 return [lsort -uniq $result] 583 } 584 585 method symbols@set {states} { 586 # Union (fa symbol@ s, f.a. s in states) 587 588 $self StateCheckSet $states 589 set result {} 590 foreach s $states { 591 upvar #0 ${selfns}::trans_$order($s) jump 592 if {![info exists jump]} continue 593 foreach sym [array names jump] { 594 lappend result $sym 595 } 596 } 597 return [lsort -uniq $result] 598 } 599 600 method {symbol add} {sym args} { 601 set args [linsert $args 0 $sym] 602 foreach sym $args { 603 if {$sym eq ""} { 604 return -code error "Cannot add illegal empty symbol \"\"" 605 } 606 if {[info exists symbol($sym)]} { 607 return -code error "Symbol \"$sym\" is already known" 608 } 609 } 610 foreach sym $args {set symbol($sym) .} 611 return 612 } 613 614 method {symbol delete} {sym args} { 615 set args [linsert $args 0 $sym] 616 $self SymbolCheckSetNE $args 617 foreach sym $args { 618 unset symbol($sym) 619 620 # Delete all transitions using the removed symbol. 621 622 if {[info exists transym($sym)]} { 623 foreach s $transym($sym) { 624 $self !Next $s $sym 625 } 626 } 627 } 628 return 629 } 630 631 method {symbol rename} {sym newsym} { 632 $self SymbolCheckNE $sym 633 if {$newsym eq ""} { 634 return -code error "Cannot add illegal empty symbol \"\"" 635 } 636 if {[info exists symbol($newsym)]} { 637 return -code error "Symbol \"$newsym\" is already known" 638 } 639 640 unset symbol($sym) 641 set symbol($newsym) . 642 643 if {[info exists transym($sym)]} { 644 set transym($newsym) [set states $transym($sym)] 645 unset transym($sym) 646 647 foreach s $states { 648 # Update the jump tables for each of the states 649 # using this symbol, and the reverse tables as 650 # well. 651 652 upvar #0 ${selfns}::trans_$order($s) jump 653 set jump($newsym) [set destinations $jump($sym)] 654 unset jump($sym) 655 656 foreach sd $destinations { 657 upvar 0 transinv($sd) backpointer 658 set pos [lsearch -exact $backpointer [list $s $sym]] 659 set backpointer [lreplace $backpointer $pos $pos [list $s $newsym]] 660 } 661 } 662 } 663 return 664 } 665 666 method {symbol exists} {sym} { 667 return [info exists symbol($sym)] 668 } 669 670 # --- --- --- --------- --------- --------- 671 672 method next {s sym args} { 673 ## Split into checking and functionality ... 674 675 set alen [llength $args] 676 if {($alen != 2) && ($alen != 0)} { 677 return -code error "wrong#args: [list $self] next s sym ?--> s'?" 678 } 679 $self StateCheck $s 680 $self SymbolCheck $sym 681 682 if {($alen == 2) && [set cmd [lindex $args 0]] ne "-->"} { 683 return -code error "Expected -->, got \"$cmd\"" 684 } 685 686 if {$alen == 0} { 687 # Query transition table. 688 upvar #0 ${selfns}::trans_$order($s) jump 689 if {![info exists jump($sym)]} {return {}} 690 return $jump($sym) 691 } 692 693 set nexts [lindex $args 1] 694 $self StateCheck $nexts 695 696 upvar #0 ${selfns}::trans_$order($s) jump 697 if {[info exists jump($sym)] && [struct::set contains $jump($sym) $nexts]} { 698 return -code error "Transition \"($s, ($sym)) --> $nexts\" is already known" 699 } 700 701 $self Next $s $sym $nexts 702 return 703 } 704 705 method !next {s sym args} { 706 set alen [llength $args] 707 if {($alen != 2) && ($alen != 0)} { 708 return -code error "wrong#args: [list $self] !next s sym ?--> s'?" 709 } 710 $self StateCheck $s 711 $self SymbolCheck $sym 712 713 if {$alen == 2} { 714 if {[lindex $args 0] ne "-->"} { 715 return -code error "Expected -->, got \"[lindex $args 0]\"" 716 } 717 set nexts [lindex $args 1] 718 $self StateCheck $nexts 719 $self !Next $s $sym $nexts 720 } else { 721 $self !Next $s $sym 722 } 723 } 724 725 method nextset {states sym} { 726 $self SymbolCheck $sym 727 $self StateCheckSet $states 728 729 set result {} 730 foreach s $states { 731 upvar #0 ${selfns}::trans_$order($s) jump 732 if {![info exists jump($sym)]} continue 733 struct::set add result $jump($sym) 734 } 735 return $result 736 } 737 738 # --- --- --- --------- --------- --------- 739 740 method is {cmd} { 741 switch -exact -- $cmd { 742 complete { 743 # The FA is complete if Trans(State, Sym) != {} for all 744 # states and symbols (Not counting epsilon transitions). 745 # Without symbols the FA is deemed complete. Note: 746 # States with epsilon transitions can use symbols 747 # indirectly! Need their closures for exact 748 # computation. 749 750 set nsymbols [llength [array names symbol]] 751 if {$nsymbols == 0} {return 1} 752 foreach s [array names order] { 753 upvar #0 ${selfns}::trans_$order($s) jump 754 if {![info exists jump]} {return 0} 755 set njsym [array size jump] 756 if {[info exists jump()]} { 757 set njsym [llength [$self symbols@set [$self epsilon_closure $s]]] 758 incr njsym -1 759 } 760 if {$njsym != $nsymbols} {return 0} 761 } 762 return 1 763 } 764 deterministic { 765 # The FA is deterministic if it has on start state, no 766 # epsilon transitions, and the transition function is 767 # State x Symbol -> State, and not 768 # State x Symbol -> P(State). 769 770 return [expr { 771 ([array size start] == 1) && 772 ![llength $nondete] && 773 ![array size nondets] 774 }] ;#{} 775 } 776 epsilon-free { 777 # FA is epsion-free if there are no states having epsilon transitions. 778 return [expr {![llength $nondete]}] 779 } 780 useful { 781 # The FA is useful if and only if we have states and 782 # all states are reachable and useful. 783 784 set states [$self states] 785 return [expr { 786 [struct::set size $states] && 787 [struct::set equal $states [$self reachable_states]] && 788 [struct::set equal $states [$self useful_states]] 789 }] ;# {} 790 } 791 } 792 return -code error "Expected complete, deterministic, epsilon-free, or useful, got \"$cmd\"" 793 } 794 795 # --- --- --- --------- --------- --------- 796 797 method reachable_states {} { 798 if {$reachvalid} {return $reach} 799 if {![array size start]} { 800 set reach {} 801 } else { 802 # Basic algorithm like for epsilon_closure, except that we 803 # process all transitions, not only epsilons, and that 804 # the initial state is fixed to start. 805 806 set reach [array names start] 807 set pending $reach 808 array set visited {} 809 while {[llength $pending]} { 810 set s [struct::list shift pending] 811 if {[info exists visited($s)]} continue 812 set visited($s) . 813 upvar #0 ${selfns}::trans_$order($s) jump 814 if {![info exists jump]} continue 815 if {![array size jump]} continue 816 foreach sym [array names jump] { 817 struct::set add reach $jump($sym) 818 struct::set add pending $jump($sym) 819 } 820 } 821 } 822 set reachvalid 1 823 return $reach 824 } 825 826 method unreachable_states {} { 827 # unreachable = states - reachables 828 return [struct::set difference \ 829 [$self states] [$self reachable_states]] 830 } 831 832 method reachable {s} { 833 $self StateCheck $s 834 return [struct::set contains [$self reachable_states] $s] 835 } 836 837 # --- --- --- --------- --------- --------- 838 839 method useful_states {} { 840 if {$usefulvalid} {return $useful} 841 842 # A state is useful if a final state 843 # can be reached from it. 844 845 if {![array size final]} { 846 set useful {} 847 } else { 848 # Basic algorithm like for epsilon_closure, except that we 849 # process all transitions, not only epsilons, and that 850 # the initial set of states is fixed to final. 851 852 set useful [array names final] 853 array set known [array get final] 854 set pending $useful 855 array set visited {} 856 while {[llength $pending]} { 857 set s [struct::list shift pending] 858 if {[info exists visited($s)]} continue 859 set visited($s) . 860 861 # All predecessors are useful, and have to be visited as well. 862 # We get the predecessors from the transinv structure. 863 864 if {![info exists transinv($s)]} continue 865 foreach before $transinv($s) { 866 set before [lindex $before 0] 867 if {[info exists visited($before)]} continue 868 lappend pending $before 869 if {[info exists known($before)]} continue 870 lappend useful $before 871 set known($before) . 872 } 873 } 874 } 875 set usefulvalid 1 876 return $useful 877 } 878 879 method unuseful_states {} { 880 # unuseful = states - useful 881 return [struct::set difference \ 882 [$self states] [$self useful_states]] 883 } 884 885 method useful {s} { 886 $self StateCheck $s 887 return [struct::set contains [$self useful_states] $s] 888 } 889 890 # --- --- --- --------- --------- --------- 891 892 method epsilon_closure {s} { 893 # Iterative graph traversal. Keeps a set of states to look at, 894 # and adds to them everything it can reach from the current 895 # state via epsilon-transitions. Loops are handled through the 896 # visited array to weed out all the states already processed. 897 898 $self StateCheck $s 899 900 # Prefer cached information 901 if {[info exists ec($s)]} { 902 return $ec($s) 903 } 904 905 set closure [list $s] 906 set pending [list $s] 907 array set visited {} 908 while {[llength $pending]} { 909 set t [struct::list shift pending] 910 if {[info exists visited($t)]} continue 911 set visited($t) . 912 upvar #0 ${selfns}::trans_$order($t) jump 913 if {![info exists jump()]} continue 914 struct::set add closure $jump() 915 struct::set add pending $jump() 916 } 917 set ec($s) $closure 918 return $closure 919 } 920 921 # --- --- --- --------- --------- --------- 922 923 method clear {} { 924 array unset order ; set nondete {} 925 array unset start ; set scount 0 926 array unset final ; set reach {} 927 array unset symbol ; set reachvalid 0 928 array unset transym ; set useful {} 929 array unset transinv ; set usefulvalid 0 930 array unset nondets 931 array unset ec 932 933 # Locate all 'trans_' arrays and remove them as well. 934 935 foreach v [info vars ${selfns}::trans_*] { 936 unset $v 937 } 938 return 939 } 940 941 # ### ### ### ######### ######### ######### 942 ## Instance Internals. 943 944 method StateCheck {s} { 945 if {![info exists order($s)]} { 946 return -code error "Illegal state \"$s\"" 947 } 948 } 949 950 method StateCheckSet {states} { 951 foreach s $states { 952 if {![info exists order($s)]} { 953 return -code error "Illegal state \"$s\"" 954 } 955 } 956 } 957 958 method SymbolCheck {sym} { 959 if {$sym eq ""} return 960 if {![info exists symbol($sym)]} { 961 return -code error "Illegal symbol \"$sym\"" 962 } 963 } 964 965 method SymbolCheckNE {sym} { 966 if {($sym eq "") || ![info exists symbol($sym)]} { 967 return -code error "Illegal symbol \"$sym\"" 968 } 969 } 970 971 if 0 { 972 # Unused. Activate when needed. 973 method SymbolCheckSet {symbols} { 974 foreach sym $symbols { 975 if {$sym eq ""} continue 976 if {![info exists symbol($sym)]} { 977 return -code error "Illegal symbol \"$sym\"" 978 } 979 } 980 } 981 } 982 983 method SymbolCheckSetNE {symbols} { 984 foreach sym $symbols { 985 if {($sym eq "") || ![info exists symbol($sym)]} { 986 return -code error "Illegal symbol \"$sym\"" 987 } 988 } 989 } 990 991 method Next {s sym nexts} { 992 # Modify transition table. May update the set of 993 # non-deterministic states. Invalidates reachable 994 # cache, as states may become reachable. Updates 995 # the transym and transinv mappings. 996 997 upvar #0 ${selfns}::trans_$order($s) jump 998 999 $self InvalidateReach 1000 $self InvalidateUseful 1001 # Clear closure cache when epsilons change. 1002 if {$sym eq ""} {array unset ec} 1003 1004 if {[info exists transym($sym)]} { 1005 struct::set include transym($sym) $s 1006 } else { 1007 set transym($sym) [list $s] 1008 } 1009 1010 if {[info exists transinv($nexts)]} { 1011 struct::set include transinv($nexts) [list $s $sym] 1012 } else { 1013 set transinv($nexts) [list [list $s $sym]] 1014 } 1015 1016 if {![info exists jump($sym)]} { 1017 set jump($sym) [list $nexts] 1018 } else { 1019 struct::set include jump($sym) $nexts 1020 } 1021 $self NonDeterministic $s $sym 1022 return 1023 } 1024 1025 method !Next {s sym args} { 1026 upvar #0 ${selfns}::trans_$order($s) jump 1027 # Anything to do at all ? 1028 if {![info exists jump($sym)]} return 1029 $self InvalidateReach 1030 $self InvalidateUseful 1031 # Clear closure cache when epsilons change. 1032 if {$sym eq ""} {array unset ec} 1033 1034 if {![llength $args]} { 1035 # Unset all transitions for (s, sym) 1036 # Update transym and transinv mappings as well, if existing. 1037 1038 $self !Transym $s $sym 1039 foreach nexts $jump($sym) { 1040 $self !Transinv $s $sym $nexts 1041 } 1042 1043 unset jump($sym) 1044 } else { 1045 # Remove the single transition (s, sym) -> nexts 1046 set nexts [lindex $args 0] 1047 1048 struct::set exclude jump($sym) $nexts 1049 $self !Transinv $s $sym $nexts 1050 1051 if {![struct::set size $jump($sym)]} { 1052 $self !Transym $s $sym 1053 unset jump($sym) 1054 if {![array size jump]} { 1055 unset jump 1056 } 1057 } 1058 } 1059 1060 $self NonDeterministic $s $sym 1061 return 1062 } 1063 1064 method !Transym {s sym} { 1065 struct::set exclude transym($sym) $s 1066 if {![struct::set size $transym($sym)]} { 1067 unset transym($sym) 1068 } 1069 } 1070 1071 method !Transinv {s sym nexts} { 1072 if {[info exists transinv($nexts)]} { 1073 struct::set exclude transinv($nexts) [list $s $sym] 1074 if {![struct::set size $transinv($nexts)]} { 1075 unset transinv($nexts) 1076 } 1077 } 1078 } 1079 1080 method InvalidateReach {} { 1081 set reachvalid 0 1082 set reach {} 1083 return 1084 } 1085 1086 method InvalidateUseful {} { 1087 set usefulvalid 0 1088 set useful {} 1089 return 1090 } 1091 1092 method NonDeterministic {s sym} { 1093 upvar #0 ${selfns}::trans_$order($s) jump 1094 1095 # Epsilon rule, whole state check. Epslion present <=> Not a DFA. 1096 1097 if {[info exists jump()]} { 1098 struct::set include nondete $s 1099 } else { 1100 struct::set exclude nondete $s 1101 } 1102 1103 # Non-determinism over a symbol. 1104 1105 upvar #0 ${selfns}::trans_$order($s) jump 1106 1107 if {[info exists jump($sym)] && [struct::set size $jump($sym)] > 1} { 1108 if {![info exists nondets($s)]} { 1109 set nondets($s) [list $sym] 1110 } else { 1111 struct::set include nondets($s) $sym 1112 } 1113 return 1114 } else { 1115 if {![info exists nondets($s)]} return 1116 struct::set exclude nondets($s) $sym 1117 if {![struct::set size $nondets($s)]} { 1118 unset nondets($s) 1119 } 1120 } 1121 return 1122 } 1123 1124 method CheckSerialization {value startst states acc trans syms} { 1125 # value is list/3 ('grammar::fa' symbols states) 1126 # !("" in symbols) 1127 # states is ordered dict (key is state, value is statedata) 1128 # statedata is list/3 (start final trans|"") 1129 # start is boolean 1130 # final is boolean 1131 # trans is dict (key in symbols, value is destinations) 1132 # destinations is set of states 1133 1134 upvar 1 $startst startstates \ 1135 $states sts \ 1136 $acc a \ 1137 $trans t \ 1138 $syms symbols 1139 1140 set prefix "error in serialization:" 1141 if {[llength $value] != 3} { 1142 return -code error "$prefix list length not 3" 1143 } 1144 1145 struct::list assign $value stype symbols statedata 1146 1147 if {$stype ne "grammar::fa"} { 1148 return -code error "$prefix unknown type \"$stype\"" 1149 } 1150 if {[struct::set contains $symbols ""]} { 1151 return -code error "$prefix empty symbol is not legal" 1152 } 1153 1154 if {[llength $statedata] % 2 == 1} { 1155 return -code error "$prefix state data is not a dictionary" 1156 } 1157 array set _states $statedata 1158 if {[llength $statedata] != (2*[array size _states])} { 1159 return -code error "$prefix state data contains duplicate states" 1160 } 1161 set startstates {} 1162 set sts {} 1163 set p {} 1164 set a {} 1165 set e {} 1166 set l {} 1167 set m {} 1168 set t {} 1169 foreach {k v} $statedata { 1170 lappend sts $k 1171 if {[llength $v] != 3} { 1172 return -code error "$prefix state list length not 3" 1173 } 1174 1175 struct::list assign $v begin accept trans 1176 1177 if {![string is boolean -strict $begin]} { 1178 return -code error "$prefix expected boolean for start, got \"$begin\"" 1179 } 1180 if {$begin} {lappend startstates $k} 1181 if {![string is boolean -strict $accept]} { 1182 return -code error "$prefix expected boolean for final, got \"$accept\"" 1183 } 1184 if {$accept} {lappend a $k} 1185 1186 if {[llength $trans] % 2 == 1} { 1187 return -code error "$prefix transition data is not a dictionary" 1188 } 1189 array set _trans $trans 1190 if {[llength $trans] != (2*[array size _trans])} { 1191 return -code error "$prefix transition data contains duplicate symbols" 1192 } 1193 unset _trans 1194 1195 foreach {sym destinations} $trans { 1196 # destinations = list of state 1197 if {($sym ne "") && ![struct::set contains $symbols $sym]} { 1198 return -code error "$prefix illegal symbol \"$sym\" in transition" 1199 } 1200 foreach dest $destinations { 1201 if {![info exists _states($dest)]} { 1202 return -code error "$prefix illegal destination state \"$dest\"" 1203 } 1204 lappend t $k $sym $dest 1205 } 1206 } 1207 } 1208 return 1209 } 1210 1211 # ### ### ### ######### ######### ######### 1212 ## Type API implementation. 1213 1214 # ### ### ### ######### ######### ######### 1215 ## Type Internals. 1216 1217 # ### ### ### ######### ######### ######### 1218} 1219 1220# ### ### ### ######### ######### ######### 1221## Initialization. Specify the container constructor command to use by 1222## the operations package. 1223 1224::grammar::fa::op::constructor ::grammar::fa 1225 1226# ### ### ### ######### ######### ######### 1227## Package Management 1228 1229package provide grammar::fa 0.4 1230