1#--
2# tsort.rb - provides a module for topological sorting and strongly connected components.
3#++
4#
5
6#
7# TSort implements topological sorting using Tarjan's algorithm for
8# strongly connected components.
9#
10# TSort is designed to be able to be used with any object which can be
11# interpreted as a directed graph.
12#
13# TSort requires two methods to interpret an object as a graph,
14# tsort_each_node and tsort_each_child.
15#
16# * tsort_each_node is used to iterate for all nodes over a graph.
17# * tsort_each_child is used to iterate for child nodes of a given node.
18#
19# The equality of nodes are defined by eql? and hash since
20# TSort uses Hash internally.
21#
22# == A Simple Example
23#
24# The following example demonstrates how to mix the TSort module into an
25# existing class (in this case, Hash). Here, we're treating each key in
26# the hash as a node in the graph, and so we simply alias the required
27# #tsort_each_node method to Hash's #each_key method. For each key in the
28# hash, the associated value is an array of the node's child nodes. This
29# choice in turn leads to our implementation of the required #tsort_each_child
30# method, which fetches the array of child nodes and then iterates over that
31# array using the user-supplied block.
32#
33#   require 'tsort'
34#
35#   class Hash
36#     include TSort
37#     alias tsort_each_node each_key
38#     def tsort_each_child(node, &block)
39#       fetch(node).each(&block)
40#     end
41#   end
42#
43#   {1=>[2, 3], 2=>[3], 3=>[], 4=>[]}.tsort
44#   #=> [3, 2, 1, 4]
45#
46#   {1=>[2], 2=>[3, 4], 3=>[2], 4=>[]}.strongly_connected_components
47#   #=> [[4], [2, 3], [1]]
48#
49# == A More Realistic Example
50#
51# A very simple `make' like tool can be implemented as follows:
52#
53#   require 'tsort'
54#
55#   class Make
56#     def initialize
57#       @dep = {}
58#       @dep.default = []
59#     end
60#
61#     def rule(outputs, inputs=[], &block)
62#       triple = [outputs, inputs, block]
63#       outputs.each {|f| @dep[f] = [triple]}
64#       @dep[triple] = inputs
65#     end
66#
67#     def build(target)
68#       each_strongly_connected_component_from(target) {|ns|
69#         if ns.length != 1
70#           fs = ns.delete_if {|n| Array === n}
71#           raise TSort::Cyclic.new("cyclic dependencies: #{fs.join ', '}")
72#         end
73#         n = ns.first
74#         if Array === n
75#           outputs, inputs, block = n
76#           inputs_time = inputs.map {|f| File.mtime f}.max
77#           begin
78#             outputs_time = outputs.map {|f| File.mtime f}.min
79#           rescue Errno::ENOENT
80#             outputs_time = nil
81#           end
82#           if outputs_time == nil ||
83#              inputs_time != nil && outputs_time <= inputs_time
84#             sleep 1 if inputs_time != nil && inputs_time.to_i == Time.now.to_i
85#             block.call
86#           end
87#         end
88#       }
89#     end
90#
91#     def tsort_each_child(node, &block)
92#       @dep[node].each(&block)
93#     end
94#     include TSort
95#   end
96#
97#   def command(arg)
98#     print arg, "\n"
99#     system arg
100#   end
101#
102#   m = Make.new
103#   m.rule(%w[t1]) { command 'date > t1' }
104#   m.rule(%w[t2]) { command 'date > t2' }
105#   m.rule(%w[t3]) { command 'date > t3' }
106#   m.rule(%w[t4], %w[t1 t3]) { command 'cat t1 t3 > t4' }
107#   m.rule(%w[t5], %w[t4 t2]) { command 'cat t4 t2 > t5' }
108#   m.build('t5')
109#
110# == Bugs
111#
112# * 'tsort.rb' is wrong name because this library uses
113#   Tarjan's algorithm for strongly connected components.
114#   Although 'strongly_connected_components.rb' is correct but too long.
115#
116# == References
117#
118# R. E. Tarjan, "Depth First Search and Linear Graph Algorithms",
119# <em>SIAM Journal on Computing</em>, Vol. 1, No. 2, pp. 146-160, June 1972.
120#
121
122module TSort
123  class Cyclic < StandardError
124  end
125
126  #
127  # Returns a topologically sorted array of nodes.
128  # The array is sorted from children to parents, i.e.
129  # the first element has no child and the last node has no parent.
130  #
131  # If there is a cycle, TSort::Cyclic is raised.
132  #
133  def tsort
134    result = []
135    tsort_each {|element| result << element}
136    result
137  end
138
139  #
140  # The iterator version of the #tsort method.
141  # <tt><em>obj</em>.tsort_each</tt> is similar to <tt><em>obj</em>.tsort.each</tt>, but
142  # modification of _obj_ during the iteration may lead to unexpected results.
143  #
144  # #tsort_each returns +nil+.
145  # If there is a cycle, TSort::Cyclic is raised.
146  #
147  def tsort_each # :yields: node
148    each_strongly_connected_component {|component|
149      if component.size == 1
150        yield component.first
151      else
152        raise Cyclic.new("topological sort failed: #{component.inspect}")
153      end
154    }
155  end
156
157  #
158  # Returns strongly connected components as an array of arrays of nodes.
159  # The array is sorted from children to parents.
160  # Each elements of the array represents a strongly connected component.
161  #
162  def strongly_connected_components
163    result = []
164    each_strongly_connected_component {|component| result << component}
165    result
166  end
167
168  #
169  # The iterator version of the #strongly_connected_components method.
170  # <tt><em>obj</em>.each_strongly_connected_component</tt> is similar to
171  # <tt><em>obj</em>.strongly_connected_components.each</tt>, but
172  # modification of _obj_ during the iteration may lead to unexpected results.
173  #
174  #
175  # #each_strongly_connected_component returns +nil+.
176  #
177  def each_strongly_connected_component # :yields: nodes
178    id_map = {}
179    stack = []
180    tsort_each_node {|node|
181      unless id_map.include? node
182        each_strongly_connected_component_from(node, id_map, stack) {|c|
183          yield c
184        }
185      end
186    }
187    nil
188  end
189
190  #
191  # Iterates over strongly connected component in the subgraph reachable from
192  # _node_.
193  #
194  # Return value is unspecified.
195  #
196  # #each_strongly_connected_component_from doesn't call #tsort_each_node.
197  #
198  def each_strongly_connected_component_from(node, id_map={}, stack=[]) # :yields: nodes
199    minimum_id = node_id = id_map[node] = id_map.size
200    stack_length = stack.length
201    stack << node
202
203    tsort_each_child(node) {|child|
204      if id_map.include? child
205        child_id = id_map[child]
206        minimum_id = child_id if child_id && child_id < minimum_id
207      else
208        sub_minimum_id =
209          each_strongly_connected_component_from(child, id_map, stack) {|c|
210            yield c
211          }
212        minimum_id = sub_minimum_id if sub_minimum_id < minimum_id
213      end
214    }
215
216    if node_id == minimum_id
217      component = stack.slice!(stack_length .. -1)
218      component.each {|n| id_map[n] = nil}
219      yield component
220    end
221
222    minimum_id
223  end
224
225  #
226  # Should be implemented by a extended class.
227  #
228  # #tsort_each_node is used to iterate for all nodes over a graph.
229  #
230  def tsort_each_node # :yields: node
231    raise NotImplementedError.new
232  end
233
234  #
235  # Should be implemented by a extended class.
236  #
237  # #tsort_each_child is used to iterate for child nodes of _node_.
238  #
239  def tsort_each_child(node) # :yields: child
240    raise NotImplementedError.new
241  end
242end
243