1#!/usr/bin/python
2#
3# Copyright 2014, NICTA
4#
5# This software may be distributed and modified according to the terms of
6# the BSD 2-Clause license. Note that NO WARRANTY is provided.
7# See "LICENSE_BSD2.txt" for details.
8#
9# @TAG(NICTA_BSD)
10#
11
12import os
13import copy
14import heapq
15import glob
16import subprocess
17import itertools
18import argparse
19import sys
20
21from lxml import etree
22
23REGRESSION_DIR = os.path.dirname(os.path.realpath(__file__))
24REGRESSION_DTD = os.path.join(REGRESSION_DIR, "regression.dtd")
25
26class TestSpecParseException(Exception):
27    pass
28
29class TestEnv():
30    def __init__(self, pwd):
31        self.pwd = pwd
32        self.cwd = "."
33        self.timeout = 0
34        self.depends = set()
35
36class Test():
37    def __init__(self, name, command, timeout=0, cwd="", depends=None):
38        self.name = name
39        self.command = command
40        self.timeout = timeout
41        self.cwd = cwd
42
43        if depends == None:
44            depends = set([])
45        self.depends = depends
46
47def parse_attributes(tag, env, strict=True):
48    """Parse attributes such as "timeout" in the given XML tag,
49    updating the given "env" to reflect them."""
50    if tag.get("timeout"):
51        try:
52            env.timeout = int(tag.get("timeout"))
53        except:
54            if strict:
55                raise
56    if tag.get("cwd"):
57        env.cwd = tag.get("cwd")
58    if tag.get("depends"):
59        env.depends |= set(tag.get("depends").split())
60
61def parse_test(doc, env, strict=True):
62    """Parse a <test> tag."""
63    env = copy.deepcopy(env)
64    parse_attributes(doc, env, strict=strict)
65    return Test(doc.get("name"), doc.text.strip(),
66            timeout=env.timeout,
67            cwd=os.path.normpath(os.path.join(env.pwd, env.cwd)),
68            depends=env.depends)
69
70def parse_sequence(doc, env, strict=True):
71    # Create a copy of env so that the scope is restored.
72    env = copy.deepcopy(env)
73
74    # Parse attributes.
75    parse_attributes(doc, env)
76
77    # Parse children.
78    tests = []
79    for child in doc:
80        if child.tag == "set":
81            # Parse set, recording dependencies of the tests inside the set.
82            new_tests = parse_set(child, env, strict=strict)
83            for x in new_tests:
84                env.depends.add(x.name)
85            tests += new_tests
86        elif child.tag == "sequence":
87            # Parse sequence, recording dependencies of the tests inside the set.
88            new_tests = parse_sequence(child, env, strict=strict)
89            for x in new_tests:
90                env.depends.add(x.name)
91            tests += new_tests
92        elif child.tag == "test":
93            tests.append(parse_test(child, env, strict=strict))
94            env.depends.add(tests[-1].name)
95        elif strict:
96            raise TestSpecParseException("Unknown tag '%s'" % child.tag)
97
98    return tests
99
100def parse_set(doc, env, strict=True):
101    # Create a copy of env so that the scope is restored.
102    env = copy.deepcopy(env)
103
104    # Parse attributes.
105    parse_attributes(doc, env, strict=strict)
106
107    # Parse children.
108    tests = []
109    for child in doc:
110        if child.tag == "set":
111            tests += parse_set(child, env, strict=strict)
112        elif child.tag == "sequence":
113            tests += parse_sequence(child, env, strict=strict)
114        elif child.tag == "test":
115            tests.append(parse_test(child, env, strict=strict))
116        elif strict:
117            raise TestSpecParseException("Unknown tag '%s'" % child.tag)
118
119    return tests
120
121def find_cycle(keys, depends_on):
122    """Find the shortest cycle in the input graph. Unnecessarily O(n**2)."""
123    def dfs(n):
124        safe = set()
125        active = set()
126        def do_dfs(n):
127            if n in safe:
128                return None
129            if n in active:
130                return [n]
131            active.add(n)
132            for c in depends_on(n):
133                x = do_dfs(c)
134                if x != None:
135                    return [n] + x
136            active.discard(n)
137            safe.add(n)
138        return do_dfs(n)
139    shortest_cycle = None
140    for i in keys:
141        x = dfs(i)
142        if x != None and (shortest_cycle == None
143                or len(x) < len(shortest_cycle)):
144            shortest_cycle = x
145    return shortest_cycle
146
147def toposort(keys, prio, depends_on):
148    """topological sort of keys.
149
150    Perform a toposort for keys, trying to order elements by the priority
151    returned by function "prio" as closely as possible without breaking
152    dependencies.
153    """
154    #
155    # We start by creating a dictionary of which tests are dependent on others,
156    # and then how many outstanding dependencies each test has.
157    #
158    # Instead of using "dependents" and "dependencies", we use "parents" and
159    # "children". A parent must be processed before its child.
160    #
161    keys = sorted(keys, key=prio)
162    children = {}
163    num_parents = {}
164    for key in keys:
165        num_parents[key] = len(depends_on(key))
166        for parent in depends_on(key):
167            children.setdefault(parent, set()).add(key)
168
169    #
170    # Generate heap of tests without a parent, and keep popping off
171    # the heap and processing the tests.
172    #
173    final_order = []
174    parentless = sorted([(prio(k), k) for k in keys if num_parents[k] == 0])
175    while len(parentless) > 0:
176        (p, k) = heapq.heappop(parentless)
177        final_order.append(k)
178        for s in children.get(k, []):
179            num_parents[s] -= 1
180            if num_parents[s] == 0:
181                heapq.heappush(parentless, (prio(s), s))
182
183    # Ensure we saw everybody. If we didn't, there is a cycle.
184    if len(keys) != len(final_order):
185        shortest_cycle = find_cycle(keys, depends_on)
186        raise ValueError("Circular dependency involving: %s" %
187                (" -> ".join(shortest_cycle)))
188
189    return final_order
190
191def validate_xml(filename):
192    """Ensure the XML matches the regression DTD."""
193
194    # Read in the DTD
195    with open(REGRESSION_DTD) as dtd_file:
196        dtd = etree.DTD(dtd_file)
197
198    # Parse the file, and validate against the DTD.
199    parser = etree.XMLParser(remove_comments=True)
200    doc = etree.parse(filename, parser=parser)
201    if not dtd.validate(doc):
202        raise Exception(
203                "%s does not validate against DTD:\n\n" % filename
204                + str(dtd.error_log))
205
206def parse_testsuite_xml(filename, strict=True):
207
208    # Validate the XML if requested.
209    if strict:
210        validate_xml(filename)
211
212    # Parse the file. We try to keep reading broken XML. If "strict" is false,
213    # keep trying to parse over broken XML.
214    parser = etree.XMLParser(remove_comments=True, recover=(not strict))
215    doc = etree.parse(filename, parser=parser).getroot()
216
217    # Setup an empty environment
218    env = TestEnv(os.path.dirname(filename))
219
220    # Parse this tag as a set of tests.
221    return parse_set(doc, env, strict=strict)
222
223def process_tests(tests, strict=False):
224    """Given a list of tests (possibly from multiple XML file), check for
225    errors and return a list of tests in dependency-satisfying order."""
226
227    # Check for duplicate names.
228    seen_names = set()
229    for t in tests:
230        if t.name in seen_names:
231            if strict:
232                raise TestSpecParseException("Duplicate test name detected: %s" % t.name)
233            for x in itertools.count(2):
234                proposed_name = "%s_%d" % (t.name, x)
235                if not proposed_name in seen_names:
236                    t.name = proposed_name
237                    break
238        seen_names.add(t.name)
239
240    # Check dependencies.
241    valid_names = set()
242    for test in tests:
243        valid_names.add(test.name)
244    for test in tests:
245        test_depends = sorted(test.depends)
246        for dependency_name in test_depends:
247            if not dependency_name in valid_names:
248                if strict:
249                    raise TestSpecParseException(
250                            "Depedency '%s' invalid." % dependency_name)
251                test.depends.remove(dependency_name)
252
253   # Toposort.
254    test_ordering = {}
255    for (n, t) in enumerate(tests):
256        test_ordering[t.name] = n
257    test_depends = {}
258    for t in tests:
259        test_depends[t.name] = t.depends
260    try:
261        ordering = toposort([t.name for t in tests],
262                lambda x: test_ordering[x],
263                lambda x: test_depends[x])
264    except ValueError, e:
265        if strict:
266            raise TestSpecParseException(
267                    "Cycle in dependencies: %s" % e.message)
268        else:
269            # There is a cycle, but we want to continue anyway.
270            # Just ignore all deps and hope for the best.
271            ordering = dict((t, n) for (n, t)
272                    in enumerate(sorted([t.name for t in tests])))
273    ordering = dict((t, n) for (n, t) in enumerate(ordering))
274    tests = sorted(tests, key=lambda k: ordering[k.name])
275
276    return tests
277
278def legacy_testspec(root):
279    """Find tests inside makefiles."""
280
281    # Find candidate "IsaMakefile"s
282    candidates = sorted(
283        glob.glob(os.path.join(root, "*", "IsaMakefile"))
284        + glob.glob(os.path.join(root, "*", "*", "IsaMakefile")))
285
286    # Get isabelle binary.
287    isabelle_bin = os.path.abspath(os.path.join(root, "isabelle", "bin", "isabelle"))
288
289    # Run "isabelle make report-regression" on each.
290    def report_regression(filename):
291        filename = os.path.abspath(filename)
292        base_name = os.path.split(os.path.dirname(filename))[1]
293        try:
294            with open("/dev/null", "w") as devnull:
295                results = subprocess.check_output(
296                    [isabelle_bin, "make", "-f", filename, "report-regression"],
297                    cwd=os.path.dirname(filename),
298                    stderr=devnull)
299            return [(base_name + "/" + x, x) for x in results.strip().split()]
300        except subprocess.CalledProcessError:
301            return []
302
303    # Search for tests.
304    tests = []
305    for candidate in candidates:
306        targets = report_regression(os.path.abspath(candidate))
307        for (name, target) in targets:
308            new_test = Test(name, "isabelle make " + target, timeout=4*3600,
309                        cwd=os.path.dirname(os.path.abspath(candidate)))
310            tests.append(new_test)
311    return tests
312
313def parse_test_files(xml_files, strict=False):
314    tests = []
315    seen_files = set()
316    for x in xml_files:
317        # Some files may be symlinked; don't process them multiple times.
318        try:
319            st = os.stat(x)
320            if (st.st_dev, st.st_ino) in seen_files:
321                continue
322            seen_files.add((st.st_dev, st.st_ino))
323        except OSError:
324            pass
325
326        try:
327            tests += parse_testsuite_xml(x)
328        except:
329            sys.stderr.write("Exception while parsing file: %s.\n" % x)
330            if strict:
331                raise
332    return process_tests(tests, strict=strict)
333
334def main():
335    # Parse arguments
336    parser = argparse.ArgumentParser(description="Regression Framework Testspec Parser")
337    parser.add_argument("file", metavar="FILE", type=str, nargs="*",
338            help="a regression XML file to parse")
339    parser.add_argument("-r", "--relax", action="store_false", dest="strict",
340            help="be less strict when parsing XML files")
341    parser.add_argument("-l", "--legacy", action="store_true",
342            help="use legacy 'IsaMakefile' specs")
343    args = parser.parse_args()
344
345    # Ensure we are either in legacy more or we have at least one file.
346    if not args.legacy and len(args.file) == 0:
347        parser.error("Please provide at least one XML file.")
348    if args.legacy and len(args.file) > 0:
349        parser.error("Can not use both legacy mode and XML files.")
350
351    if args.legacy:
352        # Fetch legacy tests.
353        tests = legacy_testspec(os.getcwd())
354    else:
355        # Fetch XML tests.
356        tests = parse_test_files(args.file, strict=args.strict)
357
358    # Print results
359    for test in tests:
360        print("\"%s\" [timeout=%d, parents=%s, cwd=%s]" % (
361            test.command, test.timeout, ",".join(test.depends), test.cwd))
362
363
364if __name__ == "__main__":
365    main()
366
367