1#!/usr/bin/env python
2
3"""Helps to keep BUILD.gn files in sync with the corresponding CMakeLists.txt.
4
5For each BUILD.gn file in the tree, checks if the list of cpp files in
6it is identical to the list of cpp files in the corresponding CMakeLists.txt
7file, and prints the difference if not.
8
Also checks that each CMakeLists.txt file below a unittests/ folder that
defines a binary has a corresponding BUILD.gn file.
11
12If --write is passed, tries to write modified .gn files and adds one git
13commit for each cmake commit this merges. If an error is reported, the state
14of HEAD is unspecified; run `git reset --hard origin/master` if this happens.
15"""
16
17from __future__ import print_function
18
19from collections import defaultdict
20import os
21import re
22import subprocess
23import sys
24
25
def patch_gn_file(gn_file, add, remove):
    """Rewrite gn_file in place with `add` inserted and `remove` dropped.

    Each name in `add` is spliced in, quoted and comma-terminated, directly
    after the file's `sources = [` token (empty `sources = []` lists are
    skipped when looking for it). Each name in `remove` has every occurrence
    of its quoted, comma-terminated form deleted. The result is written back
    to gn_file, and `gn.py format -q` is then run on it.

    Raises ValueError, before anything is written, if `add` is non-empty and
    the file has no non-empty source list, has more than one source list, or
    contains `# NOSORT` ahead of the source list.
    """
    with open(gn_file) as gn_in:
        text = gn_in.read()

    if add:
        marker = 'sources = ['
        pos = text.find(marker)
        # Step over empty lists until a non-empty one (or nothing) is found.
        while text.startswith('sources = []', pos):
            pos = text.find(marker, pos + 1)
        if pos == -1:
            raise ValueError(gn_file + ': No source list')
        if text.find(marker, pos + 1) != -1:
            raise ValueError(gn_file + ': Multiple source lists')
        if text.find('# NOSORT', 0, pos) != -1:
            raise ValueError(gn_file + ': Found # NOSORT, needs manual merge')

        insert_at = pos + len(marker)
        # Every entry goes in right behind the marker, so a later entry ends
        # up in front of an earlier one; hence the reversal.
        quoted = ['"%s",' % name for name in add]
        text = text[:insert_at] + ''.join(reversed(quoted)) + text[insert_at:]

    for name in remove:
        text = text.replace('"%s",' % name, '')

    with open(gn_file, 'w') as gn_out:
        gn_out.write(text)

    # Run `gn format`.
    formatter = os.path.join(os.path.dirname(__file__), '..', 'gn.py')
    format_cmd = [sys.executable, formatter, 'format', '-q', gn_file]
    subprocess.check_call(format_cmd)
51
52
def sync_source_lists(write):
    """Compare the source lists of BUILD.gn files with their CMakeLists.txt.

    Looks at every git-tracked BUILD.gn file below llvm/utils/gn/secondary/
    whose corresponding CMakeLists.txt exists, and compares the source file
    names found in both. Each name present in only one of the two files is
    attributed to the revision that `git log -1 -S<name>` reports for the
    CMakeLists.txt, and the differences are then processed grouped by that
    revision.

    If `write` is true, each affected BUILD.gn file is patched and staged,
    and one git commit is created per revision. Otherwise the required
    additions and removals are only printed.

    Returns True if differences were found and `write` is false, and False
    otherwise.
    """
    # Use shell=True on Windows in case git is a bat file.
    use_shell = os.name == 'nt'

    def git(args):
        subprocess.check_call(['git'] + args, shell=use_shell)

    def git_out(args):
        return subprocess.check_output(['git'] + args, shell=use_shell,
                                       universal_newlines=True)

    def read_file(path):
        # Close the handle right away instead of leaving it to the garbage
        # collector.
        with open(path) as f:
            return f.read()

    def get_sources(source_re, text):
        return set(m.group(1) for m in source_re.finditer(text))

    def posix_re_escape(s):
        # re.escape() escapes e.g. '-', which works in practice but has
        # undefined behavior according to the POSIX extended regex spec.
        return re.sub(r'([.[{()\\*+?|^$])', r'\\\1', s)

    def find_gitrev(touched_line, in_file):
        cmd = ['log', '--format=%h', '-1', '--pickaxe-regex',
               r'-S\b%s\b' % posix_re_escape(touched_line), in_file]
        return git_out(cmd).rstrip()

    gn_files = git_out(['ls-files', '*BUILD.gn']).splitlines()

    # Matches e.g. |   "foo.cpp",|, captures |foo.cpp| in group 1.
    gn_cpp_re = re.compile(r'^\s*"([^$"]+\.(?:cpp|c|h|S))",$', re.MULTILINE)
    # Matches e.g. |   bar_sources = [ "foo.cpp" ]|, captures |foo.cpp| in
    # group 1.
    gn_cpp_re2 = re.compile(
        r'^\s*(?:.*_)?sources \+?= \[ "([^$"]+\.(?:cpp|c|h|S))" ]$',
        re.MULTILINE)
    # Matches e.g. |   foo.cpp|, captures |foo.cpp| in group 1.
    cmake_cpp_re = re.compile(r'^\s*([A-Za-z_0-9./-]+\.(?:cpp|c|h|S))$',
                              re.MULTILINE)

    # Maps revision -> gn file -> 'add' / 'remove' -> list of source names.
    changes_by_rev = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    # The CMakeLists.txt for llvm/utils/gn/secondary/foo/BUILD.gn is
    # at foo/CMakeLists.txt.
    strip_prefix = 'llvm/utils/gn/secondary/'

    # Collect changes to gn files, grouped by revision.
    for gn_file in gn_files:
        if not gn_file.startswith(strip_prefix):
            continue
        cmake_file = os.path.join(
                os.path.dirname(gn_file[len(strip_prefix):]), 'CMakeLists.txt')
        if not os.path.exists(cmake_file):
            continue

        # Read the gn file once and run both patterns over the same text.
        gn_text = read_file(gn_file)
        gn_cpp = get_sources(gn_cpp_re, gn_text)
        gn_cpp |= get_sources(gn_cpp_re2, gn_text)
        cmake_cpp = get_sources(cmake_cpp_re, read_file(cmake_file))

        if gn_cpp == cmake_cpp:
            continue

        # Additions are looked up before removals, each in sorted order.
        pending = (('add', cmake_cpp - gn_cpp), ('remove', gn_cpp - cmake_cpp))
        for key, files in pending:
            for f in sorted(files):
                rev = find_gitrev(f, cmake_file)
                changes_by_rev[rev][gn_file][key].append(f)

    # Output necessary changes grouped by revision.
    for rev in sorted(changes_by_rev):
        print('[gn build] Port {0} -- https://reviews.llvm.org/rG{0}'
            .format(rev))
        for gn_file, data in sorted(changes_by_rev[rev].items()):
            add = data.get('add', [])
            remove = data.get('remove', [])
            if write:
                patch_gn_file(gn_file, add, remove)
                git(['add', gn_file])
            else:
                print('  ' + gn_file)
                if add:
                    print('   add:\n' + '\n'.join('    "%s",' % a for a in add))
                if remove:
                    print('   remove:\n    ' + '\n    '.join(remove))
                print()
        if write:
            git(['commit', '-m', '[gn build] Port %s' % rev])
        else:
            print()

    return bool(changes_by_rev) and not write
132
133
def sync_unittests():
    """Report unittest CMakeLists.txt files that have no BUILD.gn file.

    Walks the unittests/ directory of each checked project, relative to the
    current working directory. For every CMakeLists.txt found there that
    contains a line starting with an `add_..._unittest` call, checks that
    llvm/utils/gn/secondary/<same directory>/BUILD.gn exists, and prints a
    message for each one that does not.

    Returns True if at least one BUILD.gn file is missing, else False.
    """
    # Matches e.g. |add_llvm_unittest_with_input_files|.
    unittest_re = re.compile(r'^add_\S+_unittest', re.MULTILINE)

    checked = ['clang', 'clang-tools-extra', 'lld', 'llvm']
    changed = False
    for c in checked:
        for root, _, _ in os.walk(os.path.join(c, 'unittests')):
            cmake_file = os.path.join(root, 'CMakeLists.txt')
            if not os.path.exists(cmake_file):
                continue
            # Use a context manager so the handle is closed right away
            # rather than leaked until garbage collection.
            with open(cmake_file) as f:
                cmake_contents = f.read()
            if not unittest_re.search(cmake_contents):
                continue  # Skip CMake files that just add subdirectories.
            gn_file = os.path.join('llvm/utils/gn/secondary', root, 'BUILD.gn')
            if not os.path.exists(gn_file):
                changed = True
                print('missing GN file %s for unittest CMake file %s' %
                      (gn_file, cmake_file))
    return changed
153
154
def main():
    """Run both sync checks and exit with status 1 if either found a problem.

    Write mode is enabled only when the first command-line argument is
    exactly `--write`. Both checks always run, so the unittest check is
    performed even when the source lists are already known to differ.
    """
    write_requested = sys.argv[1:2] == ['--write']
    sources_differ = sync_source_lists(write_requested)
    unittests_missing = sync_unittests()
    if not (sources_differ or unittests_missing):
        return
    sys.exit(1)
160
161
# Run the checks only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
164