1import difflib
2import functools
3import getopt
4import io
5import locale
6import os
7import sys
8
9import util
10from util import to_string
11
class DiffFlags():
    """Options controlling how two inputs are compared and reported.

    Every option starts at its "off" default; main() switches options on
    according to the command-line flags it parses.
    """

    def __init__(self):
        # Whitespace handling: '-w' and '-b' respectively.
        self.ignore_all_space = False
        self.ignore_space_change = False
        # Treat "\r\n" as "\n" when comparing ('--strip-trailing-cr').
        self.strip_trailing_cr = False

        # Output format: context diff unless '-u'/'-U' asks for unified.
        self.unified_diff = False
        # Number of context lines around each change ('-U <n>').
        self.num_context_lines = 3

        # Compare directory trees instead of two files ('-r').
        self.recursive_diff = False
20
def getDirTree(path, basedir=""):
    """Build a sorted tree describing `path`, resolved against `basedir`.

    A tree is a tuple of form (name, child_trees), where `name` is `path`
    exactly as passed in (without `basedir`):
      * a directory has child_trees = sorted list of child trees,
      * an empty directory has child_trees = [],
      * anything os.walk() cannot enter (a regular file, a missing path)
        has child_trees = None.

    Returning (path, None) for a non-directory lets callers treat the
    result uniformly; a missing path then surfaces as an I/O error when
    the caller tries to open it.
    """
    # Only the first os.walk() entry (the directory itself) is used; the
    # subdirectories are expanded by the recursive calls below.
    top = next(os.walk(os.path.join(basedir, path)), None)
    if top is None:
        # os.walk() yielded nothing: not a directory that can be listed.
        return path, None

    dirname, child_dirs, files = top
    child_trees = [getDirTree(child_dir, dirname) for child_dir in child_dirs]
    child_trees.extend((filename, None) for filename in files)
    return path, sorted(child_trees)
31
def compareTwoFiles(flags, filepaths):
    """Diff the two inputs named by `filepaths` and return the exit code.

    Each entry of `filepaths` is a file path, or "-" for standard input.
    The inputs are read as bytes and compared as text, first using the
    locale's preferred encoding and then UTF-8; if neither works the
    comparison falls back to a byte-level diff.

    Returns whatever the selected comparison returns (0 when no diff
    output was produced, 1 otherwise). Errors from opening or reading
    the inputs propagate to the caller.
    """
    filelines = []
    for filepath in filepaths:
        if filepath == "-":
            # Read a duplicate of the stdin descriptor in binary mode;
            # closing the duplicate does not close sys.stdin itself.
            with os.fdopen(os.dup(sys.stdin.fileno()), 'rb') as stdin_bin:
                filelines.append(stdin_bin.readlines())
        else:
            with open(filepath, 'rb') as file_bin:
                filelines.append(file_bin.readlines())

    try:
        return compareTwoTextFiles(flags, filepaths, filelines,
                                   locale.getpreferredencoding(False))
    except UnicodeDecodeError:
        # Not valid in the locale's encoding; retry as UTF-8 below.
        pass

    try:
        return compareTwoTextFiles(flags, filepaths, filelines, "utf-8")
    except Exception:
        # Deliberate best-effort fallback: any failure of the text diff
        # leads to a byte-level diff. Catching Exception (rather than
        # using a bare except) lets KeyboardInterrupt and SystemExit
        # propagate instead of being turned into a binary diff.
        return compareTwoBinaryFiles(flags, filepaths, filelines)
51
def compareTwoBinaryFiles(flags, filepaths, filelines):
    """Diff two inputs at the byte level and write the result to stdout.

    `filelines` holds the two inputs as lists of byte lines and
    `filepaths` the names used in the diff header. The output format
    follows `flags.unified_diff` (unified or context) and
    `flags.num_context_lines`, on every supported Python version.

    Returns 1 if any diff output was written, 0 otherwise.
    """
    func = difflib.unified_diff if flags.unified_diff else difflib.context_diff

    if hasattr(difflib, 'diff_bytes'):
        # python 3.5 or newer: diff_bytes() wraps the str-based diff
        # function so it can work on bytes input.
        diffs = difflib.diff_bytes(func, filelines[0], filelines[1],
                                   filepaths[0].encode(),
                                   filepaths[1].encode(),
                                   n=flags.num_context_lines)
        # Undecodable bytes are shown as backslash escapes rather than
        # raising.
        diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
    else:
        # python 2.7: the diff functions are called on the lines directly.
        diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1],
                     n=flags.num_context_lines)

    exitCode = 0
    for diff in diffs:
        sys.stdout.write(to_string(diff))
        exitCode = 1
    return exitCode
74
def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding):
    """Diff two inputs as text and write the result to stdout.

    Every byte line in `filelines_bin` is decoded with `encoding` before
    anything is written, so a UnicodeDecodeError reaches the caller
    before any output is produced. Lines are then normalized according to
    `flags` (trailing CR stripping first, whitespace folding second) and
    compared with a unified or context diff.

    Returns 1 if any diff output was written, 0 otherwise.
    """
    decoded = [[raw_line.decode(encoding=encoding) for raw_line in raw_lines]
               for raw_lines in filelines_bin]

    # Normalization steps, applied to each line in list order.
    transforms = []
    if flags.strip_trailing_cr:
        transforms.append(lambda line: line.replace('\r\n', '\n'))
    if flags.ignore_all_space or flags.ignore_space_change:
        # '-w' removes all whitespace; '-b' collapses each run to one space.
        separator = '' if flags.ignore_all_space else ' '
        transforms.append(lambda line: separator.join(line.split()) + "\n")

    def normalize(line):
        for transform in transforms:
            line = transform(line)
        return line

    normalized = [[normalize(line) for line in lines] for lines in decoded]

    if flags.unified_diff:
        diff_func = difflib.unified_diff
    else:
        diff_func = difflib.context_diff

    status = 0
    for chunk in diff_func(normalized[0], normalized[1],
                           filepaths[0], filepaths[1],
                           n=flags.num_context_lines):
        sys.stdout.write(to_string(chunk))
        status = 1
    return status
106
def printDirVsFile(dir_path, file_path):
    """Report on stdout that `dir_path` is a directory but `file_path` is a file.

    The wording distinguishes an empty file (size 0) from a non-empty one.
    """
    is_empty = not os.path.getsize(file_path)
    template = (
        "File %s is a directory while file %s is a regular empty file"
        if is_empty else
        "File %s is a directory while file %s is a regular file"
    )
    sys.stdout.write(template % (dir_path, file_path) + "\n")
113
def printFileVsDir(file_path, dir_path):
    """Report on stdout that `file_path` is a file but `dir_path` is a directory.

    The wording distinguishes an empty file (size 0) from a non-empty one.
    """
    is_empty = not os.path.getsize(file_path)
    template = (
        "File %s is a regular empty file while file %s is a directory"
        if is_empty else
        "File %s is a regular file while file %s is a directory"
    )
    sys.stdout.write(template % (file_path, dir_path) + "\n")
120
def printOnlyIn(basedir, path, name):
    """Report on stdout that `name` exists only in directory `basedir`/`path`."""
    containing_dir = os.path.join(basedir, path)
    message = "Only in %s: %s\n" % (containing_dir, name)
    sys.stdout.write(message)
123
def compareDirTrees(flags, dir_trees, base_paths=("", "")):
    """Recursively compare two trees of form (name, child_trees).

    `dir_trees` holds the left and right tree; `base_paths` holds the
    directory each tree's name is relative to (indexable pair, empty
    strings by default). A tree whose child_trees is None is a file.

    Differences are reported on stdout. Returns 0 when nothing differs
    and 1 otherwise.
    """
    # Dirnames of the trees are not checked, it's caller's responsibility,
    # as top-level dirnames are always different. Base paths are important
    # for doing os.walk, but we don't put it into tree's dirname in order
    # to speed up string comparison below and while sorting in getDirTree.
    left_tree, right_tree = dir_trees[0], dir_trees[1]
    left_base, right_base = base_paths[0], base_paths[1]
    left_path = os.path.join(left_base, left_tree[0])
    right_path = os.path.join(right_base, right_tree[0])
    left_children, right_children = left_tree[1], right_tree[1]

    # Compare two files or report file vs. directory mismatch.
    if left_children is None and right_children is None:
        return compareTwoFiles(flags, [left_path, right_path])

    if left_children is None:
        printFileVsDir(left_path, right_path)
        return 1

    if right_children is None:
        printDirVsFile(left_path, right_path)
        return 1

    # Compare two directories via recursive use of compareDirTrees.
    # Children are sorted by name, so a single merge pass pairs up
    # entries with equal names and reports the rest as one-sided.
    exitCode = 0
    li, ri = 0, 0
    while li < len(left_children) and ri < len(right_children):
        left_name = left_children[li][0]
        right_name = right_children[ri][0]
        if left_name < right_name:
            exitCode = 1
            printOnlyIn(left_base, left_tree[0], left_name)
            li += 1
        elif left_name > right_name:
            exitCode = 1
            printOnlyIn(right_base, right_tree[0], right_name)
            ri += 1
        else:
            # The current directories become the base paths of the children.
            exitCode |= compareDirTrees(flags,
                                        [left_children[li],
                                         right_children[ri]],
                                        [left_path, right_path])
            li += 1
            ri += 1

    # At least one of the trees has ended. Report names from the other tree.
    for node in left_children[li:]:
        exitCode = 1
        printOnlyIn(left_base, left_tree[0], node[0])
    for node in right_children[ri:]:
        exitCode = 1
        printOnlyIn(right_base, right_tree[0], node[0])
    return exitCode
181
def main(argv):
    """Entry point: parse `argv`, run the comparison and exit.

    Supported options: -w, -b, -u, -U <n>, -r and --strip-trailing-cr,
    followed by exactly two operands. Without -r the operands are files
    ("-" means standard input); with -r they are compared as trees.

    Always terminates via sys.exit(): with the comparison's exit code on
    success, or 1 after writing a message to stderr for unsupported
    options, a bad -U value, a wrong operand count or an I/O failure.
    """
    if sys.platform == "win32":
        if hasattr(sys.stdout, 'buffer'):
            # python 3: rewrap stdout so "\n" is written as-is.
            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, newline='\n')
        else:
            # python 2.7
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    args = argv[1:]
    try:
        opts, args = getopt.gnu_getopt(args, "wbuU:r", ["strip-trailing-cr"])
    except getopt.GetoptError as err:
        sys.stderr.write("Unsupported: 'diff': %s\n" % str(err))
        sys.exit(1)

    flags = DiffFlags()
    filepaths, dir_trees = [], []
    for o, a in opts:
        if o == "-w":
            flags.ignore_all_space = True
        elif o == "-b":
            flags.ignore_space_change = True
        elif o == "-u":
            flags.unified_diff = True
        elif o.startswith("-U"):
            flags.unified_diff = True
            try:
                flags.num_context_lines = int(a)
                if flags.num_context_lines < 0:
                    raise ValueError("negative context line count")
            except ValueError:
                sys.stderr.write("Error: invalid '-U' argument: {}\n"
                                 .format(a))
                sys.exit(1)
        elif o == "-r":
            flags.recursive_diff = True
        elif o == "--strip-trailing-cr":
            flags.strip_trailing_cr = True
        else:
            # getopt only returns options listed above; raise explicitly
            # so the check is not stripped when running with -O.
            raise AssertionError("unhandled option")

    if len(args) != 2:
        sys.stderr.write("Error: missing or extra operand\n")
        sys.exit(1)

    exitCode = 0
    try:
        for path in args:
            if path != "-" and not os.path.isabs(path):
                path = os.path.realpath(os.path.join(os.getcwd(), path))

            if flags.recursive_diff:
                if path == "-":
                    sys.stderr.write("Error: cannot recursively compare '-'\n")
                    sys.exit(1)
                dir_trees.append(getDirTree(path))
            else:
                filepaths.append(path)

        if not flags.recursive_diff:
            exitCode = compareTwoFiles(flags, filepaths)
        else:
            exitCode = compareDirTrees(flags, dir_trees)

    except (IOError, OSError) as err:
        # On python 3 these are the same class; on python 2.7 OSError
        # (e.g. from os.path.getsize) is distinct and must be named too.
        sys.stderr.write("Error: 'diff' command failed, %s\n" % str(err))
        exitCode = 1

    sys.exit(exitCode)
252
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)
255