1#!/usr/bin/env python
2
3"""
4Static Analyzer qualification infrastructure.
5
6The goal is to test the analyzer against different projects,
7check for failures, compare results, and measure performance.
8
9Repository Directory will contain sources of the projects as well as the
10information on how to build them and the expected output.
11Repository Directory structure:
12   - ProjectMap file
13   - Historical Performance Data
14   - Project Dir1
15     - ReferenceOutput
16   - Project Dir2
17     - ReferenceOutput
18   ..
19Note that the build tree must be inside the project dir.
20
21To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
     the build directory does not pollute the repository to minimize network
     traffic).
25   - Build all projects, until error. Produce logs to report errors.
26   - Compare results.
27
28The files which should be kept around for failure investigations:
29   RepositoryCopy/Project DirI/ScanBuildResults
30   RepositoryCopy/Project DirI/run_static_analyzer.log
31
32Assumptions (TODO: shouldn't need to assume these.):
33   The script is being run from the Repository Directory.
34   The compiler for scan-build and scan-build are in the PATH.
35   export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
36
For more logging, set the env variables:
38   zaks:TI zaks$ export CCC_ANALYZER_LOG=1
39   zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
40
41The list of checkers tested are hardcoded in the Checkers variable.
42For testing additional checkers, use the SA_ADDITIONAL_CHECKERS environment
43variable. It should contain a comma separated list.
44"""
import glob
import logging
import math
import multiprocessing
import os
import plistlib
import shutil
import sys
import threading
import time
import zipfile

from queue import Empty, Queue
# mypy has problems finding InvalidFileException in the module
# and this is how we can shush that false positive
from plistlib import InvalidFileException  # type:ignore
from subprocess import CalledProcessError, check_call
from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple

import CmpRuns
import SATestUtils as utils
from ProjectMap import DownloadType, ProjectInfo
67
68
69###############################################################################
70# Helper functions.
71###############################################################################
72
class StreamToLogger:
    """
    A minimal file-like adapter that forwards writes to a logger.

    Instances stand in for stdout/stderr so that all harness output goes
    through the logging framework (see init_logger).
    """

    def __init__(self, logger: logging.Logger,
                 log_level: int = logging.INFO):
        self.logger = logger
        self.log_level = log_level

    def write(self, message: str):
        # Strip the trailing newline so the logger doesn't emit an
        # extra blank line.
        stripped = message.rstrip()
        self.logger.log(self.log_level, stripped)

    def flush(self):
        # Nothing is buffered; present only to satisfy the stream API.
        pass

    def fileno(self) -> int:
        # Dummy descriptor for callers that expect a real stream.
        return 0
88
89
# Thread-local storage: each worker thread gets its own stdout/stderr
# logger proxies (set up by init_logger), so output from concurrently
# tested projects is attributed to the right logger.
LOCAL = threading.local()


def init_logger(name: str):
    """
    Bind this thread's stdout/stderr proxies to a logger named *name*.
    """
    # TODO: use debug levels for VERBOSE messages
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    LOCAL.stdout = StreamToLogger(logger, logging.INFO)
    LOCAL.stderr = StreamToLogger(logger, logging.ERROR)


# The main thread logs through the "main" logger.
init_logger("main")
102
103
def stderr(message: str):
    """Write *message* through the current thread's error logger."""
    LOCAL.stderr.write(message)


def stdout(message: str):
    """Write *message* through the current thread's info logger."""
    LOCAL.stdout.write(message)


# Root logging configuration shared by all per-project loggers.
logging.basicConfig(
    format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
114
115
116###############################################################################
117# Configuration setup.
118###############################################################################
119
120
# Find Clang for static analysis: prefer $CC, fall back to 'clang' in PATH.
if 'CC' in os.environ:
    cc_candidate: Optional[str] = os.environ['CC']
else:
    cc_candidate = utils.which("clang", os.environ['PATH'])
if not cc_candidate:
    stderr("Error: cannot find 'clang' in PATH")
    sys.exit(1)

# The clang binary used for all analyses.
CLANG = cc_candidate

# Number of jobs: use roughly 3/4 of the available cores.
MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75))

# Names of the project specific scripts.
# The script that downloads the project.
DOWNLOAD_SCRIPT = "download_project.sh"
# The script that needs to be executed before the build can start.
CLEANUP_SCRIPT = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.
BUILD_SCRIPT = "run_static_analyzer.cmd"

# A comment in a build script which disables wrapping.
NO_PREFIX_CMD = "#NOPREFIX"

# The log file name.
LOG_DIR_NAME = "Logs"
BUILD_LOG_NAME = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
NUM_OF_FAILURES_IN_SUMMARY = 10

# The scan-build result directory.
OUTPUT_DIR_NAME = "ScanBuildResults"
# Reference results live in REF_PREFIX + OUTPUT_DIR_NAME.
REF_PREFIX = "Ref"

# The name of the directory storing the cached project source. If this
# directory does not exist, the download script will be executed.
# That script should create the "CachedSource" directory and download the
# project source into it.
CACHED_SOURCE_DIR_NAME = "CachedSource"

# The name of the directory containing the source code that will be analyzed.
# Each time a project is analyzed, a fresh copy of its CachedSource directory
# will be copied to the PatchedSource directory and then the local patches
# in PATCHFILE_NAME will be applied (if PATCHFILE_NAME exists).
PATCHED_SOURCE_DIR_NAME = "PatchedSource"

# The name of the patchfile specifying any changes that should be applied
# to the CachedSource before analyzing.
PATCHFILE_NAME = "changes_for_analyzer.patch"

# The list of checkers used during analyses.
# Currently, consists of all the non-experimental checkers, plus a few alpha
# checkers we don't want to regress on.
CHECKERS = ",".join([
    "alpha.unix.SimpleStream",
    "alpha.security.taint",
    "cplusplus.NewDeleteLeaks",
    "core",
    "cplusplus",
    "deadcode",
    "security",
    "unix",
    "osx",
    "nullability"
])

# Verbosity: 0 is quiet, >= 1 prints extra diagnostics.
VERBOSE = 0
190
191
192###############################################################################
193# Test harness logic.
194###############################################################################
195
196
def run_cleanup_script(directory: str, build_log_file: IO):
    """
    Execute the project's pre-build cleanup script (if present),
    from within the patched source tree.
    """
    utils.run_script(os.path.join(directory, CLEANUP_SCRIPT),
                     build_log_file,
                     os.path.join(directory, PATCHED_SOURCE_DIR_NAME),
                     out=LOCAL.stdout, err=LOCAL.stderr,
                     verbose=VERBOSE)
207
208
class TestInfo(NamedTuple):
    """
    Information about a project and settings for its analysis.
    """
    # The project to analyze (see ProjectMap).
    project: ProjectInfo
    # Pass --override-compiler to scan-build.
    override_compiler: bool = False
    # Extra -analyzer-config options (comma-separated).
    extra_analyzer_config: str = ""
    # Extra checkers to enable (comma-separated).
    extra_checkers: str = ""
    # Whether this run (re)generates the reference results.
    is_reference_build: bool = False
    # Success criteria level; see run_cmp_results for the meaning.
    strictness: int = 0
219
220
# The typing package doesn't have a separate type for Queue, but has a
# generic stub. We still want to have a type-safe checked project queue,
# so we specify the generic type for mypy only.
#
# It is a common workaround for this situation:
# https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
if TYPE_CHECKING:
    TestQueue = Queue[TestInfo]  # this is only processed by mypy
else:
    TestQueue = Queue  # this will be executed at runtime
231
232
class RegressionTester:
    """
    A component aggregating all of the project testing.

    Runs every requested project, sequentially or in worker threads
    depending on `jobs`, and reports overall success.
    """

    def __init__(self, jobs: int, projects: List[ProjectInfo],
                 override_compiler: bool, extra_analyzer_config: str,
                 extra_checkers: str,
                 regenerate: bool, strictness: int):
        # Number of projects tested in parallel (<= 1 means sequential).
        self.jobs = jobs
        self.projects = projects
        self.override_compiler = override_compiler
        self.extra_analyzer_config = extra_analyzer_config
        self.extra_checkers = extra_checkers
        # When True, produce reference results instead of comparing.
        self.regenerate = regenerate
        # Success criteria level; see run_cmp_results.
        self.strictness = strictness

    def test_all(self) -> bool:
        """
        Test all configured projects.
        :return: whether tests have passed.
        """
        projects_to_test: List[TestInfo] = []

        # Test the projects.
        for project in self.projects:
            projects_to_test.append(
                TestInfo(project,
                         self.override_compiler,
                         self.extra_analyzer_config,
                         self.extra_checkers,
                         # regenerate maps to TestInfo.is_reference_build
                         self.regenerate, self.strictness))
        if self.jobs <= 1:
            return self._single_threaded_test_all(projects_to_test)
        else:
            return self._multi_threaded_test_all(projects_to_test)

    def _single_threaded_test_all(self,
                                  projects_to_test: List[TestInfo]) -> bool:
        """
        Run all projects.
        :return: whether tests have passed.
        """
        success = True
        for project_info in projects_to_test:
            tester = ProjectTester(project_info)
            success &= tester.test()
        return success

    def _multi_threaded_test_all(self,
                                 projects_to_test: List[TestInfo]) -> bool:
        """
        Run each project in a separate thread.

        This is OK despite GIL, as testing is blocked
        on launching external processes.

        :return: whether tests have passed.
        """
        tasks_queue = TestQueue()

        for project_info in projects_to_test:
            tasks_queue.put(project_info)

        results_differ = threading.Event()
        failure_flag = threading.Event()

        for _ in range(self.jobs):
            T = TestProjectThread(tasks_queue, results_differ, failure_flag)
            T.start()

        # Required to handle Ctrl-C gracefully.
        while tasks_queue.unfinished_tasks:
            time.sleep(0.1)  # Seconds.
            if failure_flag.is_set():
                stderr("Test runner crashed\n")
                sys.exit(1)
        return not results_differ.is_set()
307
308
class ProjectTester:
    """
    A component aggregating testing for one project.
    """

    def __init__(self, test_info: TestInfo, silent: bool = False):
        self.project = test_info.project
        self.override_compiler = test_info.override_compiler
        self.extra_analyzer_config = test_info.extra_analyzer_config
        self.extra_checkers = test_info.extra_checkers
        self.is_reference_build = test_info.is_reference_build
        self.strictness = test_info.strictness
        # When True, suppress informational output (see out()).
        self.silent = silent

    def test(self) -> bool:
        """
        Test a given project.
        :return tests_passed: Whether tests have passed according
        to the :param strictness: criteria.
        """
        if not self.project.enabled:
            self.out(
                f" \n\n--- Skipping disabled project {self.project.name}\n")
            return True

        self.out(f" \n\n--- Building project {self.project.name}\n")

        start_time = time.time()

        project_dir = self.get_project_dir()
        self.vout(f"  Build directory: {project_dir}.\n")

        # Set the build results directory.
        output_dir = self.get_output_dir()

        self.build(project_dir, output_dir)
        check_build(output_dir)

        if self.is_reference_build:
            cleanup_reference_results(output_dir)
            passed = True
        else:
            passed = run_cmp_results(project_dir, self.strictness)

        self.out(f"Completed tests for project {self.project.name} "
                 f"(time: {time.time() - start_time:.2f}).\n")

        return passed

    def get_project_dir(self) -> str:
        """Absolute path of the project directory (assumes the current
        working directory is the repository root)."""
        return os.path.join(os.path.abspath(os.curdir), self.project.name)

    def get_output_dir(self) -> str:
        """Path of the scan-build results directory for this run."""
        if self.is_reference_build:
            dirname = REF_PREFIX + OUTPUT_DIR_NAME
        else:
            dirname = OUTPUT_DIR_NAME

        return os.path.join(self.get_project_dir(), dirname)

    def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
        """
        Download (if needed) and analyze the project.

        :return: (build time in seconds, peak memory in bytes).
        """
        build_log_path = get_build_log_path(output_dir)

        self.out(f"Log file: {build_log_path}\n")
        self.out(f"Output directory: {output_dir}\n")

        remove_log_file(output_dir)

        # Clean up scan build results.
        if os.path.exists(output_dir):
            self.vout(f"  Removing old results: {output_dir}\n")

            shutil.rmtree(output_dir)

        assert(not os.path.exists(output_dir))
        os.makedirs(os.path.join(output_dir, LOG_DIR_NAME))

        # Build and analyze the project.
        with open(build_log_path, "w+") as build_log_file:
            if self.project.mode == 1:
                self._download_and_patch(directory, build_log_file)
                run_cleanup_script(directory, build_log_file)
                build_time, memory = self.scan_build(directory, output_dir,
                                                     build_log_file)
            else:
                build_time, memory = self.analyze_preprocessed(directory,
                                                               output_dir)

            if self.is_reference_build:
                run_cleanup_script(directory, build_log_file)
                normalize_reference_results(directory, output_dir,
                                            self.project.mode)

        self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
                 f"peak memory: {utils.memory_to_str(memory)}). "
                 f"See the log for more details: {build_log_path}\n")

        return build_time, memory

    def scan_build(self, directory: str, output_dir: str,
                   build_log_file: IO) -> Tuple[float, int]:
        """
        Build the project with scan-build by reading in the commands and
        prefixing them with the scan-build options.

        :return: (analysis time in seconds, peak memory in bytes).
        """
        build_script_path = os.path.join(directory, BUILD_SCRIPT)
        if not os.path.exists(build_script_path):
            stderr(f"Error: build script is not defined: "
                   f"{build_script_path}\n")
            sys.exit(1)

        all_checkers = CHECKERS
        if 'SA_ADDITIONAL_CHECKERS' in os.environ:
            all_checkers = (all_checkers + ',' +
                            os.environ['SA_ADDITIONAL_CHECKERS'])
        if self.extra_checkers != "":
            all_checkers += "," + self.extra_checkers

        # Run scan-build from within the patched source directory.
        cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        options = f"--use-analyzer '{CLANG}' "
        options += f"-plist-html -o '{output_dir}' "
        options += f"-enable-checker {all_checkers} "
        options += "--keep-empty "
        options += f"-analyzer-config '{self.generate_config()}' "

        if self.override_compiler:
            options += "--override-compiler "

        extra_env: Dict[str, str] = {}

        execution_time = 0.0
        peak_memory = 0

        try:
            # Context manager so the command file is closed even when a
            # build command fails (the previous code leaked the handle
            # on the CalledProcessError path).
            with open(build_script_path, "r") as command_file:
                command_prefix = "scan-build " + options + " "

                for command in command_file:
                    command = command.strip()

                    if len(command) == 0:
                        continue

                    # Custom analyzer invocation specified by project.
                    # Communicate required information using environment
                    # variables instead.
                    if command == NO_PREFIX_CMD:
                        command_prefix = ""
                        extra_env['OUTPUT'] = output_dir
                        extra_env['CC'] = CLANG
                        extra_env['ANALYZER_CONFIG'] = \
                            self.generate_config()
                        continue

                    if command.startswith("#"):
                        continue

                    # If using 'make', auto imply a -jX argument
                    # to speed up analysis.  xcodebuild will
                    # automatically use the maximum number of cores.
                    if (command.startswith("make ") or command == "make") \
                            and "-j" not in command:
                        command += f" -j{MAX_JOBS}"

                    command_to_run = command_prefix + command

                    self.vout(f"  Executing: {command_to_run}\n")

                    # 'elapsed' rather than 'time' to avoid shadowing
                    # the time module.
                    elapsed, mem = utils.check_and_measure_call(
                        command_to_run, cwd=cwd,
                        stderr=build_log_file,
                        stdout=build_log_file,
                        env=dict(os.environ, **extra_env),
                        shell=True)

                    execution_time += elapsed
                    peak_memory = max(peak_memory, mem)

        except CalledProcessError:
            stderr("Error: scan-build failed. Its output was: \n")
            build_log_file.seek(0)
            shutil.copyfileobj(build_log_file, LOCAL.stderr)
            sys.exit(1)

        return execution_time, peak_memory

    def analyze_preprocessed(self, directory: str,
                             output_dir: str) -> Tuple[float, int]:
        """
        Run analysis on a set of preprocessed files.

        :return: (analysis time in seconds, peak memory in bytes).
        """
        if os.path.exists(os.path.join(directory, BUILD_SCRIPT)):
            stderr(f"Error: The preprocessed files project "
                   f"should not contain {BUILD_SCRIPT}\n")
            raise Exception()

        prefix = CLANG + " --analyze "

        prefix += "--analyzer-output plist "
        prefix += " -Xclang -analyzer-checker=" + CHECKERS
        prefix += " -fcxx-exceptions -fblocks "
        prefix += " -Xclang -analyzer-config "
        prefix += f"-Xclang {self.generate_config()} "

        if self.project.mode == 2:
            prefix += "-std=c++11 "

        plist_path = os.path.join(directory, output_dir, "date")
        fail_path = os.path.join(plist_path, "failures")
        os.makedirs(fail_path)

        execution_time = 0.0
        peak_memory = 0

        for full_file_name in glob.glob(directory + "/*"):
            file_name = os.path.basename(full_file_name)
            failed = False

            # Only run the analyzes on supported files.
            if utils.has_no_extension(file_name):
                continue
            if not utils.is_valid_single_input_file(file_name):
                stderr(f"Error: Invalid single input file {full_file_name}.\n")
                raise Exception()

            # Build and call the analyzer command.
            plist_basename = os.path.join(plist_path, file_name)
            output_option = f"-o '{plist_basename}.plist' "
            command = f"{prefix}{output_option}'{file_name}'"

            log_path = os.path.join(fail_path, file_name + ".stderr.txt")
            with open(log_path, "w+") as log_file:
                try:
                    self.vout(f"  Executing: {command}\n")

                    elapsed, mem = utils.check_and_measure_call(
                        command, cwd=directory, stderr=log_file,
                        stdout=log_file, shell=True)

                    execution_time += elapsed
                    peak_memory = max(peak_memory, mem)

                except CalledProcessError as e:
                    stderr(f"Error: Analyzes of {full_file_name} failed. "
                           f"See {log_file.name} for details. "
                           f"Error code {e.returncode}.\n")
                    failed = True

                # If command did not fail, erase the log file.
                if not failed:
                    os.remove(log_file.name)

        return execution_time, peak_memory

    def generate_config(self) -> str:
        """Build the -analyzer-config argument for this run."""
        out = "serialize-stats=true,stable-report-filename=true"

        if self.extra_analyzer_config:
            out += "," + self.extra_analyzer_config

        return out

    def _download_and_patch(self, directory: str, build_log_file: IO):
        """
        Download the project and apply the local patchfile if it exists.
        """
        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)

        # If we don't already have the cached source, run the project's
        # download script to download it.
        if not os.path.exists(cached_source):
            self._download(directory, build_log_file)
            if not os.path.exists(cached_source):
                stderr(f"Error: '{cached_source}' not found after download.\n")
                # sys.exit for consistency with the rest of the file; the
                # bare exit() builtin is meant for interactive sessions.
                sys.exit(1)

        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        # Remove potentially stale patched source.
        if os.path.exists(patched_source):
            shutil.rmtree(patched_source)

        # Copy the cached source and apply any patches to the copy.
        shutil.copytree(cached_source, patched_source, symlinks=True)
        self._apply_patch(directory, build_log_file)

    def _download(self, directory: str, build_log_file: IO):
        """
        Run the script to download the project, if it exists.
        """
        if self.project.source == DownloadType.GIT:
            self._download_from_git(directory, build_log_file)
        elif self.project.source == DownloadType.ZIP:
            self._unpack_zip(directory, build_log_file)
        elif self.project.source == DownloadType.SCRIPT:
            self._run_download_script(directory, build_log_file)
        else:
            raise ValueError(
                f"Unknown source type '{self.project.source}' is found "
                f"for the '{self.project.name}' project")

    def _download_from_git(self, directory: str, build_log_file: IO):
        """Clone the project at its pinned commit into CachedSource."""
        repo = self.project.origin
        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)

        check_call(f"git clone --recursive {repo} {cached_source}",
                   cwd=directory, stderr=build_log_file,
                   stdout=build_log_file, shell=True)
        check_call(f"git checkout --quiet {self.project.commit}",
                   cwd=cached_source, stderr=build_log_file,
                   stdout=build_log_file, shell=True)

    def _unpack_zip(self, directory: str, build_log_file: IO):
        """Extract the project's single zip archive into CachedSource."""
        zip_files = list(glob.glob(directory + "/*.zip"))

        if len(zip_files) == 0:
            raise ValueError(
                f"Couldn't find any zip files to unpack for the "
                f"'{self.project.name}' project")

        if len(zip_files) > 1:
            raise ValueError(
                f"Couldn't decide which of the zip files ({zip_files}) "
                f"for the '{self.project.name}' project to unpack")

        with zipfile.ZipFile(zip_files[0], "r") as zip_file:
            zip_file.extractall(os.path.join(directory,
                                             CACHED_SOURCE_DIR_NAME))

    @staticmethod
    def _run_download_script(directory: str, build_log_file: IO):
        """Run the project-provided download script."""
        script_path = os.path.join(directory, DOWNLOAD_SCRIPT)
        utils.run_script(script_path, build_log_file, directory,
                         out=LOCAL.stdout, err=LOCAL.stderr,
                         verbose=VERBOSE)

    def _apply_patch(self, directory: str, build_log_file: IO):
        """Apply the local patchfile to PatchedSource, if one exists."""
        patchfile_path = os.path.join(directory, PATCHFILE_NAME)
        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        if not os.path.exists(patchfile_path):
            self.out("  No local patches.\n")
            return

        self.out("  Applying patch.\n")
        try:
            check_call(f"patch -p1 < '{patchfile_path}'",
                       cwd=patched_source,
                       stderr=build_log_file,
                       stdout=build_log_file,
                       shell=True)

        except CalledProcessError:
            stderr(f"Error: Patch failed. "
                   f"See {build_log_file.name} for details.\n")
            sys.exit(1)

    def out(self, what: str):
        """Print normal output (suppressed in silent mode)."""
        if not self.silent:
            stdout(what)

    def vout(self, what: str):
        """Print verbose output (only when VERBOSE >= 1)."""
        if VERBOSE >= 1:
            self.out(what)
674
675
class TestProjectThread(threading.Thread):
    """Worker thread that pulls projects off a queue and tests them."""

    def __init__(self, tasks_queue: TestQueue,
                 results_differ: threading.Event,
                 failure_flag: threading.Event):
        """
        :param results_differ: Used to signify that results differ from
               the canonical ones.
        :param failure_flag: Used to signify a failure during the run.
        """
        self.tasks_queue = tasks_queue
        self.results_differ = results_differ
        self.failure_flag = failure_flag
        super().__init__()

        # Needed to gracefully handle interrupts with Ctrl-C
        self.daemon = True

    def run(self):
        while True:
            try:
                # Non-blocking get: the previous empty()-then-get()
                # sequence could block forever if another worker drained
                # the queue between the two calls.
                try:
                    test_info = self.tasks_queue.get_nowait()
                except Empty:
                    return

                init_logger(test_info.project.name)

                tester = ProjectTester(test_info)
                if not tester.test():
                    self.results_differ.set()

                self.tasks_queue.task_done()

            except BaseException:
                self.failure_flag.set()
                raise
708
709
710###############################################################################
711# Utility functions.
712###############################################################################
713
714
def check_build(output_dir: str):
    """
    Given the scan-build output directory, checks if the build failed
    (by searching for the failures directories). If there are failures,
    prints a summary of up to NUM_OF_FAILURES_IN_SUMMARY of them and
    exits the process with status 1.
    """
    # Check if there are failures.
    failures = glob.glob(output_dir + "/*/failures/*.stderr.txt")
    total_failed = len(failures)

    if total_failed == 0:
        clean_up_empty_plists(output_dir)
        clean_up_empty_folders(output_dir)

        plists = glob.glob(output_dir + "/*/*.plist")
        stdout(f"Number of bug reports "
               f"(non-empty plist files) produced: {len(plists)}\n")
        return

    stderr("Error: analysis failed.\n")
    stderr(f"Total of {total_failed} failures discovered.\n")

    if total_failed > NUM_OF_FAILURES_IN_SUMMARY:
        stderr(f"See the first {NUM_OF_FAILURES_IN_SUMMARY} below.\n")

    # Show the first NUM_OF_FAILURES_IN_SUMMARY failures.  (The previous
    # `index >= NUM_OF_FAILURES_IN_SUMMARY: break` check was off by one
    # and only printed NUM_OF_FAILURES_IN_SUMMARY - 1 of them.)
    for index, failed_log_path in enumerate(
            failures[:NUM_OF_FAILURES_IN_SUMMARY], start=1):
        stderr(f"\n-- Error #{index} -----------\n")

        with open(failed_log_path, "r") as failed_log:
            # NOTE(review): the log body goes to the stdout logger while
            # the headers go to stderr — confirm this split is intended.
            shutil.copyfileobj(failed_log, LOCAL.stdout)

    if total_failed > NUM_OF_FAILURES_IN_SUMMARY:
        stderr("See the results folder for more.")

    sys.exit(1)
754
755
def cleanup_reference_results(output_dir: str):
    """
    Delete html, css, and js files from reference results. These can
    include multiple copies of the benchmark source and so get very large.
    """
    for extension in ("html", "css", "js"):
        # glob already returns paths rooted at output_dir, so remove them
        # directly.  (The old extra os.path.join(output_dir, ...) was a
        # no-op for absolute paths and double-prefixed relative ones.)
        for file_to_rm in glob.glob(f"{output_dir}/*/*.{extension}"):
            os.remove(file_to_rm)

    # Remove the log file. It leaks absolute path names.
    remove_log_file(output_dir)
770
771
def run_cmp_results(directory: str, strictness: int = 0) -> bool:
    """
    Compare the warnings produced by scan-build.
    strictness defines the success criteria for the test:
      0 - success if there are no crashes or analyzer failure.
      1 - success if there are no difference in the number of reported bugs.
      2 - success if all the bug reports are identical.

    :return success: Whether tests pass according to the strictness
    criteria.
    """
    tests_passed = True
    start_time = time.time()

    ref_dir = os.path.join(directory, REF_PREFIX + OUTPUT_DIR_NAME)
    new_dir = os.path.join(directory, OUTPUT_DIR_NAME)

    # We have to go one level down the directory tree.
    ref_list = glob.glob(ref_dir + "/*")
    new_list = glob.glob(new_dir + "/*")

    # Log folders are also located in the results dir, so ignore them.
    ref_log_dir = os.path.join(ref_dir, LOG_DIR_NAME)
    if ref_log_dir in ref_list:
        ref_list.remove(ref_log_dir)
    # NOTE(review): new results are assumed to always contain a log dir;
    # list.remove raises ValueError otherwise — confirm that is intended.
    new_list.remove(os.path.join(new_dir, LOG_DIR_NAME))

    if len(ref_list) != len(new_list):
        stderr(f"Mismatch in number of results folders: "
               f"{ref_list} vs {new_list}")
        sys.exit(1)

    # There might be more then one folder underneath - one per each scan-build
    # command (Ex: one for configure and one for make).
    if len(ref_list) > 1:
        # Assume that the corresponding folders have the same names.
        ref_list.sort()
        new_list.sort()

    # Iterate and find the differences.
    num_diffs = 0
    # Note: ref_dir/new_dir are deliberately reused as loop variables.
    for ref_dir, new_dir in zip(ref_list, new_list):
        assert(ref_dir != new_dir)

        if VERBOSE >= 1:
            stdout(f"  Comparing Results: {ref_dir} {new_dir}\n")

        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        ref_results = CmpRuns.ResultsDirectory(ref_dir)
        new_results = CmpRuns.ResultsDirectory(new_dir, patched_source)

        # Scan the results, delete empty plist files.
        num_diffs, reports_in_ref, reports_in_new = \
            CmpRuns.dump_scan_build_results_diff(ref_results, new_results,
                                                 delete_empty=False,
                                                 out=LOCAL.stdout)

        if num_diffs > 0:
            stdout(f"Warning: {num_diffs} differences in diagnostics.\n")

        if strictness >= 2 and num_diffs > 0:
            stdout("Error: Diffs found in strict mode (2).\n")
            tests_passed = False

        elif strictness >= 1 and reports_in_ref != reports_in_new:
            stdout("Error: The number of results are different "
                   " strict mode (1).\n")
            tests_passed = False

    stdout(f"Diagnostic comparison complete "
           f"(time: {time.time() - start_time:.2f}).\n")

    return tests_passed
846
847
def normalize_reference_results(directory: str, output_dir: str,
                                build_mode: int):
    """
    Make the absolute paths relative in the reference results.
    """
    # In mode 1 the analyzed sources live in the patched copy of the
    # project; otherwise they are directly under the project directory.
    if build_mode == 1:
        path_prefix = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
    else:
        path_prefix = directory

    prefix_len = len(path_prefix) + 1

    for dir_path, _, filenames in os.walk(output_dir):
        for filename in filenames:
            if not filename.endswith('plist'):
                continue

            plist = os.path.join(dir_path, filename)
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)

            data['files'] = [
                source[prefix_len:]
                if source.startswith(path_prefix) else source
                for source in data['files']
            ]

            # Remove transient fields which change from run to run.
            for diagnostic in data['diagnostics']:
                diagnostic.pop('HTMLDiagnostics_files', None)
            data.pop('clang_version', None)

            with open(plist, "wb") as plist_file:
                plistlib.dump(data, plist_file)
881
882
def get_build_log_path(output_dir: str) -> str:
    """Return the path of the scan-build log inside *output_dir*."""
    return os.path.join(output_dir, LOG_DIR_NAME, BUILD_LOG_NAME)
885
886
def remove_log_file(output_dir: str):
    """Delete the build log for *output_dir*, if one exists."""
    build_log_path = get_build_log_path(output_dir)

    if not os.path.exists(build_log_path):
        return

    if VERBOSE >= 1:
        stdout(f"  Removing log file: {build_log_path}\n")

    os.remove(build_log_path)
896
897
def clean_up_empty_plists(output_dir: str):
    """
    A plist file is created for each call to the analyzer (each source
    file). We are only interested in the ones that have bug reports,
    so delete the rest.
    """
    # glob already returns paths rooted at output_dir; re-joining them
    # with output_dir (as the old code did) would corrupt relative paths.
    for plist in glob.glob(output_dir + "/*/*.plist"):
        try:
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)

        except InvalidFileException as e:
            stderr(f"Error parsing plist file {plist}: {str(e)}")
            continue

        # Delete empty reports.
        if not data['files']:
            os.remove(plist)
918
919
def clean_up_empty_folders(output_dir: str):
    """
    Drop result subdirectories that contain no files, since git would
    not store empty folders anyway.
    """
    for candidate in glob.glob(output_dir + "/*"):
        if not os.listdir(candidate):
            os.removedirs(candidate)
928
929
# This module is a library used by SATest.py; running it directly is
# an error.
if __name__ == "__main__":
    print("SATestBuild.py should not be used on its own.")
    print("Please use 'SATest.py build' instead")
    sys.exit(1)
934