1#!/usr/bin/env python 2 3""" 4CmpRuns - A simple tool for comparing two static analyzer runs to determine 5which reports have been added, removed, or changed. 6 7This is designed to support automated testing using the static analyzer, from 8two perspectives: 9 1. To monitor changes in the static analyzer's reports on real code bases, 10 for regression testing. 11 12 2. For use by end users who want to integrate regular static analyzer testing 13 into a buildbot like environment. 14 15Usage: 16 17 # Load the results of both runs, to obtain lists of the corresponding 18 # AnalysisDiagnostic objects. 19 # 20 resultsA = load_results_from_single_run(singleRunInfoA, delete_empty) 21 resultsB = load_results_from_single_run(singleRunInfoB, delete_empty) 22 23 # Generate a relation from diagnostics in run A to diagnostics in run B 24 # to obtain a list of triples (a, b, confidence). 25 diff = compare_results(resultsA, resultsB) 26 27""" 28import json 29import os 30import plistlib 31import re 32import sys 33 34from math import log 35from collections import defaultdict 36from copy import copy 37from enum import Enum 38from typing import (Any, DefaultDict, Dict, List, NamedTuple, Optional, 39 Sequence, Set, TextIO, TypeVar, Tuple, Union) 40 41 42Number = Union[int, float] 43Stats = Dict[str, Dict[str, Number]] 44Plist = Dict[str, Any] 45JSON = Dict[str, Any] 46# Diff in a form: field -> (before, after) 47JSONDiff = Dict[str, Tuple[str, str]] 48# Type for generics 49T = TypeVar('T') 50 51STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL) 52 53 54class Colors: 55 """ 56 Color for terminal highlight. 57 """ 58 RED = '\x1b[2;30;41m' 59 GREEN = '\x1b[6;30;42m' 60 CLEAR = '\x1b[0m' 61 62 63class HistogramType(str, Enum): 64 RELATIVE = "relative" 65 LOG_RELATIVE = "log-relative" 66 ABSOLUTE = "absolute" 67 68 69class ResultsDirectory(NamedTuple): 70 path: str 71 root: str = "" 72 73 74class SingleRunInfo: 75 """ 76 Information about analysis run: 77 path - the analysis output directory 78 root - the name of the root directory, which will be disregarded when 79 determining the source file name 80 """ 81 def __init__(self, results: ResultsDirectory, 82 verbose_log: Optional[str] = None): 83 self.path = results.path 84 self.root = results.root.rstrip("/\\") 85 self.verbose_log = verbose_log 86 87 88class AnalysisDiagnostic: 89 def __init__(self, data: Plist, report: "AnalysisReport", 90 html_report: Optional[str]): 91 self._data = data 92 self._loc = self._data['location'] 93 self._report = report 94 self._html_report = html_report 95 self._report_size = len(self._data['path']) 96 97 def get_file_name(self) -> str: 98 root = self._report.run.root 99 file_name = self._report.files[self._loc['file']] 100 101 if file_name.startswith(root) and len(root) > 0: 102 return file_name[len(root) + 1:] 103 104 return file_name 105 106 def get_root_file_name(self) -> str: 107 path = self._data['path'] 108 109 if not path: 110 return self.get_file_name() 111 112 p = path[0] 113 if 'location' in p: 114 file_index = p['location']['file'] 115 else: # control edge 116 file_index = path[0]['edges'][0]['start'][0]['file'] 117 118 out = self._report.files[file_index] 119 root = self._report.run.root 120 121 if out.startswith(root): 122 return out[len(root):] 123 124 return out 125 126 def get_line(self) -> int: 127 return self._loc['line'] 128 129 def get_column(self) -> int: 130 return self._loc['col'] 131 132 def get_path_length(self) -> int: 133 return self._report_size 134 135 def get_category(self) -> str: 136 return self._data['category'] 137 138 def get_description(self) -> str: 139 return self._data['description'] 140 141 def get_location(self) -> str: 142 return f"{self.get_file_name()}:{self.get_line()}:{self.get_column()}" 143 144 def get_issue_identifier(self) -> str: 145 id = self.get_file_name() + "+" 146 147 if "issue_context" in self._data: 148 id += self._data["issue_context"] + "+" 149 150 if "issue_hash_content_of_line_in_context" in self._data: 151 id += str(self._data["issue_hash_content_of_line_in_context"]) 152 153 return id 154 155 def get_html_report(self) -> str: 156 if self._html_report is None: 157 return " " 158 159 return os.path.join(self._report.run.path, self._html_report) 160 161 def get_readable_name(self) -> str: 162 if "issue_context" in self._data: 163 funcname_postfix = "#" + self._data["issue_context"] 164 else: 165 funcname_postfix = "" 166 167 root_filename = self.get_root_file_name() 168 file_name = self.get_file_name() 169 170 if root_filename != file_name: 171 file_prefix = f"[{root_filename}] {file_name}" 172 else: 173 file_prefix = root_filename 174 175 line = self.get_line() 176 col = self.get_column() 177 return f"{file_prefix}{funcname_postfix}:{line}:{col}" \ 178 f", {self.get_category()}: {self.get_description()}" 179 180 KEY_FIELDS = ["check_name", "category", "description"] 181 182 def is_similar_to(self, other: "AnalysisDiagnostic") -> bool: 183 # We consider two diagnostics similar only if at least one 184 # of the key fields is the same in both diagnostics. 185 return len(self.get_diffs(other)) != len(self.KEY_FIELDS) 186 187 def get_diffs(self, other: "AnalysisDiagnostic") -> JSONDiff: 188 return {field: (self._data[field], other._data[field]) 189 for field in self.KEY_FIELDS 190 if self._data[field] != other._data[field]} 191 192 # Note, the data format is not an API and may change from one analyzer 193 # version to another. 194 def get_raw_data(self) -> Plist: 195 return self._data 196 197 def __eq__(self, other: object) -> bool: 198 return hash(self) == hash(other) 199 200 def __ne__(self, other: object) -> bool: 201 return hash(self) != hash(other) 202 203 def __hash__(self) -> int: 204 return hash(self.get_issue_identifier()) 205 206 207class AnalysisRun: 208 def __init__(self, info: SingleRunInfo): 209 self.path = info.path 210 self.root = info.root 211 self.info = info 212 self.reports: List[AnalysisReport] = [] 213 # Cumulative list of all diagnostics from all the reports. 214 self.diagnostics: List[AnalysisDiagnostic] = [] 215 self.clang_version: Optional[str] = None 216 self.raw_stats: List[JSON] = [] 217 218 def get_clang_version(self) -> Optional[str]: 219 return self.clang_version 220 221 def read_single_file(self, path: str, delete_empty: bool): 222 with open(path, "rb") as plist_file: 223 data = plistlib.load(plist_file) 224 225 if 'statistics' in data: 226 self.raw_stats.append(json.loads(data['statistics'])) 227 data.pop('statistics') 228 229 # We want to retrieve the clang version even if there are no 230 # reports. Assume that all reports were created using the same 231 # clang version (this is always true and is more efficient). 232 if 'clang_version' in data: 233 if self.clang_version is None: 234 self.clang_version = data.pop('clang_version') 235 else: 236 data.pop('clang_version') 237 238 # Ignore/delete empty reports. 239 if not data['files']: 240 if delete_empty: 241 os.remove(path) 242 return 243 244 # Extract the HTML reports, if they exists. 245 if 'HTMLDiagnostics_files' in data['diagnostics'][0]: 246 htmlFiles = [] 247 for d in data['diagnostics']: 248 # FIXME: Why is this named files, when does it have multiple 249 # files? 250 assert len(d['HTMLDiagnostics_files']) == 1 251 htmlFiles.append(d.pop('HTMLDiagnostics_files')[0]) 252 else: 253 htmlFiles = [None] * len(data['diagnostics']) 254 255 report = AnalysisReport(self, data.pop('files')) 256 diagnostics = [AnalysisDiagnostic(d, report, h) 257 for d, h in zip(data.pop('diagnostics'), htmlFiles)] 258 259 assert not data 260 261 report.diagnostics.extend(diagnostics) 262 self.reports.append(report) 263 self.diagnostics.extend(diagnostics) 264 265 266class AnalysisReport: 267 def __init__(self, run: AnalysisRun, files: List[str]): 268 self.run = run 269 self.files = files 270 self.diagnostics: List[AnalysisDiagnostic] = [] 271 272 273def load_results(results: ResultsDirectory, delete_empty: bool = True, 274 verbose_log: Optional[str] = None) -> AnalysisRun: 275 """ 276 Backwards compatibility API. 277 """ 278 return load_results_from_single_run(SingleRunInfo(results, 279 verbose_log), 280 delete_empty) 281 282 283def load_results_from_single_run(info: SingleRunInfo, 284 delete_empty: bool = True) -> AnalysisRun: 285 """ 286 # Load results of the analyzes from a given output folder. 287 # - info is the SingleRunInfo object 288 # - delete_empty specifies if the empty plist files should be deleted 289 290 """ 291 path = info.path 292 run = AnalysisRun(info) 293 294 if os.path.isfile(path): 295 run.read_single_file(path, delete_empty) 296 else: 297 for dirpath, dirnames, filenames in os.walk(path): 298 for f in filenames: 299 if not f.endswith('plist'): 300 continue 301 302 p = os.path.join(dirpath, f) 303 run.read_single_file(p, delete_empty) 304 305 return run 306 307 308def cmp_analysis_diagnostic(d): 309 return d.get_issue_identifier() 310 311 312AnalysisDiagnosticPair = Tuple[AnalysisDiagnostic, AnalysisDiagnostic] 313 314 315class ComparisonResult: 316 def __init__(self): 317 self.present_in_both: List[AnalysisDiagnostic] = [] 318 self.present_only_in_old: List[AnalysisDiagnostic] = [] 319 self.present_only_in_new: List[AnalysisDiagnostic] = [] 320 self.changed_between_new_and_old: List[AnalysisDiagnosticPair] = [] 321 322 def add_common(self, issue: AnalysisDiagnostic): 323 self.present_in_both.append(issue) 324 325 def add_removed(self, issue: AnalysisDiagnostic): 326 self.present_only_in_old.append(issue) 327 328 def add_added(self, issue: AnalysisDiagnostic): 329 self.present_only_in_new.append(issue) 330 331 def add_changed(self, old_issue: AnalysisDiagnostic, 332 new_issue: AnalysisDiagnostic): 333 self.changed_between_new_and_old.append((old_issue, new_issue)) 334 335 336GroupedDiagnostics = DefaultDict[str, List[AnalysisDiagnostic]] 337 338 339def get_grouped_diagnostics(diagnostics: List[AnalysisDiagnostic] 340 ) -> GroupedDiagnostics: 341 result: GroupedDiagnostics = defaultdict(list) 342 for diagnostic in diagnostics: 343 result[diagnostic.get_location()].append(diagnostic) 344 return result 345 346 347def compare_results(results_old: AnalysisRun, results_new: AnalysisRun, 348 histogram: Optional[HistogramType] = None 349 ) -> ComparisonResult: 350 """ 351 compare_results - Generate a relation from diagnostics in run A to 352 diagnostics in run B. 353 354 The result is the relation as a list of triples (a, b) where 355 each element {a,b} is None or a matching element from the respective run 356 """ 357 358 res = ComparisonResult() 359 360 # Map size_before -> size_after 361 path_difference_data: List[float] = [] 362 363 diags_old = get_grouped_diagnostics(results_old.diagnostics) 364 diags_new = get_grouped_diagnostics(results_new.diagnostics) 365 366 locations_old = set(diags_old.keys()) 367 locations_new = set(diags_new.keys()) 368 369 common_locations = locations_old & locations_new 370 371 for location in common_locations: 372 old = diags_old[location] 373 new = diags_new[location] 374 375 # Quadratic algorithms in this part are fine because 'old' and 'new' 376 # are most commonly of size 1. 377 common: Set[AnalysisDiagnostic] = set() 378 for a in old: 379 for b in new: 380 if a.get_issue_identifier() == b.get_issue_identifier(): 381 a_path_len = a.get_path_length() 382 b_path_len = b.get_path_length() 383 384 if a_path_len != b_path_len: 385 386 if histogram == HistogramType.RELATIVE: 387 path_difference_data.append( 388 float(a_path_len) / b_path_len) 389 390 elif histogram == HistogramType.LOG_RELATIVE: 391 path_difference_data.append( 392 log(float(a_path_len) / b_path_len)) 393 394 elif histogram == HistogramType.ABSOLUTE: 395 path_difference_data.append( 396 a_path_len - b_path_len) 397 398 res.add_common(b) 399 common.add(a) 400 401 old = filter_issues(old, common) 402 new = filter_issues(new, common) 403 common = set() 404 405 for a in old: 406 for b in new: 407 if a.is_similar_to(b): 408 res.add_changed(a, b) 409 common.add(a) 410 common.add(b) 411 412 old = filter_issues(old, common) 413 new = filter_issues(new, common) 414 415 # Whatever is left in 'old' doesn't have a corresponding diagnostic 416 # in 'new', so we need to mark it as 'removed'. 417 for a in old: 418 res.add_removed(a) 419 420 # Whatever is left in 'new' doesn't have a corresponding diagnostic 421 # in 'old', so we need to mark it as 'added'. 422 for b in new: 423 res.add_added(b) 424 425 only_old_locations = locations_old - common_locations 426 for location in only_old_locations: 427 for a in diags_old[location]: 428 # These locations have been found only in the old build, so we 429 # need to mark all of therm as 'removed' 430 res.add_removed(a) 431 432 only_new_locations = locations_new - common_locations 433 for location in only_new_locations: 434 for b in diags_new[location]: 435 # These locations have been found only in the new build, so we 436 # need to mark all of therm as 'added' 437 res.add_added(b) 438 439 # FIXME: Add fuzzy matching. One simple and possible effective idea would 440 # be to bin the diagnostics, print them in a normalized form (based solely 441 # on the structure of the diagnostic), compute the diff, then use that as 442 # the basis for matching. This has the nice property that we don't depend 443 # in any way on the diagnostic format. 444 445 if histogram: 446 from matplotlib import pyplot 447 pyplot.hist(path_difference_data, bins=100) 448 pyplot.show() 449 450 return res 451 452 453def filter_issues(origin: List[AnalysisDiagnostic], 454 to_remove: Set[AnalysisDiagnostic]) \ 455 -> List[AnalysisDiagnostic]: 456 return [diag for diag in origin if diag not in to_remove] 457 458 459def compute_percentile(values: Sequence[T], percentile: float) -> T: 460 """ 461 Return computed percentile. 462 """ 463 return sorted(values)[int(round(percentile * len(values) + 0.5)) - 1] 464 465 466def derive_stats(results: AnalysisRun) -> Stats: 467 # Assume all keys are the same in each statistics bucket. 468 combined_data = defaultdict(list) 469 470 # Collect data on paths length. 471 for report in results.reports: 472 for diagnostic in report.diagnostics: 473 combined_data['PathsLength'].append(diagnostic.get_path_length()) 474 475 for stat in results.raw_stats: 476 for key, value in stat.items(): 477 combined_data[str(key)].append(value) 478 479 combined_stats: Stats = {} 480 481 for key, values in combined_data.items(): 482 combined_stats[key] = { 483 "max": max(values), 484 "min": min(values), 485 "mean": sum(values) / len(values), 486 "90th %tile": compute_percentile(values, 0.9), 487 "95th %tile": compute_percentile(values, 0.95), 488 "median": sorted(values)[len(values) // 2], 489 "total": sum(values) 490 } 491 492 return combined_stats 493 494 495# TODO: compare_results decouples comparison from the output, we should 496# do it here as well 497def compare_stats(results_old: AnalysisRun, results_new: AnalysisRun, 498 out: TextIO = sys.stdout): 499 stats_old = derive_stats(results_old) 500 stats_new = derive_stats(results_new) 501 502 old_keys = set(stats_old.keys()) 503 new_keys = set(stats_new.keys()) 504 keys = sorted(old_keys & new_keys) 505 506 for key in keys: 507 out.write(f"{key}\n") 508 509 nested_keys = sorted(set(stats_old[key]) & set(stats_new[key])) 510 511 for nested_key in nested_keys: 512 val_old = float(stats_old[key][nested_key]) 513 val_new = float(stats_new[key][nested_key]) 514 515 report = f"{val_old:.3f} -> {val_new:.3f}" 516 517 # Only apply highlighting when writing to TTY and it's not Windows 518 if out.isatty() and os.name != 'nt': 519 if val_new != 0: 520 ratio = (val_new - val_old) / val_new 521 if ratio < -0.2: 522 report = Colors.GREEN + report + Colors.CLEAR 523 elif ratio > 0.2: 524 report = Colors.RED + report + Colors.CLEAR 525 526 out.write(f"\t {nested_key} {report}\n") 527 528 removed_keys = old_keys - new_keys 529 if removed_keys: 530 out.write(f"REMOVED statistics: {removed_keys}\n") 531 532 added_keys = new_keys - old_keys 533 if added_keys: 534 out.write(f"ADDED statistics: {added_keys}\n") 535 536 out.write("\n") 537 538 539def dump_scan_build_results_diff(dir_old: ResultsDirectory, 540 dir_new: ResultsDirectory, 541 delete_empty: bool = True, 542 out: TextIO = sys.stdout, 543 show_stats: bool = False, 544 stats_only: bool = False, 545 histogram: Optional[HistogramType] = None, 546 verbose_log: Optional[str] = None): 547 """ 548 Compare directories with analysis results and dump results. 549 550 :param delete_empty: delete empty plist files 551 :param out: buffer to dump comparison results to. 552 :param show_stats: compare execution stats as well. 553 :param stats_only: compare ONLY execution stats. 554 :param histogram: optional histogram type to plot path differences. 555 :param verbose_log: optional path to an additional log file. 556 """ 557 results_old = load_results(dir_old, delete_empty, verbose_log) 558 results_new = load_results(dir_new, delete_empty, verbose_log) 559 560 if show_stats or stats_only: 561 compare_stats(results_old, results_new) 562 if stats_only: 563 return 564 565 # Open the verbose log, if given. 566 if verbose_log: 567 aux_log: Optional[TextIO] = open(verbose_log, "w") 568 else: 569 aux_log = None 570 571 diff = compare_results(results_old, results_new, histogram) 572 found_diffs = 0 573 total_added = 0 574 total_removed = 0 575 total_modified = 0 576 577 for new in diff.present_only_in_new: 578 out.write(f"ADDED: {new.get_readable_name()}\n\n") 579 found_diffs += 1 580 total_added += 1 581 if aux_log: 582 aux_log.write(f"('ADDED', {new.get_readable_name()}, " 583 f"{new.get_html_report()})\n") 584 585 for old in diff.present_only_in_old: 586 out.write(f"REMOVED: {old.get_readable_name()}\n\n") 587 found_diffs += 1 588 total_removed += 1 589 if aux_log: 590 aux_log.write(f"('REMOVED', {old.get_readable_name()}, " 591 f"{old.get_html_report()})\n") 592 593 for old, new in diff.changed_between_new_and_old: 594 out.write(f"MODIFIED: {old.get_readable_name()}\n") 595 found_diffs += 1 596 total_modified += 1 597 diffs = old.get_diffs(new) 598 str_diffs = [f" '{key}' changed: " 599 f"'{old_value}' -> '{new_value}'" 600 for key, (old_value, new_value) in diffs.items()] 601 out.write(",\n".join(str_diffs) + "\n\n") 602 if aux_log: 603 aux_log.write(f"('MODIFIED', {old.get_readable_name()}, " 604 f"{old.get_html_report()})\n") 605 606 total_reports = len(results_new.diagnostics) 607 out.write(f"TOTAL REPORTS: {total_reports}\n") 608 out.write(f"TOTAL ADDED: {total_added}\n") 609 out.write(f"TOTAL REMOVED: {total_removed}\n") 610 out.write(f"TOTAL MODIFIED: {total_modified}\n") 611 612 if aux_log: 613 aux_log.write(f"('TOTAL NEW REPORTS', {total_reports})\n") 614 aux_log.write(f"('TOTAL DIFFERENCES', {found_diffs})\n") 615 aux_log.close() 616 617 # TODO: change to NamedTuple 618 return found_diffs, len(results_old.diagnostics), \ 619 len(results_new.diagnostics) 620 621 622if __name__ == "__main__": 623 print("CmpRuns.py should not be used on its own.") 624 print("Please use 'SATest.py compare' instead") 625 sys.exit(1) 626