1#! /usr/bin/env python3 2 3# ################################################################ 4# Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. 5# All rights reserved. 6# 7# This source code is licensed under both the BSD-style license (found in the 8# LICENSE file in the root directory of this source tree) and the GPLv2 (found 9# in the COPYING file in the root directory of this source tree). 10# ########################################################################## 11 12# Limitations: 13# - doesn't support filenames with spaces 14# - dir1/zstd and dir2/zstd will be merged in a single results file 15 16import argparse 17import os # getloadavg 18import string 19import subprocess 20import time # strftime 21import traceback 22import hashlib 23import platform # system 24 25script_version = 'v1.1.2 (2017-03-26)' 26default_repo_url = 'https://github.com/facebook/zstd.git' 27working_dir_name = 'speedTest' 28working_path = os.getcwd() + '/' + working_dir_name # /path/to/zstd/tests/speedTest 29clone_path = working_path + '/' + 'zstd' # /path/to/zstd/tests/speedTest/zstd 30email_header = 'ZSTD_speedTest' 31pid = str(os.getpid()) 32verbose = False 33clang_version = "unknown" 34gcc_version = "unknown" 35args = None 36 37 38def hashfile(hasher, fname, blocksize=65536): 39 with open(fname, "rb") as f: 40 for chunk in iter(lambda: f.read(blocksize), b""): 41 hasher.update(chunk) 42 return hasher.hexdigest() 43 44 45def log(text): 46 print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text) 47 48 49def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True): 50 if print_command: 51 log("> " + command) 52 popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell, cwd=execute.cwd) 53 stdout_lines, stderr_lines = popen.communicate(timeout=args.timeout) 54 stderr_lines = stderr_lines.decode("utf-8") 55 stdout_lines = stdout_lines.decode("utf-8") 56 if print_output: 57 if stdout_lines: 58 print(stdout_lines) 59 if stderr_lines: 60 print(stderr_lines) 61 if popen.returncode is not None and popen.returncode != 0: 62 if stderr_lines and not print_output and print_error: 63 print(stderr_lines) 64 raise RuntimeError(stdout_lines + stderr_lines) 65 return (stdout_lines + stderr_lines).splitlines() 66execute.cwd = None 67 68 69def does_command_exist(command): 70 try: 71 execute(command, verbose, False, False) 72 except Exception: 73 return False 74 return True 75 76 77def send_email(emails, topic, text, have_mutt, have_mail): 78 logFileName = working_path + '/' + 'tmpEmailContent' 79 with open(logFileName, "w") as myfile: 80 myfile.writelines(text) 81 myfile.close() 82 if have_mutt: 83 execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose) 84 elif have_mail: 85 execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose) 86 else: 87 log("e-mail cannot be sent (mail or mutt not found)") 88 89 90def send_email_with_attachments(branch, commit, last_commit, args, text, results_files, 91 logFileName, have_mutt, have_mail): 92 with open(logFileName, "w") as myfile: 93 myfile.writelines(text) 94 myfile.close() 95 email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \ 96 % (email_header, pid, branch, commit, last_commit, 97 args.lowerLimit, args.ratioLimit) 98 if have_mutt: 99 execute('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files 100 + ' < ' + logFileName) 101 elif have_mail: 102 execute('mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName) 103 else: 104 log("e-mail cannot be sent (mail or mutt not found)") 105 106 107def git_get_branches(): 108 execute('git fetch -p', verbose) 109 branches = execute('git branch -rl', verbose) 110 output = [] 111 for line in branches: 112 if ("HEAD" not in line) and ("coverity_scan" not in line) and ("gh-pages" not in line): 113 output.append(line.strip()) 114 return output 115 116 117def git_get_changes(branch, commit, last_commit): 118 fmt = '--format="%h: (%an) %s, %ar"' 119 if last_commit is None: 120 commits = execute('git log -n 10 %s %s' % (fmt, commit)) 121 else: 122 commits = execute('git --no-pager log %s %s..%s' % (fmt, last_commit, commit)) 123 return str('Changes in %s since %s:\n' % (branch, last_commit)) + '\n'.join(commits) 124 125 126def get_last_results(resultsFileName): 127 if not os.path.isfile(resultsFileName): 128 return None, None, None, None 129 commit = None 130 csize = [] 131 cspeed = [] 132 dspeed = [] 133 with open(resultsFileName, 'r') as f: 134 for line in f: 135 words = line.split() 136 if len(words) <= 4: # branch + commit + compilerVer + md5 137 commit = words[1] 138 csize = [] 139 cspeed = [] 140 dspeed = [] 141 if (len(words) == 8) or (len(words) == 9): # results: "filename" or "XX files" 142 csize.append(int(words[1])) 143 cspeed.append(float(words[3])) 144 dspeed.append(float(words[5])) 145 return commit, csize, cspeed, dspeed 146 147 148def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, 149 testFilePath, fileName, last_csize, last_cspeed, last_dspeed): 150 sleepTime = 30 151 while os.getloadavg()[0] > args.maxLoadAvg: 152 log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds" 153 % (os.getloadavg()[0], args.maxLoadAvg, sleepTime)) 154 time.sleep(sleepTime) 155 start_load = str(os.getloadavg()) 156 osType = platform.system() 157 if osType == 'Linux': 158 cpuSelector = "taskset --cpu-list 0" 159 else: 160 cpuSelector = "" 161 if args.dictionary: 162 result = execute('%s programs/%s -rqi5b1e%s -D %s %s' % (cpuSelector, executableName, args.lastCLevel, args.dictionary, testFilePath), print_output=True) 163 else: 164 result = execute('%s programs/%s -rqi5b1e%s %s' % (cpuSelector, executableName, args.lastCLevel, testFilePath), print_output=True) 165 end_load = str(os.getloadavg()) 166 linesExpected = args.lastCLevel + 1 167 if len(result) != linesExpected: 168 raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result))) 169 with open(resultsFileName, "a") as myfile: 170 myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum)) 171 myfile.write('\n'.join(result) + '\n') 172 myfile.close() 173 if (last_cspeed == None): 174 log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName)) 175 return "" 176 commit, csize, cspeed, dspeed = get_last_results(resultsFileName) 177 text = "" 178 for i in range(0, min(len(cspeed), len(last_cspeed))): 179 print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], float(last_csize[i])/csize[i], fileName)) 180 if (cspeed[i]/last_cspeed[i] < args.lowerLimit): 181 text += "WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], fileName) 182 if (dspeed[i]/last_dspeed[i] < args.lowerLimit): 183 text += "WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, i+1, dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName) 184 if (float(last_csize[i])/csize[i] < args.ratioLimit): 185 text += "WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, i+1, csize[i], last_csize[i], float(last_csize[i])/csize[i], fileName) 186 if text: 187 text = args.message + ("\nmaxLoadAvg=%s load average at start=%s end=%s\n%s last_commit=%s md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)) + text 188 return text 189 190 191def update_config_file(branch, commit): 192 last_commit = None 193 commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt" 194 if os.path.isfile(commitFileName): 195 with open(commitFileName, 'r') as infile: 196 last_commit = infile.read() 197 with open(commitFileName, 'w') as outfile: 198 outfile.write(commit) 199 return last_commit 200 201 202def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName): 203 last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName) 204 if not args.dry_run: 205 text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed) 206 if text: 207 log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit)) 208 text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed) 209 return text 210 211 212def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail): 213 local_branch = branch.split('/')[1] 214 version = local_branch.rpartition('-')[2] + '_' + commit 215 if not args.dry_run: 216 execute('make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s" && ' % version + 217 'mv programs/zstd programs/zstd_clang && ' + 218 'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version) 219 md5_zstd = hashfile(hashlib.md5(), clone_path + '/programs/zstd') 220 md5_zstd32 = hashfile(hashlib.md5(), clone_path + '/programs/zstd32') 221 md5_zstd_clang = hashfile(hashlib.md5(), clone_path + '/programs/zstd_clang') 222 print("md5(zstd)=%s\nmd5(zstd32)=%s\nmd5(zstd_clang)=%s" % (md5_zstd, md5_zstd32, md5_zstd_clang)) 223 print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version)) 224 225 logFileName = working_path + "/log_" + branch.replace("/", "_") + ".txt" 226 text_to_send = [] 227 results_files = "" 228 if args.dictionary: 229 dictName = args.dictionary.rpartition('/')[2] 230 else: 231 dictName = None 232 233 for filePath in testFilePaths: 234 fileName = filePath.rpartition('/')[2] 235 if dictName: 236 resultsFileName = working_path + "/" + dictName.replace(".", "_") + "_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" 237 else: 238 resultsFileName = working_path + "/results_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" 239 text = double_check(branch, commit, args, 'zstd', md5_zstd, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName) 240 if text: 241 text_to_send.append(text) 242 results_files += resultsFileName + " " 243 resultsFileName = working_path + "/results32_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" 244 text = double_check(branch, commit, args, 'zstd32', md5_zstd32, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName) 245 if text: 246 text_to_send.append(text) 247 results_files += resultsFileName + " " 248 resultsFileName = working_path + "/resultsClang_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" 249 text = double_check(branch, commit, args, 'zstd_clang', md5_zstd_clang, 'clang_version='+clang_version, resultsFileName, filePath, fileName) 250 if text: 251 text_to_send.append(text) 252 results_files += resultsFileName + " " 253 if text_to_send: 254 send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail) 255 256 257if __name__ == '__main__': 258 parser = argparse.ArgumentParser() 259 parser.add_argument('testFileNames', help='file or directory names list for speed benchmark') 260 parser.add_argument('emails', help='list of e-mail addresses to send warnings') 261 parser.add_argument('--dictionary', '-D', help='path to the dictionary') 262 parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="") 263 parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url) 264 parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98) 265 parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999) 266 parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75) 267 parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5) 268 parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300) 269 parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800) 270 parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False) 271 parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False) 272 args = parser.parse_args() 273 verbose = args.verbose 274 275 # check if test files are accessible 276 testFileNames = args.testFileNames.split() 277 testFilePaths = [] 278 for fileName in testFileNames: 279 fileName = os.path.expanduser(fileName) 280 if os.path.isfile(fileName) or os.path.isdir(fileName): 281 testFilePaths.append(os.path.abspath(fileName)) 282 else: 283 log("ERROR: File/directory not found: " + fileName) 284 exit(1) 285 286 # check if dictionary is accessible 287 if args.dictionary: 288 args.dictionary = os.path.abspath(os.path.expanduser(args.dictionary)) 289 if not os.path.isfile(args.dictionary): 290 log("ERROR: Dictionary not found: " + args.dictionary) 291 exit(1) 292 293 # check availability of e-mail senders 294 have_mutt = does_command_exist("mutt -h") 295 have_mail = does_command_exist("mail -V") 296 if not have_mutt and not have_mail: 297 log("ERROR: e-mail senders 'mail' or 'mutt' not found") 298 exit(1) 299 300 clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0]; 301 gcc_version = execute("gcc -dumpversion", verbose)[0]; 302 303 if verbose: 304 print("PARAMETERS:\nrepoURL=%s" % args.repoURL) 305 print("working_path=%s" % working_path) 306 print("clone_path=%s" % clone_path) 307 print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths)) 308 print("message=%s" % args.message) 309 print("emails=%s" % args.emails) 310 print("dictionary=%s" % args.dictionary) 311 print("maxLoadAvg=%s" % args.maxLoadAvg) 312 print("lowerLimit=%s" % args.lowerLimit) 313 print("ratioLimit=%s" % args.ratioLimit) 314 print("lastCLevel=%s" % args.lastCLevel) 315 print("sleepTime=%s" % args.sleepTime) 316 print("timeout=%s" % args.timeout) 317 print("dry_run=%s" % args.dry_run) 318 print("verbose=%s" % args.verbose) 319 print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail)) 320 321 # clone ZSTD repo if needed 322 if not os.path.isdir(working_path): 323 os.mkdir(working_path) 324 if not os.path.isdir(clone_path): 325 execute.cwd = working_path 326 execute('git clone ' + args.repoURL) 327 if not os.path.isdir(clone_path): 328 log("ERROR: ZSTD clone not found: " + clone_path) 329 exit(1) 330 execute.cwd = clone_path 331 332 # check if speedTest.pid already exists 333 pidfile = "./speedTest.pid" 334 if os.path.isfile(pidfile): 335 log("ERROR: %s already exists, exiting" % pidfile) 336 exit(1) 337 338 send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail) 339 with open(pidfile, 'w') as the_file: 340 the_file.write(pid) 341 342 branch = "" 343 commit = "" 344 first_time = True 345 while True: 346 try: 347 if first_time: 348 first_time = False 349 else: 350 time.sleep(args.sleepTime) 351 loadavg = os.getloadavg()[0] 352 if (loadavg <= args.maxLoadAvg): 353 branches = git_get_branches() 354 for branch in branches: 355 commit = execute('git show -s --format=%h ' + branch, verbose)[0] 356 last_commit = update_config_file(branch, commit) 357 if commit == last_commit: 358 log("skipping branch %s: head %s already processed" % (branch, commit)) 359 else: 360 log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit)) 361 execute('git checkout -- . && git checkout ' + branch) 362 print(git_get_changes(branch, commit, last_commit)) 363 test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail) 364 else: 365 log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg)) 366 if verbose: 367 log("sleep for %s seconds" % args.sleepTime) 368 except Exception as e: 369 stack = traceback.format_exc() 370 email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit) 371 send_email(args.emails, email_topic, stack, have_mutt, have_mail) 372 print(stack) 373 except KeyboardInterrupt: 374 os.unlink(pidfile) 375 send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail) 376 exit(0) 377