/* Title: Pure/Admin/build_log.scala
   Author: Makarius

Management of build log files and database storage.
*/

package isabelle


import java.io.{File => JFile}
import java.time.format.{DateTimeFormatter, DateTimeParseException}
import java.util.Locale

import scala.collection.immutable.SortedMap
import scala.collection.mutable
import scala.util.matching.Regex


object Build_Log
{
  /** content **/

  /* properties */

  // SQL column descriptors for per-log build meta-data; names double as
  // property keys in Meta_Info.props (see parse_meta_info below)
  object Prop
  {
    val build_tags = SQL.Column.string("build_tags")  // lines
    val build_args = SQL.Column.string("build_args")  // lines
    val build_group_id = SQL.Column.string("build_group_id")
    val build_id = SQL.Column.string("build_id")
    val build_engine = SQL.Column.string("build_engine")
    val build_host = SQL.Column.string("build_host")
    val build_start = SQL.Column.date("build_start")
    val build_end = SQL.Column.date("build_end")
    val isabelle_version = SQL.Column.string("isabelle_version")
    val afp_version = SQL.Column.string("afp_version")

    val all_props: List[SQL.Column] =
      List(build_tags, build_args, build_group_id, build_id, build_engine,
        build_host, build_start, build_end, isabelle_version, afp_version)
  }


  /* settings */

  // Isabelle environment settings that are recorded in log files and stored
  // as further SQL columns alongside the properties above
  object Settings
  {
    val ISABELLE_BUILD_OPTIONS = SQL.Column.string("ISABELLE_BUILD_OPTIONS")
    val ML_PLATFORM = SQL.Column.string("ML_PLATFORM")
    val ML_HOME = SQL.Column.string("ML_HOME")
    val ML_SYSTEM = SQL.Column.string("ML_SYSTEM")
    val ML_OPTIONS = SQL.Column.string("ML_OPTIONS")

    val ml_settings = List(ML_PLATFORM, ML_HOME, ML_SYSTEM, ML_OPTIONS)
    val all_settings = ISABELLE_BUILD_OPTIONS :: ml_settings

    type Entry = (String, String)
    type T = List[Entry]

    object Entry
    {
      // parse a "NAME=VALUE" line; the value may be quoted (perhaps_unquote)
      def unapply(s: String): Option[Entry] =
        s.indexOf('=') match {
          case -1 => None
          case i =>
            val a = s.substring(0, i)
            val b = Library.perhaps_unquote(s.substring(i + 1))
            Some((a, b))
        }
      // inverse of unapply: print a quoted "NAME=VALUE" line
      def apply(a:
String, b: String): String = a + "=" + quote(b)
      def getenv(a: String): String = apply(a, Isabelle_System.getenv(a))
    }

    // render the current process environment for all_settings as log text
    def show(): String =
      cat_lines(
        List(Entry.getenv(ISABELLE_BUILD_OPTIONS.name), "") :::
        ml_settings.map(c => Entry.getenv(c.name)))
  }


  /* file names */

  // canonical log stamp: "yyyy-MM-dd.SSSSS" with seconds since midnight,
  // zero-padded to 5 digits
  def log_date(date: Date): String =
    String.format(Locale.ROOT, "%s.%05d",
      DateTimeFormatter.ofPattern("yyyy-MM-dd").format(date.rep),
      java.lang.Long.valueOf((date.time - date.midnight.time).ms / 1000))

  def log_subdir(date: Date): Path =
    Path.explode("log") + Path.explode(date.rep.getYear.toString)

  def log_filename(engine: String, date: Date, more: List[String] = Nil): Path =
    Path.explode((engine :: log_date(date) :: more).mkString("", "_", ".log"))



  /** log file **/

  def print_date(date: Date): String = Log_File.Date_Format(date)

  object Log_File
  {
    /* log file */

    // strip a known compression/log suffix, if any, to get the base name
    def plain_name(name: String): String =
    {
      List(".log", ".log.gz", ".log.xz", ".gz", ".xz").find(name.endsWith(_)) match {
        case Some(s) => Library.try_unsuffix(s, name).get
        case None => name
      }
    }

    def apply(name: String, lines: List[String]): Log_File =
      new Log_File(plain_name(name), lines)

    def apply(name: String, text: String): Log_File =
      Log_File(name, Library.trim_split_lines(text))

    // read a log file, transparently uncompressing .gz / .xz
    def apply(file: JFile): Log_File =
    {
      val name = file.getName
      val text =
        if (name.endsWith(".gz")) File.read_gzip(file)
        else if (name.endsWith(".xz")) File.read_xz(file)
        else File.read(file)
      apply(name, text)
    }

    def apply(path: Path): Log_File = apply(path.file)


    /* log file collections */

    // recognize file names of interesting log files, excluding a few
    // well-known aggregate logs that carry no build information
    def is_log(file: JFile,
      prefixes: List[String] =
        List(Build_History.log_prefix, Identify.log_prefix, Identify.log_prefix2,
          Isatest.log_prefix, AFP_Test.log_prefix, Jenkins.log_prefix),
      suffixes: List[String] = List(".log", ".log.gz", ".log.xz")):
Boolean =
    {
      val name = file.getName

      prefixes.exists(name.startsWith(_)) &&
      suffixes.exists(name.endsWith(_)) &&
      name != "isatest.log" &&
      name != "afp-test.log" &&
      name != "main.log"
    }


    /* date format */

    // lenient date parsing for the various historic log-file date formats:
    // several pattern variants in English/German locales, with an input
    // "tune" pass that normalizes legacy timezone and weekday spellings
    val Date_Format =
    {
      val fmts =
        Date.Formatter.variants(
          List("EEE MMM d HH:mm:ss O yyyy", "EEE MMM d HH:mm:ss VV yyyy"),
          List(Locale.ENGLISH, Locale.GERMAN)) :::
        List(
          DateTimeFormatter.RFC_1123_DATE_TIME,
          Date.Formatter.pattern("EEE MMM d HH:mm:ss yyyy").withZone(Date.timezone_berlin))

      // map legacy timezone abbreviations to parseable zone ids;
      // "EST" occurs in old German logs meaning Berlin time, not US Eastern
      def tune_timezone(s: String): String =
        s match {
          case "CET" | "MET" => "GMT+1"
          case "CEST" | "MEST" => "GMT+2"
          case "EST" => "Europe/Berlin"
          case _ => s
        }
      // map old 3-letter German weekday abbreviations to the 2-letter forms
      // expected by the GERMAN locale formatter
      def tune_weekday(s: String): String =
        s match {
          case "Die" => "Di"
          case "Mit" => "Mi"
          case "Don" => "Do"
          case "Fre" => "Fr"
          case "Sam" => "Sa"
          case "Son" => "So"
          case _ => s
        }

      // normalize one date line before parsing; "M\uFFFDr" is a mangled
      // German "Mär(z)" month token (U+FFFD replacement character) that is
      // kept verbatim while the surrounding tokens are tuned
      // (fixed: the replacement literal was itself mojibake, "M??r" with two
      // replacement characters, so it could never reproduce the matched token)
      def tune(s: String): String =
        Word.implode(
          Word.explode(s) match {
            case a :: "M\uFFFDr" :: bs => tune_weekday(a) :: "M\uFFFDr" :: bs.map(tune_timezone(_))
            case a :: bs => tune_weekday(a) :: bs.map(tune_timezone(_))
            case Nil => Nil
          }
        )

      Date.Format.make(fmts, tune)
    }


    /* inlined content */

    // render properties as a marked log line (YXML-encoded, newlines escaped)
    def print_props(marker: String, props: Properties.T): String =
      marker + YXML.string_of_body(XML.Encode.properties(Properties.encode_lines(props)))
  }

  // immutable view of one log file: plain base name + content lines
  class Log_File private(val name: String, val lines: List[String])
  {
    log_file =>

    override def toString: String = name

    def text: String = cat_lines(lines)

    def err(msg: String): Nothing =
      error("Error in log file " + quote(name) + ": " + msg)


    /* date format */

    // extractor that parses a date or fails hard with a log-file error
    object Strict_Date
    {
      def unapply(s: String): Some[Date] =
        try { Some(Log_File.Date_Format.parse(s)) }
        catch { case exn: DateTimeParseException =>
log_file.err(exn.getMessage) }
    }


    /* inlined content */

    // first defined result of f over the lines, in order
    def find[A](f: String => Option[A]): Option[A] =
      lines.iterator.map(f).find(_.isDefined).map(_.get)

    def find_line(marker: String): Option[String] =
      find(Library.try_unprefix(marker, _))

    // first single-group match of any regex, trying regexes in order;
    // only matches with exactly one capture group are accepted
    def find_match(regexes: List[Regex]): Option[String] =
      regexes match {
        case Nil => None
        case regex :: rest =>
          lines.iterator.map(regex.unapplySeq(_)).find(res => res.isDefined && res.get.length == 1).
            map(res => res.get.head) orElse find_match(rest)
      }


    /* settings */

    // first "NAME=..." line for the given setting name
    def get_setting(a: String): Option[Settings.Entry] =
      lines.find(_.startsWith(a + "=")) match {
        case Some(line) => Settings.Entry.unapply(line)
        case None => None
      }

    def get_all_settings: Settings.T =
      for { c <- Settings.all_settings; entry <- get_setting(c.name) }
      yield entry


    /* properties (YXML) */

    val xml_cache = XML.make_cache()

    // decode one YXML-encoded properties line (inverse of Log_File.print_props)
    def parse_props(text: String): Properties.T =
      try {
        xml_cache.props(Properties.decode_lines(XML.Decode.properties(YXML.parse_body(text))))
      }
      catch { case _: XML.Error => log_file.err("malformed properties") }

    def filter_lines(marker: String): List[String] =
      for (line <- lines; s <- Library.try_unprefix(marker, line)) yield s

    def filter_props(marker: String): List[Properties.T] =
      for (s <- filter_lines(marker) if YXML.detect(s)) yield parse_props(s)

    def find_props(marker: String): Option[Properties.T] =
      find_line(marker) match {
        case Some(text) if YXML.detect(text) => Some(parse_props(text))
        case _ => None
      }


    /* parse various formats */

    def parse_meta_info(): Meta_Info = Build_Log.parse_meta_info(log_file)

    def parse_build_info(ml_statistics: Boolean = false): Build_Info =
      Build_Log.parse_build_info(log_file, ml_statistics)

    def parse_session_info(
      command_timings: Boolean = false,
theory_timings: Boolean = false,
      ml_statistics: Boolean = false,
      task_statistics: Boolean = false): Session_Info =
      Build_Log.parse_session_info(
        log_file, command_timings, theory_timings, ml_statistics, task_statistics)
  }



  /** digested meta info: produced by Admin/build_history in log.xz file **/

  object Meta_Info
  {
    val empty: Meta_Info = Meta_Info(Nil, Nil)
  }

  // build meta-data: properties (Prop.*) plus recorded environment settings
  sealed case class Meta_Info(props: Properties.T, settings: Settings.T)
  {
    def is_empty: Boolean = props.isEmpty && settings.isEmpty

    // properties take precedence over settings of the same name
    def get(c: SQL.Column): Option[String] =
      Properties.get(props, c.name) orElse
      Properties.get(settings, c.name)

    def get_date(c: SQL.Column): Option[Date] =
      get(c).map(Log_File.Date_Format.parse(_))
  }

  // "identify" log format: minimal logs that record repository versions only
  object Identify
  {
    val log_prefix = "isabelle_identify_"
    val log_prefix2 = "plain_identify_"

    def engine(log_file: Log_File): String =
      if (log_file.name.startsWith(Jenkins.log_prefix)) "jenkins_identify"
      else if (log_file.name.startsWith(log_prefix2)) "plain_identify"
      else "identify"

    def content(date: Date, isabelle_version: Option[String], afp_version: Option[String]): String =
      terminate_lines(
        List("isabelle_identify: " + Build_Log.print_date(date), "") :::
        isabelle_version.map("Isabelle version: " + _).toList :::
        afp_version.map("AFP version: " + _).toList)

    val Start = new Regex("""^isabelle_identify: (.+)$""")
    val No_End = new Regex("""$.""")  // deliberately never matches: no end line
    val Isabelle_Version = List(new Regex("""^Isabelle version: (\S+)$"""))
    val AFP_Version = List(new Regex("""^AFP version: (\S+)$"""))
  }

  // legacy "isatest" log format
  object Isatest
  {
    val log_prefix = "isatest-makeall-"
    val engine = "isatest"
    val Start = new Regex("""^------------------- starting test --- (.+) --- (.+)$""")
    val End = new Regex("""^------------------- test (?:successful|FAILED) --- (.+) --- .*$""")
    val Isabelle_Version = List(new
Regex("""^Isabelle version: (\S+)$"""))
  }

  // legacy "afp-test" log format
  object AFP_Test
  {
    val log_prefix = "afp-test-devel-"
    val engine = "afp-test"
    val Start = new Regex("""^Start test(?: for .+)? at ([^,]+), (.*)$""")
    val Start_Old = new Regex("""^Start test(?: for .+)? at ([^,]+)$""")
    val End = new Regex("""^End test on (.+), .+, elapsed time:.*$""")
    val Isabelle_Version = List(new Regex("""^Isabelle version: .* -- hg id (\S+)$"""))
    val AFP_Version = List(new Regex("""^AFP version: .* -- hg id (\S+)$"""))
    val Bad_Init = new Regex("""^cp:.*: Disc quota exceeded$""")
  }

  // Jenkins CI log format
  object Jenkins
  {
    val log_prefix = "jenkins_"
    val engine = "jenkins"
    val Host = new Regex("""^Building remotely on (\S+) \((\S+)\).*$""")
    val Start = new Regex("""^(?:Started by an SCM change|Started from command line by admin|).*$""")
    val Start_Date = new Regex("""^Build started at (.+)$""")
    val No_End = new Regex("""$.""")  // deliberately never matches: no end line
    val Isabelle_Version =
      List(new Regex("""^(?:Build for Isabelle id|Isabelle id) (\w+).*$"""),
        new Regex("""^ISABELLE_CI_REPO_ID="(\w+)".*$"""),
        new Regex("""^(\w{12}) tip.*$"""))
    val AFP_Version =
      List(new Regex("""^(?:Build for AFP id|AFP id) (\w+).*$"""),
        new Regex("""^ISABELLE_CI_AFP_ID="(\w+)".*$"""))
    val CONFIGURATION = "=== CONFIGURATION ==="
    val BUILD = "=== BUILD ==="
  }

  // detect the log-file format from its first line(s) and extract Meta_Info
  private def parse_meta_info(log_file: Log_File): Meta_Info =
  {
    // generic extraction; regex parameters are capitalized so that they can
    // be used directly as pattern extractors below
    def parse(engine: String, host: String, start: Date,
      End: Regex, Isabelle_Version: List[Regex], AFP_Version: List[Regex]): Meta_Info =
    {
      // synthetic id: "<host-or-engine>:<start millis>"
      val build_id =
      {
        val prefix = proper_string(host) orElse proper_string(engine) getOrElse "build"
        prefix + ":" + start.time.ms
      }
      val build_engine = if (engine == "") Nil else List(Prop.build_engine.name -> engine)
      val build_host = if (host == "") Nil else List(Prop.build_host.name -> host)

      val start_date = List(Prop.build_start.name -> print_date(start))
val end_date =
        log_file.lines.last match {
          case End(log_file.Strict_Date(end_date)) =>
            List(Prop.build_end.name -> print_date(end_date))
          case _ => Nil
        }

      val isabelle_version =
        log_file.find_match(Isabelle_Version).map(Prop.isabelle_version.name -> _)
      val afp_version =
        log_file.find_match(AFP_Version).map(Prop.afp_version.name -> _)

      Meta_Info((Prop.build_id.name -> build_id) :: build_engine ::: build_host :::
          start_date ::: end_date ::: isabelle_version.toList ::: afp_version.toList,
        log_file.get_all_settings)
    }

    // dispatch on the first line; case order matters, since e.g. the Jenkins
    // Start pattern is deliberately permissive
    log_file.lines match {
      case line :: _ if line.startsWith(Build_History.META_INFO_MARKER) =>
        Meta_Info(log_file.find_props(Build_History.META_INFO_MARKER).get,
          log_file.get_all_settings)

      case Identify.Start(log_file.Strict_Date(start)) :: _ =>
        parse(Identify.engine(log_file), "", start, Identify.No_End,
          Identify.Isabelle_Version, Identify.AFP_Version)

      case Isatest.Start(log_file.Strict_Date(start), host) :: _ =>
        parse(Isatest.engine, host, start, Isatest.End,
          Isatest.Isabelle_Version, Nil)

      case AFP_Test.Start(log_file.Strict_Date(start), host) :: _ =>
        parse(AFP_Test.engine, host, start, AFP_Test.End,
          AFP_Test.Isabelle_Version, AFP_Test.AFP_Version)

      case AFP_Test.Start_Old(log_file.Strict_Date(start)) :: _ =>
        parse(AFP_Test.engine, "", start, AFP_Test.End,
          AFP_Test.Isabelle_Version, AFP_Test.AFP_Version)

      case Jenkins.Start() :: _ =>
        // the start date appears after the "=== BUILD ===" marker; the host
        // is taken from the section before "=== CONFIGURATION ==="
        log_file.lines.dropWhile(_ != Jenkins.BUILD) match {
          case Jenkins.BUILD :: _ :: Jenkins.Start_Date(log_file.Strict_Date(start)) :: _ =>
            val host =
              log_file.lines.takeWhile(_ != Jenkins.CONFIGURATION).collectFirst({
                case Jenkins.Host(a, b) => a + "."
+ b
              }).getOrElse("")
            parse(Jenkins.engine, host, start.to(Date.timezone_berlin), Jenkins.No_End,
              Jenkins.Isabelle_Version, Jenkins.AFP_Version)
          case _ => Meta_Info.empty
        }

      // various degenerate logs that carry no usable meta info
      case line :: _ if line.startsWith("\u0000") => Meta_Info.empty
      case List(Isatest.End(_)) => Meta_Info.empty
      case _ :: AFP_Test.Bad_Init() :: _ => Meta_Info.empty
      case Nil => Meta_Info.empty

      case _ => log_file.err("cannot detect log file format")
    }
  }



  /** build info: toplevel output of isabelle build or Admin/build_history **/

  // inlined-content line markers (\f = form feed prefix)
  val THEORY_TIMING_MARKER = "\ftheory_timing = "
  val ML_STATISTICS_MARKER = "\fML_statistics = "
  val ERROR_MESSAGE_MARKER = "\ferror_message = "
  val SESSION_NAME = "session_name"

  object Session_Status extends Enumeration
  {
    val existing, finished, failed, cancelled = Value
  }

  // everything known about one session from the build log
  sealed case class Session_Entry(
    chapter: String = "",
    groups: List[String] = Nil,
    threads: Option[Int] = None,
    timing: Timing = Timing.zero,
    ml_timing: Timing = Timing.zero,
    sources: Option[String] = None,
    heap_size: Option[Long] = None,
    status: Option[Session_Status.Value] = None,
    errors: List[String] = Nil,
    theory_timings: Map[String, Timing] = Map.empty,
    ml_statistics: List[Properties.T] = Nil)
  {
    def proper_groups: Option[String] = if (groups.isEmpty) None else Some(cat_lines(groups))
    def finished: Boolean = status == Some(Session_Status.finished)
    def failed: Boolean = status == Some(Session_Status.failed)
  }

  object Build_Info
  {
    // placeholder row so that empty builds still produce table entries
    val sessions_dummy: Map[String, Session_Entry] =
      Map("" -> Session_Entry(theory_timings = Map("" -> Timing.zero)))
  }

  sealed case class Build_Info(sessions: Map[String, Session_Entry])
  {
    def finished_sessions: List[String] = for ((a, b) <- sessions.toList if b.finished) yield a
    def failed_sessions: List[String] = for ((a, b) <- sessions.toList if b.failed)
yield a
  }

  // scan the toplevel build output line by line, accumulating per-session data
  private def parse_build_info(log_file: Log_File, parse_ml_statistics: Boolean): Build_Info =
  {
    // split "chapter/session"; a bare name gets the empty chapter
    object Chapter_Name
    {
      def unapply(s: String): Some[(String, String)] =
        space_explode('/', s) match {
          case List(chapter, name) => Some((chapter, name))
          case _ => Some(("", s))
        }
    }

    val Session_No_Groups = new Regex("""^Session (\S+)$""")
    val Session_Groups = new Regex("""^Session (\S+) \((.*)\)$""")
    val Session_Finished1 =
      new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time, (\d+):(\d+):(\d+) cpu time.*$""")
    val Session_Finished2 =
      new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time.*$""")
    val Session_Timing =
      new Regex("""^Timing (\S+) \((\d+) threads, (\d+\.\d+)s elapsed time, (\d+\.\d+)s cpu time, (\d+\.\d+)s GC time.*$""")
    val Session_Started = new Regex("""^(?:Running|Building) (\S+) \.\.\.$""")
    val Session_Failed = new Regex("""^(\S+) FAILED""")
    val Session_Cancelled = new Regex("""^(\S+) CANCELLED""")
    val Sources = new Regex("""^Sources (\S+) (\S{""" + SHA1.digest_length + """})$""")
    val Heap = new Regex("""^Heap (\S+) \((\d+) bytes\)$""")

    // decode a theory_timing line into (session, theory -> timing);
    // '~' is replaced by '-' to undo an old output mangling of that line
    object Theory_Timing
    {
      def unapply(line: String): Option[(String, (String, Timing))] =
      {
        val line1 = line.replace('~', '-')
        Library.try_unprefix(THEORY_TIMING_MARKER, line1).map(log_file.parse_props(_)) match {
          case Some((SESSION_NAME, name) :: props) =>
            (props, props) match {
              case (Markup.Name(thy), Markup.Timing_Properties(t)) => Some((name, thy -> t))
              case _ => None
            }
          case _ => None
        }
      }
    }

    // mutable accumulators, keyed by session name, filled by the single
    // pass over the log lines below
    var chapter = Map.empty[String, String]
    var groups = Map.empty[String, List[String]]
    var threads = Map.empty[String, Int]
    var timing = Map.empty[String, Timing]
    var ml_timing = Map.empty[String, Timing]
    var started = Set.empty[String]
    var failed = Set.empty[String]
    var cancelled = Set.empty[String]
    var sources
= Map.empty[String, String]
    var heap_sizes = Map.empty[String, Long]
    var theory_timings = Map.empty[String, Map[String, Timing]]
    var ml_statistics = Map.empty[String, List[Properties.T]]
    var errors = Map.empty[String, List[String]]

    // union of all session names seen in any accumulator
    def all_sessions: Set[String] =
      chapter.keySet ++ groups.keySet ++ threads.keySet ++ timing.keySet ++ ml_timing.keySet ++
      failed ++ cancelled ++ started ++ sources.keySet ++ heap_sizes.keySet ++
      theory_timings.keySet ++ ml_statistics.keySet


    for (line <- log_file.lines) {
      line match {
        case Session_No_Groups(Chapter_Name(chapt, name)) =>
          chapter += (name -> chapt)
          groups += (name -> Nil)

        case Session_Groups(Chapter_Name(chapt, name), grps) =>
          chapter += (name -> chapt)
          groups += (name -> Word.explode(grps))

        case Session_Started(name) =>
          started += name

        case Session_Finished1(name,
            Value.Int(e1), Value.Int(e2), Value.Int(e3),
            Value.Int(c1), Value.Int(c2), Value.Int(c3)) =>
          val elapsed = Time.hms(e1, e2, e3)
          val cpu = Time.hms(c1, c2, c3)
          timing += (name -> Timing(elapsed, cpu, Time.zero))

        // older format without cpu time
        case Session_Finished2(name,
            Value.Int(e1), Value.Int(e2), Value.Int(e3)) =>
          val elapsed = Time.hms(e1, e2, e3)
          timing += (name -> Timing(elapsed, Time.zero, Time.zero))

        case Session_Timing(name,
            Value.Int(t), Value.Double(e), Value.Double(c), Value.Double(g)) =>
          val elapsed = Time.seconds(e)
          val cpu = Time.seconds(c)
          val gc = Time.seconds(g)
          ml_timing += (name -> Timing(elapsed, cpu, gc))
          threads += (name -> t)

        case Sources(name, s) =>
          sources += (name -> s)

        case Heap(name, Value.Long(size)) =>
          heap_sizes += (name -> size)

        case _ if line.startsWith(THEORY_TIMING_MARKER) && YXML.detect(line) =>
          line match {
            case Theory_Timing(name, theory_timing) =>
              theory_timings += (name -> (theory_timings.getOrElse(name, Map.empty) + theory_timing))
            case
_ => log_file.err("malformed theory_timing " + quote(line))
          }

        case _ if parse_ml_statistics && line.startsWith(ML_STATISTICS_MARKER) && YXML.detect(line) =>
          Library.try_unprefix(ML_STATISTICS_MARKER, line).map(log_file.parse_props(_)) match {
            case Some((SESSION_NAME, name) :: props) =>
              ml_statistics += (name -> (props :: ml_statistics.getOrElse(name, Nil)))
            case _ => log_file.err("malformed ML_statistics " + quote(line))
          }

        case _ if line.startsWith(ERROR_MESSAGE_MARKER) && YXML.detect(line) =>
          Library.try_unprefix(ERROR_MESSAGE_MARKER, line).map(log_file.parse_props(_)) match {
            case Some(List((SESSION_NAME, name), (Markup.CONTENT, msg))) =>
              errors += (name -> (Library.decode_lines(msg) :: errors.getOrElse(name, Nil)))
            case _ => log_file.err("malformed error message " + quote(line))
          }

        case _ =>
      }
    }

    // assemble one Session_Entry per session; status is derived from the
    // accumulated evidence (explicit failure beats cancellation beats timing)
    val sessions =
      Map(
        (for (name <- all_sessions.toList) yield {
          val status =
            if (failed(name)) Session_Status.failed
            else if (cancelled(name)) Session_Status.cancelled
            else if (timing.isDefinedAt(name) || ml_timing.isDefinedAt(name))
              Session_Status.finished
            else if (started(name)) Session_Status.failed
            else Session_Status.existing
          val entry =
            Session_Entry(
              chapter = chapter.getOrElse(name, ""),
              groups = groups.getOrElse(name, Nil),
              threads = threads.get(name),
              timing = timing.getOrElse(name, Timing.zero),
              ml_timing = ml_timing.getOrElse(name, Timing.zero),
              sources = sources.get(name),
              heap_size = heap_sizes.get(name),
              status = Some(status),
              // errors/ml_statistics were accumulated by prepending: reverse
              // to restore log order
              errors = errors.getOrElse(name, Nil).reverse,
              theory_timings = theory_timings.getOrElse(name, Map.empty),
              ml_statistics = ml_statistics.getOrElse(name, Nil).reverse)
          (name -> entry)
        }):_*)
    Build_Info(sessions)
  }



  /** session info: produced by isabelle build as session log.gz file **/

  sealed case class Session_Info(
session_timing: Properties.T,
    command_timings: List[Properties.T],
    theory_timings: List[Properties.T],
    ml_statistics: List[Properties.T],
    task_statistics: List[Properties.T],
    errors: List[String])
  {
    def error(s: String): Session_Info =
      copy(errors = errors ::: List(s))
  }

  // extract the requested categories from a per-session log; categories that
  // are not requested are left empty to save parsing work
  private def parse_session_info(
    log_file: Log_File,
    command_timings: Boolean,
    theory_timings: Boolean,
    ml_statistics: Boolean,
    task_statistics: Boolean): Session_Info =
  {
    Session_Info(
      session_timing = log_file.find_props("\fTiming = ") getOrElse Nil,
      command_timings = if (command_timings) log_file.filter_props("\fcommand_timing = ") else Nil,
      theory_timings = if (theory_timings) log_file.filter_props(THEORY_TIMING_MARKER) else Nil,
      ml_statistics = if (ml_statistics) log_file.filter_props(ML_STATISTICS_MARKER) else Nil,
      task_statistics = if (task_statistics) log_file.filter_props("\ftask_statistics = ") else Nil,
      errors = log_file.filter_lines(ERROR_MESSAGE_MARKER).map(Library.decode_lines(_)))
  }

  // store error messages as XZ-compressed YXML; None for no errors
  def compress_errors(errors: List[String], cache: XZ.Cache = XZ.cache()): Option[Bytes] =
    if (errors.isEmpty) None
    else {
      Some(Bytes(YXML.string_of_body(XML.Encode.list(XML.Encode.string)(errors))).
compress(cache = cache))
    }

  // inverse of compress_errors; empty bytes mean no errors
  def uncompress_errors(bytes: Bytes, cache: XZ.Cache = XZ.cache()): List[String] =
    if (bytes.isEmpty) Nil
    else {
      XML.Decode.list(YXML.string_of_body(_))(YXML.parse_body(bytes.uncompress(cache = cache).text))
    }



  /** persistent store **/

  /* SQL data model */

  object Data
  {
    // all tables share the "isabelle_build_log_" name prefix
    def build_log_table(name: String, columns: List[SQL.Column], body: String = ""): SQL.Table =
      SQL.Table("isabelle_build_log_" + name, columns, body)


    /* main content */

    val log_name = SQL.Column.string("log_name").make_primary_key
    val session_name = SQL.Column.string("session_name").make_primary_key
    val theory_name = SQL.Column.string("theory_name").make_primary_key
    val chapter = SQL.Column.string("chapter")
    val groups = SQL.Column.string("groups")
    val threads = SQL.Column.int("threads")
    val timing_elapsed = SQL.Column.long("timing_elapsed")
    val timing_cpu = SQL.Column.long("timing_cpu")
    val timing_gc = SQL.Column.long("timing_gc")
    val timing_factor = SQL.Column.double("timing_factor")
    val ml_timing_elapsed = SQL.Column.long("ml_timing_elapsed")
    val ml_timing_cpu = SQL.Column.long("ml_timing_cpu")
    val ml_timing_gc = SQL.Column.long("ml_timing_gc")
    val ml_timing_factor = SQL.Column.double("ml_timing_factor")
    val theory_timing_elapsed = SQL.Column.long("theory_timing_elapsed")
    val theory_timing_cpu = SQL.Column.long("theory_timing_cpu")
    val theory_timing_gc = SQL.Column.long("theory_timing_gc")
    val heap_size = SQL.Column.long("heap_size")
    val status = SQL.Column.string("status")
    val errors = SQL.Column.bytes("errors")
    val sources = SQL.Column.string("sources")
    val ml_statistics = SQL.Column.bytes("ml_statistics")
    val known = SQL.Column.bool("known")

    val meta_info_table =
      build_log_table("meta_info", log_name :: Prop.all_props ::: Settings.all_settings)

    val sessions_table =
build_log_table("sessions",
        List(log_name, session_name, chapter, groups, threads, timing_elapsed, timing_cpu,
          timing_gc, timing_factor, ml_timing_elapsed, ml_timing_cpu, ml_timing_gc, ml_timing_factor,
          heap_size, status, errors, sources))

    val theories_table =
      build_log_table("theories",
        List(log_name, session_name, theory_name, theory_timing_elapsed, theory_timing_cpu,
          theory_timing_gc))

    val ml_statistics_table =
      build_log_table("ml_statistics", List(log_name, session_name, ml_statistics))


    /* AFP versions */

    // distinct (isabelle_version, afp_version) pairs that occur together
    val isabelle_afp_versions_table: SQL.Table =
    {
      val version1 = Prop.isabelle_version
      val version2 = Prop.afp_version
      build_log_table("isabelle_afp_versions", List(version1.make_primary_key, version2),
        SQL.select(List(version1, version2), distinct = true) + meta_info_table +
        " WHERE " + version1.defined + " AND " + version2.defined)
    }


    /* earliest pull date for repository version (PostgreSQL queries) */

    def pull_date(afp: Boolean = false) =
      if (afp) SQL.Column.date("afp_pull_date")
      else SQL.Column.date("pull_date")

    // earliest build_start per repository version (optionally keyed on the
    // AFP version as well)
    def pull_date_table(afp: Boolean = false): SQL.Table =
    {
      val (name, versions) =
        if (afp) ("afp_pull_date", List(Prop.isabelle_version, Prop.afp_version))
        else ("pull_date", List(Prop.isabelle_version))

      build_log_table(name, versions.map(_.make_primary_key) ::: List(pull_date(afp)),
        "SELECT " + versions.mkString(", ") +
        ", min(" + Prop.build_start + ") AS " + pull_date(afp) +
        " FROM " + meta_info_table +
        " WHERE " + (versions ::: List(Prop.build_start)).map(_.defined).mkString(" AND ") +
        " GROUP BY " + versions.mkString(", "))
    }


    /* recent entries */

    // PostgreSQL expression for "days ago"; negative days are clamped to 0
    def recent_time(days: Int): SQL.Source =
      "now() - INTERVAL '" + days.max(0) + " days'"

    def recent_pull_date_table(
      days: Int, rev: String = "", afp_rev: Option[String] = None): SQL.Table =
    {
val afp = afp_rev.isDefined
      val rev2 = afp_rev.getOrElse("")
      val table = pull_date_table(afp)

      val version1 = Prop.isabelle_version
      val version2 = Prop.afp_version
      val eq1 = version1(table) + " = " + SQL.string(rev)
      val eq2 = version2(table) + " = " + SQL.string(rev2)

      // recent entries, plus the explicitly requested revision(s) even if
      // they fall outside the time window
      SQL.Table("recent_pull_date", table.columns,
        table.select(table.columns,
          "WHERE " + pull_date(afp)(table) + " > " + recent_time(days) +
          (if (rev != "" && rev2 == "") " OR " + eq1
           else if (rev == "" && rev2 != "") " OR " + eq2
           else if (rev != "" && rev2 != "") " OR (" + eq1 + " AND " + eq2 + ")"
           else "")))
    }

    def select_recent_log_names(days: Int): SQL.Source =
    {
      val table1 = meta_info_table
      val table2 = recent_pull_date_table(days)
      table1.select(List(log_name), distinct = true) + SQL.join_inner + table2.query_named +
        " ON " + Prop.isabelle_version(table1) + " = " + Prop.isabelle_version(table2)
    }

    // recent versions, newest first, with a "known" flag telling whether the
    // version already has a meta_info entry matching the given sql filter
    def select_recent_versions(days: Int,
      rev: String = "", afp_rev: Option[String] = None, sql: SQL.Source = ""): SQL.Source =
    {
      val afp = afp_rev.isDefined
      val version = Prop.isabelle_version
      val table1 = recent_pull_date_table(days, rev = rev, afp_rev = afp_rev)
      val table2 = meta_info_table
      val aux_table = SQL.Table("aux", table2.columns, table2.select(sql = sql))

      val columns =
        table1.columns.map(c => c(table1)) :::
        List(known.copy(expr = log_name(aux_table).defined))
      SQL.select(columns, distinct = true) +
        table1.query_named + SQL.join_outer + aux_table.query_named +
        " ON " + version(table1) + " = " + version(aux_table) +
        " ORDER BY " + pull_date(afp)(table1) + " DESC"
    }


    /* universal view on main data */

    // meta_info joined with both pull-date tables and the sessions/ml_statistics
    // tables, built up via intermediate named sub-queries a/b/c
    val universal_table: SQL.Table =
    {
      val afp_pull_date = pull_date(afp = true)
      val version1 = Prop.isabelle_version
      val version2 = Prop.afp_version
      val table1 = meta_info_table
      val table2 =
pull_date_table(afp = true)
      val table3 = pull_date_table()

      // a: meta_info + afp_pull_date (outer join on both versions)
      val a_columns = log_name :: afp_pull_date :: table1.columns.tail
      val a_table =
        SQL.Table("a", a_columns,
          SQL.select(List(log_name, afp_pull_date) ::: table1.columns.tail.map(_.apply(table1))) +
          table1 + SQL.join_outer + table2 +
          " ON " + version1(table1) + " = " + version1(table2) +
          " AND " + version2(table1) + " = " + version2(table2))

      // b: a + plain pull_date (outer join on the Isabelle version)
      val b_columns = log_name :: pull_date() :: a_columns.tail
      val b_table =
        SQL.Table("b", b_columns,
          SQL.select(
            List(log_name(a_table), pull_date()(table3)) ::: a_columns.tail.map(_.apply(a_table))) +
          a_table.query_named + SQL.join_outer + table3 +
          " ON " + version1(a_table) + " = " + version1(table3))

      // c: b + per-session data (inner join on log_name)
      val c_columns = b_columns ::: sessions_table.columns.tail
      val c_table =
        SQL.Table("c", c_columns,
          SQL.select(log_name(b_table) :: c_columns.tail) +
          b_table.query_named + SQL.join_inner + sessions_table +
          " ON " + log_name(b_table) + " = " + log_name(sessions_table))

      // final view: c + optional ml_statistics blob per (log_name, session_name)
      SQL.Table("isabelle_build_log", c_columns ::: List(ml_statistics),
        {
          SQL.select(c_columns.map(_.apply(c_table)) ::: List(ml_statistics)) +
          c_table.query_named + SQL.join_outer + ml_statistics_table +
          " ON " + log_name(c_table) + " = " + log_name(ml_statistics_table) +
          " AND " + session_name(c_table) + " = " + session_name(ml_statistics_table)
        })
    }
  }


  /* database access */

  def store(options: Options): Store = new Store(options)

  class Store private[Build_Log](options: Options)
  {
    val xml_cache: XML.Cache = XML.make_cache()
    val xz_cache: XZ.Cache = XZ.make_cache()

    // connect to the PostgreSQL build-log database, optionally tunnelled
    // via SSH; all connection parameters default to the system options
    def open_database(
      user: String = options.string("build_log_database_user"),
      password: String = options.string("build_log_database_password"),
      database: String = options.string("build_log_database_name"),
      host: String = options.string("build_log_database_host"),
port: Int = options.int("build_log_database_port"),
      ssh_host: String = options.string("build_log_ssh_host"),
      ssh_user: String = options.string("build_log_ssh_user"),
      ssh_port: Int = options.int("build_log_ssh_port")): PostgreSQL.Database =
    {
      PostgreSQL.open_database(
        user = user, password = password, database = database, host = host, port = port,
        ssh =
          if (ssh_host == "") None
          // fixed: the SSH session must use ssh_port, not the database port
          // (ssh_port was previously unused and "port" was passed instead)
          else Some(SSH.open_session(options, host = ssh_host, user = ssh_user, port = ssh_port)),
        ssh_close = true)
    }

    // scan dirs for log files, write their content to the database and
    // (re-)create the derived views
    def update_database(db: PostgreSQL.Database, dirs: List[Path], ml_statistics: Boolean = false)
    {
      val log_files =
        dirs.flatMap(dir =>
          File.find_files(dir.file, pred = Log_File.is_log(_), follow_links = true))
      write_info(db, log_files, ml_statistics = ml_statistics)

      db.create_view(Data.pull_date_table())
      db.create_view(Data.pull_date_table(afp = true))
      db.create_view(Data.universal_table)
    }

    // copy the recent slice of the PostgreSQL database into a fresh SQLite
    // file (the file is deleted first); ml_statistics blobs only on request
    def snapshot_database(db: PostgreSQL.Database, sqlite_database: Path,
      days: Int = 100, ml_statistics: Boolean = false)
    {
      Isabelle_System.mkdirs(sqlite_database.dir)
      sqlite_database.file.delete

      using(SQLite.open_database(sqlite_database))(db2 =>
        {
          db.transaction {
            db2.transaction {
              // main content
              db2.create_table(Data.meta_info_table)
              db2.create_table(Data.sessions_table)
              db2.create_table(Data.theories_table)
              db2.create_table(Data.ml_statistics_table)

              val recent_log_names =
                db.using_statement(Data.select_recent_log_names(days))(stmt =>
                  stmt.execute_query().iterator(_.string(Data.log_name)).toList)

              for (log_name <- recent_log_names) {
                read_meta_info(db, log_name).foreach(meta_info =>
                  update_meta_info(db2, log_name, meta_info))

                update_sessions(db2, log_name, read_build_info(db, log_name))

                if (ml_statistics) {
                  update_ml_statistics(db2, log_name,
                    read_build_info(db, log_name, ml_statistics = true))
942 } 943 } 944 945 // pull_date 946 for (afp <- List(false, true)) 947 { 948 val afp_rev = if (afp) Some("") else None 949 val table = Data.pull_date_table(afp) 950 db2.create_table(table) 951 db2.using_statement(table.insert())(stmt2 => 952 { 953 db.using_statement( 954 Data.recent_pull_date_table(days, afp_rev = afp_rev).query)(stmt => 955 { 956 val res = stmt.execute_query() 957 while (res.next()) { 958 for ((c, i) <- table.columns.zipWithIndex) { 959 stmt2.string(i + 1) = res.get_string(c) 960 } 961 stmt2.execute() 962 } 963 }) 964 }) 965 } 966 967 // full view 968 db2.create_view(Data.universal_table) 969 } 970 } 971 db2.rebuild 972 }) 973 } 974 975 def domain(db: SQL.Database, table: SQL.Table, column: SQL.Column): Set[String] = 976 db.using_statement(table.select(List(column), distinct = true))(stmt => 977 stmt.execute_query().iterator(_.string(column)).toSet) 978 979 def update_meta_info(db: SQL.Database, log_name: String, meta_info: Meta_Info) 980 { 981 val table = Data.meta_info_table 982 db.using_statement(db.insert_permissive(table))(stmt => 983 { 984 stmt.string(1) = log_name 985 for ((c, i) <- table.columns.tail.zipWithIndex) { 986 if (c.T == SQL.Type.Date) 987 stmt.date(i + 2) = meta_info.get_date(c) 988 else 989 stmt.string(i + 2) = meta_info.get(c) 990 } 991 stmt.execute() 992 }) 993 } 994 995 def update_sessions(db: SQL.Database, log_name: String, build_info: Build_Info) 996 { 997 val table = Data.sessions_table 998 db.using_statement(db.insert_permissive(table))(stmt => 999 { 1000 val sessions = 1001 if (build_info.sessions.isEmpty) Build_Info.sessions_dummy 1002 else build_info.sessions 1003 for ((session_name, session) <- sessions) { 1004 stmt.string(1) = log_name 1005 stmt.string(2) = session_name 1006 stmt.string(3) = proper_string(session.chapter) 1007 stmt.string(4) = session.proper_groups 1008 stmt.int(5) = session.threads 1009 stmt.long(6) = session.timing.elapsed.proper_ms 1010 stmt.long(7) = session.timing.cpu.proper_ms 1011 
stmt.long(8) = session.timing.gc.proper_ms 1012 stmt.double(9) = session.timing.factor 1013 stmt.long(10) = session.ml_timing.elapsed.proper_ms 1014 stmt.long(11) = session.ml_timing.cpu.proper_ms 1015 stmt.long(12) = session.ml_timing.gc.proper_ms 1016 stmt.double(13) = session.ml_timing.factor 1017 stmt.long(14) = session.heap_size 1018 stmt.string(15) = session.status.map(_.toString) 1019 stmt.bytes(16) = compress_errors(session.errors, cache = xz_cache) 1020 stmt.string(17) = session.sources 1021 stmt.execute() 1022 } 1023 }) 1024 } 1025 1026 def update_theories(db: SQL.Database, log_name: String, build_info: Build_Info) 1027 { 1028 val table = Data.theories_table 1029 db.using_statement(db.insert_permissive(table))(stmt => 1030 { 1031 val sessions = 1032 if (build_info.sessions.forall({ case (_, session) => session.theory_timings.isEmpty })) 1033 Build_Info.sessions_dummy 1034 else build_info.sessions 1035 for { 1036 (session_name, session) <- sessions 1037 (theory_name, timing) <- session.theory_timings 1038 } { 1039 stmt.string(1) = log_name 1040 stmt.string(2) = session_name 1041 stmt.string(3) = theory_name 1042 stmt.long(4) = timing.elapsed.ms 1043 stmt.long(5) = timing.cpu.ms 1044 stmt.long(6) = timing.gc.ms 1045 stmt.execute() 1046 } 1047 }) 1048 } 1049 1050 def update_ml_statistics(db: SQL.Database, log_name: String, build_info: Build_Info) 1051 { 1052 val table = Data.ml_statistics_table 1053 db.using_statement(db.insert_permissive(table))(stmt => 1054 { 1055 val ml_stats: List[(String, Option[Bytes])] = 1056 Par_List.map[(String, Session_Entry), (String, Option[Bytes])]( 1057 { case (a, b) => (a, Properties.compress(b.ml_statistics, cache = xz_cache).proper) }, 1058 build_info.sessions.iterator.filter(p => p._2.ml_statistics.nonEmpty).toList) 1059 val entries = if (ml_stats.nonEmpty) ml_stats else List("" -> None) 1060 for ((session_name, ml_statistics) <- entries) { 1061 stmt.string(1) = log_name 1062 stmt.string(2) = session_name 1063 stmt.bytes(3) 
= ml_statistics 1064 stmt.execute() 1065 } 1066 }) 1067 } 1068 1069 def write_info(db: SQL.Database, files: List[JFile], ml_statistics: Boolean = false) 1070 { 1071 abstract class Table_Status(table: SQL.Table) 1072 { 1073 db.create_table(table) 1074 private var known: Set[String] = domain(db, table, Data.log_name) 1075 1076 def required(file: JFile): Boolean = !known(Log_File.plain_name(file.getName)) 1077 1078 def update_db(db: SQL.Database, log_file: Log_File): Unit 1079 def update(log_file: Log_File) 1080 { 1081 if (!known(log_file.name)) { 1082 update_db(db, log_file) 1083 known += log_file.name 1084 } 1085 } 1086 } 1087 val status = 1088 List( 1089 new Table_Status(Data.meta_info_table) { 1090 override def update_db(db: SQL.Database, log_file: Log_File): Unit = 1091 update_meta_info(db, log_file.name, log_file.parse_meta_info()) 1092 }, 1093 new Table_Status(Data.sessions_table) { 1094 override def update_db(db: SQL.Database, log_file: Log_File): Unit = 1095 update_sessions(db, log_file.name, log_file.parse_build_info()) 1096 }, 1097 new Table_Status(Data.theories_table) { 1098 override def update_db(db: SQL.Database, log_file: Log_File): Unit = 1099 update_theories(db, log_file.name, log_file.parse_build_info()) 1100 }, 1101 new Table_Status(Data.ml_statistics_table) { 1102 override def update_db(db: SQL.Database, log_file: Log_File): Unit = 1103 if (ml_statistics) { 1104 update_ml_statistics(db, log_file.name, 1105 log_file.parse_build_info(ml_statistics = true)) 1106 } 1107 }) 1108 1109 for (file_group <- 1110 files.filter(file => status.exists(_.required(file))). 
1111 grouped(options.int("build_log_transaction_size") max 1)) 1112 { 1113 val log_files = Par_List.map[JFile, Log_File](Log_File.apply _, file_group) 1114 db.transaction { log_files.foreach(log_file => status.foreach(_.update(log_file))) } 1115 } 1116 } 1117 1118 def read_meta_info(db: SQL.Database, log_name: String): Option[Meta_Info] = 1119 { 1120 val table = Data.meta_info_table 1121 val columns = table.columns.tail 1122 db.using_statement(table.select(columns, Data.log_name.where_equal(log_name)))(stmt => 1123 { 1124 val res = stmt.execute_query() 1125 if (!res.next) None 1126 else { 1127 val results = 1128 columns.map(c => c.name -> 1129 (if (c.T == SQL.Type.Date) 1130 res.get_date(c).map(Log_File.Date_Format(_)) 1131 else 1132 res.get_string(c))) 1133 val n = Prop.all_props.length 1134 val props = for ((x, Some(y)) <- results.take(n)) yield (x, y) 1135 val settings = for ((x, Some(y)) <- results.drop(n)) yield (x, y) 1136 Some(Meta_Info(props, settings)) 1137 } 1138 }) 1139 } 1140 1141 def read_build_info( 1142 db: SQL.Database, 1143 log_name: String, 1144 session_names: List[String] = Nil, 1145 ml_statistics: Boolean = false): Build_Info = 1146 { 1147 val table1 = Data.sessions_table 1148 val table2 = Data.ml_statistics_table 1149 1150 val where_log_name = 1151 Data.log_name(table1).where_equal(log_name) + " AND " + 1152 Data.session_name(table1) + " <> ''" 1153 val where = 1154 if (session_names.isEmpty) where_log_name 1155 else where_log_name + " AND " + SQL.member(Data.session_name(table1).ident, session_names) 1156 1157 val columns1 = table1.columns.tail.map(_.apply(table1)) 1158 val (columns, from) = 1159 if (ml_statistics) { 1160 val columns = columns1 ::: List(Data.ml_statistics(table2)) 1161 val join = 1162 table1 + SQL.join_outer + table2 + " ON " + 1163 Data.log_name(table1) + " = " + Data.log_name(table2) + " AND " + 1164 Data.session_name(table1) + " = " + Data.session_name(table2) 1165 (columns, SQL.enclose(join)) 1166 } 1167 else (columns1, 
table1.ident) 1168 1169 val sessions = 1170 db.using_statement(SQL.select(columns) + from + " " + where)(stmt => 1171 { 1172 stmt.execute_query().iterator(res => 1173 { 1174 val session_name = res.string(Data.session_name) 1175 val session_entry = 1176 Session_Entry( 1177 chapter = res.string(Data.chapter), 1178 groups = split_lines(res.string(Data.groups)), 1179 threads = res.get_int(Data.threads), 1180 timing = res.timing(Data.timing_elapsed, Data.timing_cpu, Data.timing_gc), 1181 ml_timing = 1182 res.timing(Data.ml_timing_elapsed, Data.ml_timing_cpu, Data.ml_timing_gc), 1183 sources = res.get_string(Data.sources), 1184 heap_size = res.get_long(Data.heap_size), 1185 status = res.get_string(Data.status).map(Session_Status.withName(_)), 1186 errors = uncompress_errors(res.bytes(Data.errors), cache = xz_cache), 1187 ml_statistics = 1188 if (ml_statistics) { 1189 Properties.uncompress( 1190 res.bytes(Data.ml_statistics), cache = xz_cache, Some(xml_cache)) 1191 } 1192 else Nil) 1193 session_name -> session_entry 1194 }).toMap 1195 }) 1196 Build_Info(sessions) 1197 } 1198 } 1199} 1200