1/*  Title:      Pure/Admin/build_log.scala
2    Author:     Makarius
3
4Management of build log files and database storage.
5*/
6
7package isabelle
8
9
10import java.io.{File => JFile}
11import java.time.format.{DateTimeFormatter, DateTimeParseException}
12import java.util.Locale
13
14import scala.collection.immutable.SortedMap
15import scala.collection.mutable
16import scala.util.matching.Regex
17
18
19object Build_Log
20{
21  /** content **/
22
23  /* properties */
24
  /* SQL columns for per-log build properties (one row of meta info per log file) */
  object Prop
  {
    val build_tags = SQL.Column.string("build_tags")  // lines
    val build_args = SQL.Column.string("build_args")  // lines
    val build_group_id = SQL.Column.string("build_group_id")
    val build_id = SQL.Column.string("build_id")
    val build_engine = SQL.Column.string("build_engine")
    val build_host = SQL.Column.string("build_host")
    val build_start = SQL.Column.date("build_start")
    val build_end = SQL.Column.date("build_end")
    val isabelle_version = SQL.Column.string("isabelle_version")
    val afp_version = SQL.Column.string("afp_version")

    // canonical column order, used for table layout in Data.meta_info_table
    val all_props: List[SQL.Column] =
      List(build_tags, build_args, build_group_id, build_id, build_engine,
        build_host, build_start, build_end, isabelle_version, afp_version)
  }
42
43
44  /* settings */
45
46  object Settings
47  {
48    val ISABELLE_BUILD_OPTIONS = SQL.Column.string("ISABELLE_BUILD_OPTIONS")
49    val ML_PLATFORM = SQL.Column.string("ML_PLATFORM")
50    val ML_HOME = SQL.Column.string("ML_HOME")
51    val ML_SYSTEM = SQL.Column.string("ML_SYSTEM")
52    val ML_OPTIONS = SQL.Column.string("ML_OPTIONS")
53
54    val ml_settings = List(ML_PLATFORM, ML_HOME, ML_SYSTEM, ML_OPTIONS)
55    val all_settings = ISABELLE_BUILD_OPTIONS :: ml_settings
56
57    type Entry = (String, String)
58    type T = List[Entry]
59
60    object Entry
61    {
62      def unapply(s: String): Option[Entry] =
63        s.indexOf('=') match {
64          case -1 => None
65          case i =>
66            val a = s.substring(0, i)
67            val b = Library.perhaps_unquote(s.substring(i + 1))
68            Some((a, b))
69        }
70      def apply(a: String, b: String): String = a + "=" + quote(b)
71      def getenv(a: String): String = apply(a, Isabelle_System.getenv(a))
72    }
73
74    def show(): String =
75      cat_lines(
76        List(Entry.getenv(ISABELLE_BUILD_OPTIONS.name), "") :::
77        ml_settings.map(c => Entry.getenv(c.name)))
78  }
79
80
81  /* file names */
82
83  def log_date(date: Date): String =
84    String.format(Locale.ROOT, "%s.%05d",
85      DateTimeFormatter.ofPattern("yyyy-MM-dd").format(date.rep),
86      java.lang.Long.valueOf((date.time - date.midnight.time).ms / 1000))
87
88  def log_subdir(date: Date): Path =
89    Path.explode("log") + Path.explode(date.rep.getYear.toString)
90
91  def log_filename(engine: String, date: Date, more: List[String] = Nil): Path =
92    Path.explode((engine :: log_date(date) :: more).mkString("", "_", ".log"))
93
94
95
96  /** log file **/
97
98  def print_date(date: Date): String = Log_File.Date_Format(date)
99
100  object Log_File
101  {
102    /* log file */
103
104    def plain_name(name: String): String =
105    {
106      List(".log", ".log.gz", ".log.xz", ".gz", ".xz").find(name.endsWith(_)) match {
107        case Some(s) => Library.try_unsuffix(s, name).get
108        case None => name
109      }
110    }
111
112    def apply(name: String, lines: List[String]): Log_File =
113      new Log_File(plain_name(name), lines)
114
115    def apply(name: String, text: String): Log_File =
116      Log_File(name, Library.trim_split_lines(text))
117
118    def apply(file: JFile): Log_File =
119    {
120      val name = file.getName
121      val text =
122        if (name.endsWith(".gz")) File.read_gzip(file)
123        else if (name.endsWith(".xz")) File.read_xz(file)
124        else File.read(file)
125      apply(name, text)
126    }
127
128    def apply(path: Path): Log_File = apply(path.file)
129
130
131    /* log file collections */
132
133    def is_log(file: JFile,
134      prefixes: List[String] =
135        List(Build_History.log_prefix, Identify.log_prefix, Identify.log_prefix2,
136          Isatest.log_prefix, AFP_Test.log_prefix, Jenkins.log_prefix),
137      suffixes: List[String] = List(".log", ".log.gz", ".log.xz")): Boolean =
138    {
139      val name = file.getName
140
141      prefixes.exists(name.startsWith(_)) &&
142      suffixes.exists(name.endsWith(_)) &&
143      name != "isatest.log" &&
144      name != "afp-test.log" &&
145      name != "main.log"
146    }
147
148
149    /* date format */
150
151    val Date_Format =
152    {
153      val fmts =
154        Date.Formatter.variants(
155          List("EEE MMM d HH:mm:ss O yyyy", "EEE MMM d HH:mm:ss VV yyyy"),
156          List(Locale.ENGLISH, Locale.GERMAN)) :::
157        List(
158          DateTimeFormatter.RFC_1123_DATE_TIME,
159          Date.Formatter.pattern("EEE MMM d HH:mm:ss yyyy").withZone(Date.timezone_berlin))
160
161      def tune_timezone(s: String): String =
162        s match {
163          case "CET" | "MET" => "GMT+1"
164          case "CEST" | "MEST" => "GMT+2"
165          case "EST" => "Europe/Berlin"
166          case _ => s
167        }
168      def tune_weekday(s: String): String =
169        s match {
170          case "Die" => "Di"
171          case "Mit" => "Mi"
172          case "Don" => "Do"
173          case "Fre" => "Fr"
174          case "Sam" => "Sa"
175          case "Son" => "So"
176          case _ => s
177        }
178
179      def tune(s: String): String =
180        Word.implode(
181          Word.explode(s) match {
182            case a :: "M\uFFFDr" :: bs => tune_weekday(a) :: "M��r" :: bs.map(tune_timezone(_))
183            case a :: bs => tune_weekday(a) :: bs.map(tune_timezone(_))
184            case Nil => Nil
185          }
186        )
187
188      Date.Format.make(fmts, tune)
189    }
190
191
192    /* inlined content */
193
194    def print_props(marker: String, props: Properties.T): String =
195      marker + YXML.string_of_body(XML.Encode.properties(Properties.encode_lines(props)))
196  }
197
  // In-memory log file: plain name plus content lines, with parsing utilities.
  class Log_File private(val name: String, val lines: List[String])
  {
    log_file =>

    override def toString: String = name

    def text: String = cat_lines(lines)

    // report an error, prefixed with this log file's name
    def err(msg: String): Nothing =
      error("Error in log file " + quote(name) + ": " + msg)


    /* date format */

    // extractor that parses a date or fails hard via err (never returns None)
    object Strict_Date
    {
      def unapply(s: String): Some[Date] =
        try { Some(Log_File.Date_Format.parse(s)) }
        catch { case exn: DateTimeParseException => log_file.err(exn.getMessage) }
    }


    /* inlined content */

    // first defined result of f over the lines
    def find[A](f: String => Option[A]): Option[A] =
      lines.iterator.map(f).find(_.isDefined).map(_.get)

    // first line starting with marker, with the marker stripped
    def find_line(marker: String): Option[String] =
      find(Library.try_unprefix(marker, _))

    // first single-group match of any regex, tried in list order
    def find_match(regexes: List[Regex]): Option[String] =
      regexes match {
        case Nil => None
        case regex :: rest =>
          lines.iterator.map(regex.unapplySeq(_)).find(res => res.isDefined && res.get.length == 1).
            map(res => res.get.head) orElse find_match(rest)
      }


    /* settings */

    // look up a "NAME=value" line for the given setting name
    def get_setting(a: String): Option[Settings.Entry] =
      lines.find(_.startsWith(a + "=")) match {
        case Some(line) => Settings.Entry.unapply(line)
        case None => None
      }

    def get_all_settings: Settings.T =
      for { c <- Settings.all_settings; entry <- get_setting(c.name) }
      yield entry


    /* properties (YXML) */

    // shared cache so repeated property values are interned
    val xml_cache = XML.make_cache()

    def parse_props(text: String): Properties.T =
      try {
        xml_cache.props(Properties.decode_lines(XML.Decode.properties(YXML.parse_body(text))))
      }
      catch { case _: XML.Error => log_file.err("malformed properties") }

    // all lines carrying the marker, with the marker stripped
    def filter_lines(marker: String): List[String] =
      for (line <- lines; s <- Library.try_unprefix(marker, line)) yield s

    def filter_props(marker: String): List[Properties.T] =
      for (s <- filter_lines(marker) if YXML.detect(s)) yield parse_props(s)

    def find_props(marker: String): Option[Properties.T] =
      find_line(marker) match {
        case Some(text) if YXML.detect(text) => Some(parse_props(text))
        case _ => None
      }


    /* parse various formats */

    def parse_meta_info(): Meta_Info = Build_Log.parse_meta_info(log_file)

    def parse_build_info(ml_statistics: Boolean = false): Build_Info =
      Build_Log.parse_build_info(log_file, ml_statistics)

    def parse_session_info(
        command_timings: Boolean = false,
        theory_timings: Boolean = false,
        ml_statistics: Boolean = false,
        task_statistics: Boolean = false): Session_Info =
      Build_Log.parse_session_info(
        log_file, command_timings, theory_timings, ml_statistics, task_statistics)
  }
288
289
290
291  /** digested meta info: produced by Admin/build_history in log.xz file **/
292
  object Meta_Info
  {
    // result for logs without recognizable meta info
    val empty: Meta_Info = Meta_Info(Nil, Nil)
  }
297
298  sealed case class Meta_Info(props: Properties.T, settings: Settings.T)
299  {
300    def is_empty: Boolean = props.isEmpty && settings.isEmpty
301
302    def get(c: SQL.Column): Option[String] =
303      Properties.get(props, c.name) orElse
304      Properties.get(settings, c.name)
305
306    def get_date(c: SQL.Column): Option[Date] =
307      get(c).map(Log_File.Date_Format.parse(_))
308  }
309
310  object Identify
311  {
312    val log_prefix = "isabelle_identify_"
313    val log_prefix2 = "plain_identify_"
314
315    def engine(log_file: Log_File): String =
316      if (log_file.name.startsWith(Jenkins.log_prefix)) "jenkins_identify"
317      else if (log_file.name.startsWith(log_prefix2)) "plain_identify"
318      else "identify"
319
320    def content(date: Date, isabelle_version: Option[String], afp_version: Option[String]): String =
321      terminate_lines(
322        List("isabelle_identify: " + Build_Log.print_date(date), "") :::
323        isabelle_version.map("Isabelle version: " + _).toList :::
324        afp_version.map("AFP version: " + _).toList)
325
326    val Start = new Regex("""^isabelle_identify: (.+)$""")
327    val No_End = new Regex("""$.""")
328    val Isabelle_Version = List(new Regex("""^Isabelle version: (\S+)$"""))
329    val AFP_Version = List(new Regex("""^AFP version: (\S+)$"""))
330  }
331
  /* legacy "isatest" logs: line patterns for start/end and version info */
  object Isatest
  {
    val log_prefix = "isatest-makeall-"
    val engine = "isatest"
    val Start = new Regex("""^------------------- starting test --- (.+) --- (.+)$""")
    val End = new Regex("""^------------------- test (?:successful|FAILED) --- (.+) --- .*$""")
    val Isabelle_Version = List(new Regex("""^Isabelle version: (\S+)$"""))
  }
340
  /* "afp-test" logs: line patterns, including a known broken-initialization marker */
  object AFP_Test
  {
    val log_prefix = "afp-test-devel-"
    val engine = "afp-test"
    val Start = new Regex("""^Start test(?: for .+)? at ([^,]+), (.*)$""")
    val Start_Old = new Regex("""^Start test(?: for .+)? at ([^,]+)$""")
    val End = new Regex("""^End test on (.+), .+, elapsed time:.*$""")
    val Isabelle_Version = List(new Regex("""^Isabelle version: .* -- hg id (\S+)$"""))
    val AFP_Version = List(new Regex("""^AFP version: .* -- hg id (\S+)$"""))
    val Bad_Init = new Regex("""^cp:.*: Disc quota exceeded$""")
  }
352
  /* Jenkins CI logs: host/start/version patterns and section markers */
  object Jenkins
  {
    val log_prefix = "jenkins_"
    val engine = "jenkins"
    val Host = new Regex("""^Building remotely on (\S+) \((\S+)\).*$""")
    val Start = new Regex("""^(?:Started by an SCM change|Started from command line by admin|).*$""")
    val Start_Date = new Regex("""^Build started at (.+)$""")
    val No_End = new Regex("""$.""")
    // several historic formats for the Isabelle repository id
    val Isabelle_Version =
      List(new Regex("""^(?:Build for Isabelle id|Isabelle id) (\w+).*$"""),
        new Regex("""^ISABELLE_CI_REPO_ID="(\w+)".*$"""),
        new Regex("""^(\w{12}) tip.*$"""))
    val AFP_Version =
      List(new Regex("""^(?:Build for AFP id|AFP id) (\w+).*$"""),
        new Regex("""^ISABELLE_CI_AFP_ID="(\w+)".*$"""))
    val CONFIGURATION = "=== CONFIGURATION ==="
    val BUILD = "=== BUILD ==="
  }
371
  // Detect the log format from its first line(s) and digest meta info accordingly.
  private def parse_meta_info(log_file: Log_File): Meta_Info =
  {
    // assemble Meta_Info for one of the known engines; End/version patterns vary per engine
    def parse(engine: String, host: String, start: Date,
      End: Regex, Isabelle_Version: List[Regex], AFP_Version: List[Regex]): Meta_Info =
    {
      // synthesized identifier: "<host-or-engine>:<start-time-ms>"
      val build_id =
      {
        val prefix = proper_string(host) orElse proper_string(engine) getOrElse "build"
        prefix + ":" + start.time.ms
      }
      val build_engine = if (engine == "") Nil else List(Prop.build_engine.name -> engine)
      val build_host = if (host == "") Nil else List(Prop.build_host.name -> host)

      val start_date = List(Prop.build_start.name -> print_date(start))
      // end date only if the last line matches the engine's End pattern
      val end_date =
        log_file.lines.last match {
          case End(log_file.Strict_Date(end_date)) =>
            List(Prop.build_end.name -> print_date(end_date))
          case _ => Nil
        }

      val isabelle_version =
        log_file.find_match(Isabelle_Version).map(Prop.isabelle_version.name -> _)
      val afp_version =
        log_file.find_match(AFP_Version).map(Prop.afp_version.name -> _)

      Meta_Info((Prop.build_id.name -> build_id) :: build_engine ::: build_host :::
          start_date ::: end_date ::: isabelle_version.toList ::: afp_version.toList,
        log_file.get_all_settings)
    }

    // format dispatch: case order matters, first match wins
    log_file.lines match {
      // Admin/build_history logs carry explicit inlined meta info
      case line :: _ if line.startsWith(Build_History.META_INFO_MARKER) =>
        Meta_Info(log_file.find_props(Build_History.META_INFO_MARKER).get,
          log_file.get_all_settings)

      case Identify.Start(log_file.Strict_Date(start)) :: _ =>
        parse(Identify.engine(log_file), "", start, Identify.No_End,
          Identify.Isabelle_Version, Identify.AFP_Version)

      case Isatest.Start(log_file.Strict_Date(start), host) :: _ =>
        parse(Isatest.engine, host, start, Isatest.End,
          Isatest.Isabelle_Version, Nil)

      case AFP_Test.Start(log_file.Strict_Date(start), host) :: _ =>
        parse(AFP_Test.engine, host, start, AFP_Test.End,
          AFP_Test.Isabelle_Version, AFP_Test.AFP_Version)

      case AFP_Test.Start_Old(log_file.Strict_Date(start)) :: _ =>
        parse(AFP_Test.engine, "", start, AFP_Test.End,
          AFP_Test.Isabelle_Version, AFP_Test.AFP_Version)

      // Jenkins: the real start date appears after the "=== BUILD ===" marker
      case Jenkins.Start() :: _ =>
        log_file.lines.dropWhile(_ != Jenkins.BUILD) match {
          case Jenkins.BUILD :: _ :: Jenkins.Start_Date(log_file.Strict_Date(start)) :: _ =>
            val host =
              log_file.lines.takeWhile(_ != Jenkins.CONFIGURATION).collectFirst({
                case Jenkins.Host(a, b) => a + "." + b
              }).getOrElse("")
            parse(Jenkins.engine, host, start.to(Date.timezone_berlin), Jenkins.No_End,
              Jenkins.Isabelle_Version, Jenkins.AFP_Version)
          case _ => Meta_Info.empty
        }

      // tolerated degenerate cases: binary junk, truncated isatest log, bad AFP init, empty file
      case line :: _ if line.startsWith("\u0000") => Meta_Info.empty
      case List(Isatest.End(_)) => Meta_Info.empty
      case _ :: AFP_Test.Bad_Init() :: _ => Meta_Info.empty
      case Nil => Meta_Info.empty

      case _ => log_file.err("cannot detect log file format")
    }
  }
444
445
446
447  /** build info: toplevel output of isabelle build or Admin/build_history **/
448
  // line markers for inlined YXML content produced by isabelle build ("\f" = form feed)
  val THEORY_TIMING_MARKER = "\ftheory_timing = "
  val ML_STATISTICS_MARKER = "\fML_statistics = "
  val ERROR_MESSAGE_MARKER = "\ferror_message = "
  val SESSION_NAME = "session_name"
453
  // outcome of one session within a build run
  object Session_Status extends Enumeration
  {
    val existing, finished, failed, cancelled = Value
  }
458
459  sealed case class Session_Entry(
460    chapter: String = "",
461    groups: List[String] = Nil,
462    threads: Option[Int] = None,
463    timing: Timing = Timing.zero,
464    ml_timing: Timing = Timing.zero,
465    sources: Option[String] = None,
466    heap_size: Option[Long] = None,
467    status: Option[Session_Status.Value] = None,
468    errors: List[String] = Nil,
469    theory_timings: Map[String, Timing] = Map.empty,
470    ml_statistics: List[Properties.T] = Nil)
471  {
472    def proper_groups: Option[String] = if (groups.isEmpty) None else Some(cat_lines(groups))
473    def finished: Boolean = status == Some(Session_Status.finished)
474    def failed: Boolean = status == Some(Session_Status.failed)
475  }
476
  object Build_Info
  {
    // placeholder entry used where real session data is absent
    val sessions_dummy: Map[String, Session_Entry] =
      Map("" -> Session_Entry(theory_timings = Map("" -> Timing.zero)))
  }
482
483  sealed case class Build_Info(sessions: Map[String, Session_Entry])
484  {
485    def finished_sessions: List[String] = for ((a, b) <- sessions.toList if b.finished) yield a
486    def failed_sessions: List[String] = for ((a, b) <- sessions.toList if b.failed) yield a
487  }
488
  // Scan all log lines and accumulate per-session build information.
  // ML statistics are only parsed on demand (parse_ml_statistics), as they can be large.
  private def parse_build_info(log_file: Log_File, parse_ml_statistics: Boolean): Build_Info =
  {
    // split "chapter/session" names; a plain name gets an empty chapter
    object Chapter_Name
    {
      def unapply(s: String): Some[(String, String)] =
        space_explode('/', s) match {
          case List(chapter, name) => Some((chapter, name))
          case _ => Some(("", s))
        }
    }

    val Session_No_Groups = new Regex("""^Session (\S+)$""")
    val Session_Groups = new Regex("""^Session (\S+) \((.*)\)$""")
    val Session_Finished1 =
      new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time, (\d+):(\d+):(\d+) cpu time.*$""")
    val Session_Finished2 =
      new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time.*$""")
    val Session_Timing =
      new Regex("""^Timing (\S+) \((\d+) threads, (\d+\.\d+)s elapsed time, (\d+\.\d+)s cpu time, (\d+\.\d+)s GC time.*$""")
    val Session_Started = new Regex("""^(?:Running|Building) (\S+) \.\.\.$""")
    val Session_Failed = new Regex("""^(\S+) FAILED""")
    val Session_Cancelled = new Regex("""^(\S+) CANCELLED""")
    val Sources = new Regex("""^Sources (\S+) (\S{""" + SHA1.digest_length + """})$""")
    val Heap = new Regex("""^Heap (\S+) \((\d+) bytes\)$""")

    // decode a theory-timing line into (session_name, theory -> timing)
    object Theory_Timing
    {
      def unapply(line: String): Option[(String, (String, Timing))] =
      {
        // '~' was used as an escape for '-' in some log encodings
        val line1 = line.replace('~', '-')
        Library.try_unprefix(THEORY_TIMING_MARKER, line1).map(log_file.parse_props(_)) match {
          case Some((SESSION_NAME, name) :: props) =>
            (props, props) match {
              case (Markup.Name(thy), Markup.Timing_Properties(t)) => Some((name, thy -> t))
              case _ => None
            }
          case _ => None
        }
      }
    }

    // mutable accumulators, keyed by session name; filled during the single line scan
    var chapter = Map.empty[String, String]
    var groups = Map.empty[String, List[String]]
    var threads = Map.empty[String, Int]
    var timing = Map.empty[String, Timing]
    var ml_timing = Map.empty[String, Timing]
    var started = Set.empty[String]
    var failed = Set.empty[String]
    var cancelled = Set.empty[String]
    var sources = Map.empty[String, String]
    var heap_sizes = Map.empty[String, Long]
    var theory_timings = Map.empty[String, Map[String, Timing]]
    var ml_statistics = Map.empty[String, List[Properties.T]]
    var errors = Map.empty[String, List[String]]

    // every session mentioned anywhere in the accumulators
    def all_sessions: Set[String] =
      chapter.keySet ++ groups.keySet ++ threads.keySet ++ timing.keySet ++ ml_timing.keySet ++
      failed ++ cancelled ++ started ++ sources.keySet ++ heap_sizes.keySet ++
      theory_timings.keySet ++ ml_statistics.keySet


    for (line <- log_file.lines) {
      line match {
        case Session_No_Groups(Chapter_Name(chapt, name)) =>
          chapter += (name -> chapt)
          groups += (name -> Nil)

        case Session_Groups(Chapter_Name(chapt, name), grps) =>
          chapter += (name -> chapt)
          groups += (name -> Word.explode(grps))

        case Session_Started(name) =>
          started += name

        case Session_Finished1(name,
            Value.Int(e1), Value.Int(e2), Value.Int(e3),
            Value.Int(c1), Value.Int(c2), Value.Int(c3)) =>
          val elapsed = Time.hms(e1, e2, e3)
          val cpu = Time.hms(c1, c2, c3)
          timing += (name -> Timing(elapsed, cpu, Time.zero))

        case Session_Finished2(name,
            Value.Int(e1), Value.Int(e2), Value.Int(e3)) =>
          val elapsed = Time.hms(e1, e2, e3)
          timing += (name -> Timing(elapsed, Time.zero, Time.zero))

        case Session_Timing(name,
            Value.Int(t), Value.Double(e), Value.Double(c), Value.Double(g)) =>
          val elapsed = Time.seconds(e)
          val cpu = Time.seconds(c)
          val gc = Time.seconds(g)
          ml_timing += (name -> Timing(elapsed, cpu, gc))
          threads += (name -> t)

        case Sources(name, s) =>
          sources += (name -> s)

        case Heap(name, Value.Long(size)) =>
          heap_sizes += (name -> size)

        // inlined YXML payloads: theory timings, ML statistics, error messages
        case _ if line.startsWith(THEORY_TIMING_MARKER) && YXML.detect(line) =>
          line match {
            case Theory_Timing(name, theory_timing) =>
              theory_timings += (name -> (theory_timings.getOrElse(name, Map.empty) + theory_timing))
            case _ => log_file.err("malformed theory_timing " + quote(line))
          }

        case _ if parse_ml_statistics && line.startsWith(ML_STATISTICS_MARKER) && YXML.detect(line) =>
          Library.try_unprefix(ML_STATISTICS_MARKER, line).map(log_file.parse_props(_)) match {
            case Some((SESSION_NAME, name) :: props) =>
              ml_statistics += (name -> (props :: ml_statistics.getOrElse(name, Nil)))
            case _ => log_file.err("malformed ML_statistics " + quote(line))
          }

        case _ if line.startsWith(ERROR_MESSAGE_MARKER) && YXML.detect(line) =>
          Library.try_unprefix(ERROR_MESSAGE_MARKER, line).map(log_file.parse_props(_)) match {
            case Some(List((SESSION_NAME, name), (Markup.CONTENT, msg))) =>
              errors += (name -> (Library.decode_lines(msg) :: errors.getOrElse(name, Nil)))
            case _ => log_file.err("malformed error message " + quote(line))
          }

        case _ =>
      }
    }

    // assemble final entries; status is derived from the accumulated evidence
    val sessions =
      Map(
        (for (name <- all_sessions.toList) yield {
          val status =
            if (failed(name)) Session_Status.failed
            else if (cancelled(name)) Session_Status.cancelled
            else if (timing.isDefinedAt(name) || ml_timing.isDefinedAt(name))
              Session_Status.finished
            else if (started(name)) Session_Status.failed  // started but never finished
            else Session_Status.existing
          val entry =
            Session_Entry(
              chapter = chapter.getOrElse(name, ""),
              groups = groups.getOrElse(name, Nil),
              threads = threads.get(name),
              timing = timing.getOrElse(name, Timing.zero),
              ml_timing = ml_timing.getOrElse(name, Timing.zero),
              sources = sources.get(name),
              heap_size = heap_sizes.get(name),
              status = Some(status),
              errors = errors.getOrElse(name, Nil).reverse,
              theory_timings = theory_timings.getOrElse(name, Map.empty),
              ml_statistics = ml_statistics.getOrElse(name, Nil).reverse)
          (name -> entry)
        }):_*)
    Build_Info(sessions)
  }
641
642
643
644  /** session info: produced by isabelle build as session log.gz file **/
645
646  sealed case class Session_Info(
647    session_timing: Properties.T,
648    command_timings: List[Properties.T],
649    theory_timings: List[Properties.T],
650    ml_statistics: List[Properties.T],
651    task_statistics: List[Properties.T],
652    errors: List[String])
653  {
654    def error(s: String): Session_Info =
655      copy(errors = errors ::: List(s))
656  }
657
658  private def parse_session_info(
659    log_file: Log_File,
660    command_timings: Boolean,
661    theory_timings: Boolean,
662    ml_statistics: Boolean,
663    task_statistics: Boolean): Session_Info =
664  {
665    Session_Info(
666      session_timing = log_file.find_props("\fTiming = ") getOrElse Nil,
667      command_timings = if (command_timings) log_file.filter_props("\fcommand_timing = ") else Nil,
668      theory_timings = if (theory_timings) log_file.filter_props(THEORY_TIMING_MARKER) else Nil,
669      ml_statistics = if (ml_statistics) log_file.filter_props(ML_STATISTICS_MARKER) else Nil,
670      task_statistics = if (task_statistics) log_file.filter_props("\ftask_statistics = ") else Nil,
671      errors = log_file.filter_lines(ERROR_MESSAGE_MARKER).map(Library.decode_lines(_)))
672  }
673
674  def compress_errors(errors: List[String], cache: XZ.Cache = XZ.cache()): Option[Bytes] =
675    if (errors.isEmpty) None
676    else {
677      Some(Bytes(YXML.string_of_body(XML.Encode.list(XML.Encode.string)(errors))).
678        compress(cache = cache))
679    }
680
681  def uncompress_errors(bytes: Bytes, cache: XZ.Cache = XZ.cache()): List[String] =
682    if (bytes.isEmpty) Nil
683    else {
684      XML.Decode.list(YXML.string_of_body(_))(YXML.parse_body(bytes.uncompress(cache = cache).text))
685    }
686
687
688
689  /** persistent store **/
690
691  /* SQL data model */
692
693  object Data
694  {
695    def build_log_table(name: String, columns: List[SQL.Column], body: String = ""): SQL.Table =
696      SQL.Table("isabelle_build_log_" + name, columns, body)
697
698
699    /* main content */
700
701    val log_name = SQL.Column.string("log_name").make_primary_key
702    val session_name = SQL.Column.string("session_name").make_primary_key
703    val theory_name = SQL.Column.string("theory_name").make_primary_key
704    val chapter = SQL.Column.string("chapter")
705    val groups = SQL.Column.string("groups")
706    val threads = SQL.Column.int("threads")
707    val timing_elapsed = SQL.Column.long("timing_elapsed")
708    val timing_cpu = SQL.Column.long("timing_cpu")
709    val timing_gc = SQL.Column.long("timing_gc")
710    val timing_factor = SQL.Column.double("timing_factor")
711    val ml_timing_elapsed = SQL.Column.long("ml_timing_elapsed")
712    val ml_timing_cpu = SQL.Column.long("ml_timing_cpu")
713    val ml_timing_gc = SQL.Column.long("ml_timing_gc")
714    val ml_timing_factor = SQL.Column.double("ml_timing_factor")
715    val theory_timing_elapsed = SQL.Column.long("theory_timing_elapsed")
716    val theory_timing_cpu = SQL.Column.long("theory_timing_cpu")
717    val theory_timing_gc = SQL.Column.long("theory_timing_gc")
718    val heap_size = SQL.Column.long("heap_size")
719    val status = SQL.Column.string("status")
720    val errors = SQL.Column.bytes("errors")
721    val sources = SQL.Column.string("sources")
722    val ml_statistics = SQL.Column.bytes("ml_statistics")
723    val known = SQL.Column.bool("known")
724
725    val meta_info_table =
726      build_log_table("meta_info", log_name :: Prop.all_props ::: Settings.all_settings)
727
728    val sessions_table =
729      build_log_table("sessions",
730        List(log_name, session_name, chapter, groups, threads, timing_elapsed, timing_cpu,
731          timing_gc, timing_factor, ml_timing_elapsed, ml_timing_cpu, ml_timing_gc, ml_timing_factor,
732          heap_size, status, errors, sources))
733
734    val theories_table =
735      build_log_table("theories",
736        List(log_name, session_name, theory_name, theory_timing_elapsed, theory_timing_cpu,
737          theory_timing_gc))
738
739    val ml_statistics_table =
740      build_log_table("ml_statistics", List(log_name, session_name, ml_statistics))
741
742
743    /* AFP versions */
744
745    val isabelle_afp_versions_table: SQL.Table =
746    {
747      val version1 = Prop.isabelle_version
748      val version2 = Prop.afp_version
749      build_log_table("isabelle_afp_versions", List(version1.make_primary_key, version2),
750        SQL.select(List(version1, version2), distinct = true) + meta_info_table +
751        " WHERE " + version1.defined + " AND " + version2.defined)
752    }
753
754
755    /* earliest pull date for repository version (PostgreSQL queries) */
756
757    def pull_date(afp: Boolean = false) =
758      if (afp) SQL.Column.date("afp_pull_date")
759      else SQL.Column.date("pull_date")
760
761    def pull_date_table(afp: Boolean = false): SQL.Table =
762    {
763      val (name, versions) =
764        if (afp) ("afp_pull_date", List(Prop.isabelle_version, Prop.afp_version))
765        else ("pull_date", List(Prop.isabelle_version))
766
767      build_log_table(name, versions.map(_.make_primary_key) ::: List(pull_date(afp)),
768        "SELECT " + versions.mkString(", ") +
769          ", min(" + Prop.build_start + ") AS " + pull_date(afp) +
770        " FROM " + meta_info_table +
771        " WHERE " + (versions ::: List(Prop.build_start)).map(_.defined).mkString(" AND ") +
772        " GROUP BY " + versions.mkString(", "))
773    }
774
775
776    /* recent entries */
777
778    def recent_time(days: Int): SQL.Source =
779      "now() - INTERVAL '" + days.max(0) + " days'"
780
781    def recent_pull_date_table(
782      days: Int, rev: String = "", afp_rev: Option[String] = None): SQL.Table =
783    {
784      val afp = afp_rev.isDefined
785      val rev2 = afp_rev.getOrElse("")
786      val table = pull_date_table(afp)
787
788      val version1 = Prop.isabelle_version
789      val version2 = Prop.afp_version
790      val eq1 = version1(table) + " = " + SQL.string(rev)
791      val eq2 = version2(table) + " = " + SQL.string(rev2)
792
793      SQL.Table("recent_pull_date", table.columns,
794        table.select(table.columns,
795          "WHERE " + pull_date(afp)(table) + " > " + recent_time(days) +
796          (if (rev != "" && rev2 == "") " OR " + eq1
797           else if (rev == "" && rev2 != "") " OR " + eq2
798           else if (rev != "" && rev2 != "") " OR (" + eq1 + " AND " + eq2 + ")"
799           else "")))
800    }
801
802    def select_recent_log_names(days: Int): SQL.Source =
803    {
804      val table1 = meta_info_table
805      val table2 = recent_pull_date_table(days)
806      table1.select(List(log_name), distinct = true) + SQL.join_inner + table2.query_named +
807        " ON " + Prop.isabelle_version(table1) + " = " + Prop.isabelle_version(table2)
808    }
809
810    def select_recent_versions(days: Int,
811      rev: String = "", afp_rev: Option[String] = None, sql: SQL.Source = ""): SQL.Source =
812    {
813      val afp = afp_rev.isDefined
814      val version = Prop.isabelle_version
815      val table1 = recent_pull_date_table(days, rev = rev, afp_rev = afp_rev)
816      val table2 = meta_info_table
817      val aux_table = SQL.Table("aux", table2.columns, table2.select(sql = sql))
818
819      val columns =
820        table1.columns.map(c => c(table1)) :::
821          List(known.copy(expr = log_name(aux_table).defined))
822      SQL.select(columns, distinct = true) +
823        table1.query_named + SQL.join_outer + aux_table.query_named +
824        " ON " + version(table1) + " = " + version(aux_table) +
825        " ORDER BY " + pull_date(afp)(table1) + " DESC"
826    }
827
828
829    /* universal view on main data */
830
    // Universal view over the main build-log data: meta_info extended with
    // pull dates (AFP and plain), joined with per-session data and optional
    // ML statistics.  Built in three stages via named sub-queries a/b/c.
    val universal_table: SQL.Table =
    {
      val afp_pull_date = pull_date(afp = true)
      val version1 = Prop.isabelle_version
      val version2 = Prop.afp_version
      val table1 = meta_info_table
      val table2 = pull_date_table(afp = true)
      val table3 = pull_date_table()

      // stage a: meta_info outer-joined with AFP pull dates,
      // matched on both Isabelle and AFP version
      val a_columns = log_name :: afp_pull_date :: table1.columns.tail
      val a_table =
        SQL.Table("a", a_columns,
          SQL.select(List(log_name, afp_pull_date) ::: table1.columns.tail.map(_.apply(table1))) +
          table1 + SQL.join_outer + table2 +
          " ON " + version1(table1) + " = " + version1(table2) +
          " AND " + version2(table1) + " = " + version2(table2))

      // stage b: add the plain (non-AFP) pull date, matched on Isabelle version only
      val b_columns = log_name :: pull_date() :: a_columns.tail
      val b_table =
        SQL.Table("b", b_columns,
          SQL.select(
            List(log_name(a_table), pull_date()(table3)) ::: a_columns.tail.map(_.apply(a_table))) +
          a_table.query_named + SQL.join_outer + table3 +
          " ON " + version1(a_table) + " = " + version1(table3))

      // stage c: inner join with per-session data (logs without sessions drop out)
      val c_columns = b_columns ::: sessions_table.columns.tail
      val c_table =
        SQL.Table("c", c_columns,
          SQL.select(log_name(b_table) :: c_columns.tail) +
          b_table.query_named + SQL.join_inner + sessions_table +
          " ON " + log_name(b_table) + " = " + log_name(sessions_table))

      // final view: attach compressed ML statistics per (log_name, session_name),
      // via outer join so sessions without statistics are retained
      SQL.Table("isabelle_build_log", c_columns ::: List(ml_statistics),
        {
          SQL.select(c_columns.map(_.apply(c_table)) ::: List(ml_statistics)) +
          c_table.query_named + SQL.join_outer + ml_statistics_table +
          " ON " + log_name(c_table) + " = " + log_name(ml_statistics_table) +
          " AND " + session_name(c_table) + " = " + session_name(ml_statistics_table)
        })
    }
871  }
872
873
874  /* database access */
875
  /* factory for Store (its constructor is private to Build_Log) */
  def store(options: Options): Store = new Store(options)
877
878  class Store private[Build_Log](options: Options)
879  {
880    val xml_cache: XML.Cache = XML.make_cache()
881    val xz_cache: XZ.Cache = XZ.make_cache()
882
883    def open_database(
884      user: String = options.string("build_log_database_user"),
885      password: String = options.string("build_log_database_password"),
886      database: String = options.string("build_log_database_name"),
887      host: String = options.string("build_log_database_host"),
888      port: Int = options.int("build_log_database_port"),
889      ssh_host: String = options.string("build_log_ssh_host"),
890      ssh_user: String = options.string("build_log_ssh_user"),
891      ssh_port: Int = options.int("build_log_ssh_port")): PostgreSQL.Database =
892    {
893      PostgreSQL.open_database(
894        user = user, password = password, database = database, host = host, port = port,
895        ssh =
896          if (ssh_host == "") None
897          else Some(SSH.open_session(options, host = ssh_host, user = ssh_user, port = port)),
898        ssh_close = true)
899    }
900
901    def update_database(db: PostgreSQL.Database, dirs: List[Path], ml_statistics: Boolean = false)
902    {
903      val log_files =
904        dirs.flatMap(dir =>
905          File.find_files(dir.file, pred = Log_File.is_log(_), follow_links = true))
906      write_info(db, log_files, ml_statistics = ml_statistics)
907
908      db.create_view(Data.pull_date_table())
909      db.create_view(Data.pull_date_table(afp = true))
910      db.create_view(Data.universal_table)
911    }
912
913    def snapshot_database(db: PostgreSQL.Database, sqlite_database: Path,
914      days: Int = 100, ml_statistics: Boolean = false)
915    {
916      Isabelle_System.mkdirs(sqlite_database.dir)
917      sqlite_database.file.delete
918
919      using(SQLite.open_database(sqlite_database))(db2 =>
920      {
921        db.transaction {
922          db2.transaction {
923            // main content
924            db2.create_table(Data.meta_info_table)
925            db2.create_table(Data.sessions_table)
926            db2.create_table(Data.theories_table)
927            db2.create_table(Data.ml_statistics_table)
928
929            val recent_log_names =
930              db.using_statement(Data.select_recent_log_names(days))(stmt =>
931                stmt.execute_query().iterator(_.string(Data.log_name)).toList)
932
933            for (log_name <- recent_log_names) {
934              read_meta_info(db, log_name).foreach(meta_info =>
935                update_meta_info(db2, log_name, meta_info))
936
937              update_sessions(db2, log_name, read_build_info(db, log_name))
938
939              if (ml_statistics) {
940                update_ml_statistics(db2, log_name,
941                  read_build_info(db, log_name, ml_statistics = true))
942              }
943            }
944
945            // pull_date
946            for (afp <- List(false, true))
947            {
948              val afp_rev = if (afp) Some("") else None
949              val table = Data.pull_date_table(afp)
950              db2.create_table(table)
951              db2.using_statement(table.insert())(stmt2 =>
952              {
953                db.using_statement(
954                  Data.recent_pull_date_table(days, afp_rev = afp_rev).query)(stmt =>
955                {
956                  val res = stmt.execute_query()
957                  while (res.next()) {
958                    for ((c, i) <- table.columns.zipWithIndex) {
959                      stmt2.string(i + 1) = res.get_string(c)
960                    }
961                    stmt2.execute()
962                  }
963                })
964              })
965            }
966
967            // full view
968            db2.create_view(Data.universal_table)
969          }
970        }
971        db2.rebuild
972      })
973    }
974
975    def domain(db: SQL.Database, table: SQL.Table, column: SQL.Column): Set[String] =
976      db.using_statement(table.select(List(column), distinct = true))(stmt =>
977        stmt.execute_query().iterator(_.string(column)).toSet)
978
979    def update_meta_info(db: SQL.Database, log_name: String, meta_info: Meta_Info)
980    {
981      val table = Data.meta_info_table
982      db.using_statement(db.insert_permissive(table))(stmt =>
983      {
984        stmt.string(1) = log_name
985        for ((c, i) <- table.columns.tail.zipWithIndex) {
986          if (c.T == SQL.Type.Date)
987            stmt.date(i + 2) = meta_info.get_date(c)
988          else
989            stmt.string(i + 2) = meta_info.get(c)
990        }
991        stmt.execute()
992      })
993    }
994
995    def update_sessions(db: SQL.Database, log_name: String, build_info: Build_Info)
996    {
997      val table = Data.sessions_table
998      db.using_statement(db.insert_permissive(table))(stmt =>
999      {
1000        val sessions =
1001          if (build_info.sessions.isEmpty) Build_Info.sessions_dummy
1002          else build_info.sessions
1003        for ((session_name, session) <- sessions) {
1004          stmt.string(1) = log_name
1005          stmt.string(2) = session_name
1006          stmt.string(3) = proper_string(session.chapter)
1007          stmt.string(4) = session.proper_groups
1008          stmt.int(5) = session.threads
1009          stmt.long(6) = session.timing.elapsed.proper_ms
1010          stmt.long(7) = session.timing.cpu.proper_ms
1011          stmt.long(8) = session.timing.gc.proper_ms
1012          stmt.double(9) = session.timing.factor
1013          stmt.long(10) = session.ml_timing.elapsed.proper_ms
1014          stmt.long(11) = session.ml_timing.cpu.proper_ms
1015          stmt.long(12) = session.ml_timing.gc.proper_ms
1016          stmt.double(13) = session.ml_timing.factor
1017          stmt.long(14) = session.heap_size
1018          stmt.string(15) = session.status.map(_.toString)
1019          stmt.bytes(16) = compress_errors(session.errors, cache = xz_cache)
1020          stmt.string(17) = session.sources
1021          stmt.execute()
1022        }
1023      })
1024    }
1025
1026    def update_theories(db: SQL.Database, log_name: String, build_info: Build_Info)
1027    {
1028      val table = Data.theories_table
1029      db.using_statement(db.insert_permissive(table))(stmt =>
1030      {
1031        val sessions =
1032          if (build_info.sessions.forall({ case (_, session) => session.theory_timings.isEmpty }))
1033            Build_Info.sessions_dummy
1034          else build_info.sessions
1035        for {
1036          (session_name, session) <- sessions
1037          (theory_name, timing) <- session.theory_timings
1038        } {
1039          stmt.string(1) = log_name
1040          stmt.string(2) = session_name
1041          stmt.string(3) = theory_name
1042          stmt.long(4) = timing.elapsed.ms
1043          stmt.long(5) = timing.cpu.ms
1044          stmt.long(6) = timing.gc.ms
1045          stmt.execute()
1046        }
1047      })
1048    }
1049
1050    def update_ml_statistics(db: SQL.Database, log_name: String, build_info: Build_Info)
1051    {
1052      val table = Data.ml_statistics_table
1053      db.using_statement(db.insert_permissive(table))(stmt =>
1054      {
1055        val ml_stats: List[(String, Option[Bytes])] =
1056          Par_List.map[(String, Session_Entry), (String, Option[Bytes])](
1057            { case (a, b) => (a, Properties.compress(b.ml_statistics, cache = xz_cache).proper) },
1058            build_info.sessions.iterator.filter(p => p._2.ml_statistics.nonEmpty).toList)
1059        val entries = if (ml_stats.nonEmpty) ml_stats else List("" -> None)
1060        for ((session_name, ml_statistics) <- entries) {
1061          stmt.string(1) = log_name
1062          stmt.string(2) = session_name
1063          stmt.bytes(3) = ml_statistics
1064          stmt.execute()
1065        }
1066      })
1067    }
1068
1069    def write_info(db: SQL.Database, files: List[JFile], ml_statistics: Boolean = false)
1070    {
1071      abstract class Table_Status(table: SQL.Table)
1072      {
1073        db.create_table(table)
1074        private var known: Set[String] = domain(db, table, Data.log_name)
1075
1076        def required(file: JFile): Boolean = !known(Log_File.plain_name(file.getName))
1077
1078        def update_db(db: SQL.Database, log_file: Log_File): Unit
1079        def update(log_file: Log_File)
1080        {
1081          if (!known(log_file.name)) {
1082            update_db(db, log_file)
1083            known += log_file.name
1084          }
1085        }
1086      }
1087      val status =
1088        List(
1089          new Table_Status(Data.meta_info_table) {
1090            override def update_db(db: SQL.Database, log_file: Log_File): Unit =
1091              update_meta_info(db, log_file.name, log_file.parse_meta_info())
1092          },
1093          new Table_Status(Data.sessions_table) {
1094            override def update_db(db: SQL.Database, log_file: Log_File): Unit =
1095              update_sessions(db, log_file.name, log_file.parse_build_info())
1096          },
1097          new Table_Status(Data.theories_table) {
1098            override def update_db(db: SQL.Database, log_file: Log_File): Unit =
1099              update_theories(db, log_file.name, log_file.parse_build_info())
1100          },
1101          new Table_Status(Data.ml_statistics_table) {
1102            override def update_db(db: SQL.Database, log_file: Log_File): Unit =
1103            if (ml_statistics) {
1104              update_ml_statistics(db, log_file.name,
1105                log_file.parse_build_info(ml_statistics = true))
1106            }
1107          })
1108
1109      for (file_group <-
1110            files.filter(file => status.exists(_.required(file))).
1111              grouped(options.int("build_log_transaction_size") max 1))
1112      {
1113        val log_files = Par_List.map[JFile, Log_File](Log_File.apply _, file_group)
1114        db.transaction { log_files.foreach(log_file => status.foreach(_.update(log_file))) }
1115      }
1116    }
1117
1118    def read_meta_info(db: SQL.Database, log_name: String): Option[Meta_Info] =
1119    {
1120      val table = Data.meta_info_table
1121      val columns = table.columns.tail
1122      db.using_statement(table.select(columns, Data.log_name.where_equal(log_name)))(stmt =>
1123      {
1124        val res = stmt.execute_query()
1125        if (!res.next) None
1126        else {
1127          val results =
1128            columns.map(c => c.name ->
1129              (if (c.T == SQL.Type.Date)
1130                res.get_date(c).map(Log_File.Date_Format(_))
1131               else
1132                res.get_string(c)))
1133          val n = Prop.all_props.length
1134          val props = for ((x, Some(y)) <- results.take(n)) yield (x, y)
1135          val settings = for ((x, Some(y)) <- results.drop(n)) yield (x, y)
1136          Some(Meta_Info(props, settings))
1137        }
1138      })
1139    }
1140
1141    def read_build_info(
1142      db: SQL.Database,
1143      log_name: String,
1144      session_names: List[String] = Nil,
1145      ml_statistics: Boolean = false): Build_Info =
1146    {
1147      val table1 = Data.sessions_table
1148      val table2 = Data.ml_statistics_table
1149
1150      val where_log_name =
1151        Data.log_name(table1).where_equal(log_name) + " AND " +
1152        Data.session_name(table1) + " <> ''"
1153      val where =
1154        if (session_names.isEmpty) where_log_name
1155        else where_log_name + " AND " + SQL.member(Data.session_name(table1).ident, session_names)
1156
1157      val columns1 = table1.columns.tail.map(_.apply(table1))
1158      val (columns, from) =
1159        if (ml_statistics) {
1160          val columns = columns1 ::: List(Data.ml_statistics(table2))
1161          val join =
1162            table1 + SQL.join_outer + table2 + " ON " +
1163            Data.log_name(table1) + " = " + Data.log_name(table2) + " AND " +
1164            Data.session_name(table1) + " = " + Data.session_name(table2)
1165          (columns, SQL.enclose(join))
1166        }
1167        else (columns1, table1.ident)
1168
1169      val sessions =
1170        db.using_statement(SQL.select(columns) + from + " " + where)(stmt =>
1171        {
1172          stmt.execute_query().iterator(res =>
1173          {
1174            val session_name = res.string(Data.session_name)
1175            val session_entry =
1176              Session_Entry(
1177                chapter = res.string(Data.chapter),
1178                groups = split_lines(res.string(Data.groups)),
1179                threads = res.get_int(Data.threads),
1180                timing = res.timing(Data.timing_elapsed, Data.timing_cpu, Data.timing_gc),
1181                ml_timing =
1182                  res.timing(Data.ml_timing_elapsed, Data.ml_timing_cpu, Data.ml_timing_gc),
1183                sources = res.get_string(Data.sources),
1184                heap_size = res.get_long(Data.heap_size),
1185                status = res.get_string(Data.status).map(Session_Status.withName(_)),
1186                errors = uncompress_errors(res.bytes(Data.errors), cache = xz_cache),
1187                ml_statistics =
1188                  if (ml_statistics) {
1189                    Properties.uncompress(
1190                      res.bytes(Data.ml_statistics), cache = xz_cache, Some(xml_cache))
1191                  }
1192                  else Nil)
1193            session_name -> session_entry
1194          }).toMap
1195        })
1196      Build_Info(sessions)
1197    }
1198  }
1199}
1200