1//===-- HTMLLogger.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the HTML logger. Given a directory dir/, we write
10// dir/0.html for the first analysis, etc.
11// These files contain a visualization that allows inspecting the CFG and the
12// state of the analysis at each point.
13// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14// so each output file is self-contained.
15//
16// VIEWS
17//
18// The timeline and function view are always shown. These allow selecting basic
19// blocks, statements within them, and processing iterations (BBs are visited
20// multiple times when e.g. loops are involved).
21// These are written directly into the HTML body.
22//
23// There are also listings of particular basic blocks, and dumps of the state
24// at particular analysis points (i.e. BB2 iteration 3 statement 2).
25// These are only shown when the relevant BB/analysis point is *selected*.
26//
27// DATA AND TEMPLATES
28//
29// The HTML proper is mostly static.
30// The analysis data is in a JSON object HTMLLoggerData which is embedded as
31// a <script> in the <head>.
32// This gets rendered into DOM by a simple template processor which substitutes
33// the data into <template> tags embedded in the HTML. (see inflate() in JS).
34//
35// SELECTION
36//
37// This is the only real interactive mechanism.
38//
39// At any given time, there are several named selections, e.g.:
40//   bb: B2               (basic block 0 is selected)
41//   elt: B2.4            (statement 4 is selected)
42//   iter: B2:1           (iteration 1 of the basic block is selected)
43//   hover: B3            (hovering over basic block 3)
44//
45// The selection is updated by mouse events: hover by moving the mouse and
46// others by clicking. Elements that are click targets generally have attributes
47// (id or data-foo) that define what they should select.
48// See watchSelection() in JS for the exact logic.
49//
50// When the "bb" selection is set to "B2":
51//   - sections <section data-selection="bb"> get shown
52//   - templates under such sections get re-rendered
53//   - elements with class/id "B2" get class "bb-select"
54//
55//===----------------------------------------------------------------------===//
56
57#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
58#include "clang/Analysis/FlowSensitive/DebugSupport.h"
59#include "clang/Analysis/FlowSensitive/Logger.h"
60#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61#include "clang/Analysis/FlowSensitive/Value.h"
62#include "clang/Basic/SourceManager.h"
63#include "clang/Lex/Lexer.h"
64#include "llvm/ADT/DenseMap.h"
65#include "llvm/ADT/ScopeExit.h"
66#include "llvm/Support/Error.h"
67#include "llvm/Support/FormatVariadic.h"
68#include "llvm/Support/JSON.h"
69#include "llvm/Support/Program.h"
70#include "llvm/Support/ScopedPrinter.h"
71#include "llvm/Support/raw_ostream.h"
72// Defines assets: HTMLLogger_{html_js,css}
73#include "HTMLLogger.inc"
74
75namespace clang::dataflow {
76namespace {
77
78// Render a graphviz graph specification to SVG using the `dot` tool.
79llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
80
81using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
82
83// Recursively dumps Values/StorageLocations as JSON
84class ModelDumper {
85public:
86  ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87      : JOS(JOS), Env(Env) {}
88
89  void dump(Value &V) {
90    JOS.attribute("value_id", llvm::to_string(&V));
91    if (!Visited.insert(&V).second)
92      return;
93
94    JOS.attribute("kind", debugString(V.getKind()));
95
96    switch (V.getKind()) {
97    case Value::Kind::Integer:
98    case Value::Kind::Record:
99    case Value::Kind::TopBool:
100    case Value::Kind::AtomicBool:
101    case Value::Kind::FormulaBool:
102      break;
103    case Value::Kind::Pointer:
104      JOS.attributeObject(
105          "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
106      break;
107    }
108
109    for (const auto& Prop : V.properties())
110      JOS.attributeObject(("p:" + Prop.first()).str(),
111                          [&] { dump(*Prop.second); });
112
113    // Running the SAT solver is expensive, but knowing which booleans are
114    // guaranteed true/false here is valuable and hard to determine by hand.
115    if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
116      JOS.attribute("formula", llvm::to_string(B->formula()));
117      JOS.attribute("truth", Env.proves(B->formula()) ? "true"
118                             : Env.proves(Env.arena().makeNot(B->formula()))
119                                 ? "false"
120                                 : "unknown");
121    }
122  }
123  void dump(const StorageLocation &L) {
124    JOS.attribute("location", llvm::to_string(&L));
125    if (!Visited.insert(&L).second)
126      return;
127
128    JOS.attribute("type", L.getType().getAsString());
129    if (auto *V = Env.getValue(L))
130      dump(*V);
131
132    if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) {
133      for (const auto &Child : RLoc->children())
134        JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
135          if (Child.second)
136            if (Value *Val = Env.getValue(*Child.second))
137              dump(*Val);
138        });
139
140      for (const auto &SyntheticField : RLoc->synthetic_fields())
141        JOS.attributeObject(("sf:" + SyntheticField.first()).str(),
142                            [&] { dump(*SyntheticField.second); });
143    }
144  }
145
146  llvm::DenseSet<const void*> Visited;
147  llvm::json::OStream &JOS;
148  const Environment &Env;
149};
150
151class HTMLLogger : public Logger {
152  struct Iteration {
153    const CFGBlock *Block;
154    unsigned Iter;
155    bool PostVisit;
156    bool Converged;
157  };
158
159  StreamFactory Streams;
160  std::unique_ptr<llvm::raw_ostream> OS;
161  std::optional<llvm::json::OStream> JOS;
162
163  const ControlFlowContext *CFG;
164  // Timeline of iterations of CFG block visitation.
165  std::vector<Iteration> Iters;
166  // Indexes  in `Iters` of the iterations for each block.
167  llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters;
168  // The messages logged in the current context but not yet written.
169  std::string ContextLogs;
170  // The number of elements we have visited within the current CFG block.
171  unsigned ElementIndex;
172
173public:
174  explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
175  void beginAnalysis(const ControlFlowContext &CFG,
176                     TypeErasedDataflowAnalysis &A) override {
177    OS = Streams();
178    this->CFG = &CFG;
179    *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
180
181    const auto &D = CFG.getDecl();
182    const auto &SM = A.getASTContext().getSourceManager();
183    *OS << "<title>";
184    if (const auto *ND = dyn_cast<NamedDecl>(&D))
185      *OS << ND->getNameAsString() << " at ";
186    *OS << SM.getFilename(D.getLocation()) << ":"
187        << SM.getSpellingLineNumber(D.getLocation());
188    *OS << "</title>\n";
189
190    *OS << "<style>" << HTMLLogger_css << "</style>\n";
191    *OS << "<script>" << HTMLLogger_js << "</script>\n";
192
193    writeCode();
194    writeCFG();
195
196    *OS << "<script>var HTMLLoggerData = \n";
197    JOS.emplace(*OS, /*Indent=*/2);
198    JOS->objectBegin();
199    JOS->attributeBegin("states");
200    JOS->objectBegin();
201  }
202  // Between beginAnalysis() and endAnalysis() we write all the states for
203  // particular analysis points into the `timeline` array.
204  void endAnalysis() override {
205    JOS->objectEnd();
206    JOS->attributeEnd();
207
208    JOS->attributeArray("timeline", [&] {
209      for (const auto &E : Iters) {
210        JOS->object([&] {
211          JOS->attribute("block", blockID(E.Block->getBlockID()));
212          JOS->attribute("iter", E.Iter);
213          JOS->attribute("post_visit", E.PostVisit);
214          JOS->attribute("converged", E.Converged);
215        });
216      }
217    });
218    JOS->attributeObject("cfg", [&] {
219      for (const auto &E : BlockIters)
220        writeBlock(*E.first, E.second);
221    });
222
223    JOS->objectEnd();
224    JOS.reset();
225    *OS << ";\n</script>\n";
226    *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
227  }
228
229  void enterBlock(const CFGBlock &B, bool PostVisit) override {
230    llvm::SmallVector<size_t> &BIter = BlockIters[&B];
231    unsigned IterNum = BIter.size() + 1;
232    BIter.push_back(Iters.size());
233    Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false});
234    ElementIndex = 0;
235  }
236  void enterElement(const CFGElement &E) override {
237    ++ElementIndex;
238  }
239
240  static std::string blockID(unsigned Block) {
241    return llvm::formatv("B{0}", Block);
242  }
243  static std::string eltID(unsigned Block, unsigned Element) {
244    return llvm::formatv("B{0}.{1}", Block, Element);
245  }
246  static std::string iterID(unsigned Block, unsigned Iter) {
247    return llvm::formatv("B{0}:{1}", Block, Iter);
248  }
249  static std::string elementIterID(unsigned Block, unsigned Iter,
250                                   unsigned Element) {
251    return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
252  }
253
254  // Write the analysis state associated with a particular analysis point.
255  // FIXME: this dump is fairly opaque. We should show:
256  //  - values associated with the current Stmt
257  //  - values associated with its children
258  //  - meaningful names for values
259  //  - which boolean values are implied true/false by the flow condition
260  void recordState(TypeErasedDataflowAnalysisState &State) override {
261    unsigned Block = Iters.back().Block->getBlockID();
262    unsigned Iter = Iters.back().Iter;
263    bool PostVisit = Iters.back().PostVisit;
264    JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
265      JOS->attribute("block", blockID(Block));
266      JOS->attribute("iter", Iter);
267      JOS->attribute("post_visit", PostVisit);
268      JOS->attribute("element", ElementIndex);
269
270      // If this state immediately follows an Expr, show its built-in model.
271      if (ElementIndex > 0) {
272        auto S =
273            Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>();
274        if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {
275          if (E->isPRValue()) {
276            if (auto *V = State.Env.getValue(*E))
277              JOS->attributeObject(
278                  "value", [&] { ModelDumper(*JOS, State.Env).dump(*V); });
279          } else {
280            if (auto *Loc = State.Env.getStorageLocation(*E))
281              JOS->attributeObject(
282                  "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); });
283          }
284        }
285      }
286      if (!ContextLogs.empty()) {
287        JOS->attribute("logs", ContextLogs);
288        ContextLogs.clear();
289      }
290      {
291        std::string BuiltinLattice;
292        llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
293        State.Env.dump(BuiltinLatticeS);
294        JOS->attribute("builtinLattice", BuiltinLattice);
295      }
296    });
297  }
298  void blockConverged() override { Iters.back().Converged = true; }
299
300  void logText(llvm::StringRef S) override {
301    ContextLogs.append(S.begin(), S.end());
302    ContextLogs.push_back('\n');
303  }
304
305private:
306  // Write the CFG block details.
307  // Currently this is just the list of elements in execution order.
308  // FIXME: an AST dump would be a useful view, too.
309  void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) {
310    JOS->attributeObject(blockID(B.getBlockID()), [&] {
311      JOS->attributeArray("iters", [&] {
312        for (size_t IterIdx : ItersForB) {
313          const Iteration &Iter = Iters[IterIdx];
314          JOS->object([&] {
315            JOS->attribute("iter", Iter.Iter);
316            JOS->attribute("post_visit", Iter.PostVisit);
317            JOS->attribute("converged", Iter.Converged);
318          });
319        }
320      });
321      JOS->attributeArray("elements", [&] {
322        for (const auto &Elt : B.Elements) {
323          std::string Dump;
324          llvm::raw_string_ostream DumpS(Dump);
325          Elt.dumpToStream(DumpS);
326          JOS->value(Dump);
327        }
328      });
329    });
330  }
331
332  // Write the code of function being examined.
333  // We want to overlay the code with <span>s that mark which BB particular
334  // tokens are associated with, and even which BB element (so that clicking
335  // can select the right element).
336  void writeCode() {
337    const auto &AST = CFG->getDecl().getASTContext();
338    bool Invalid = false;
339
340    // Extract the source code from the original file.
341    // Pretty-printing from the AST would probably be nicer (no macros or
342    // indentation to worry about), but we need the boundaries of particular
343    // AST nodes and the printer doesn't provide this.
344    auto Range = clang::Lexer::makeFileCharRange(
345        CharSourceRange::getTokenRange(CFG->getDecl().getSourceRange()),
346        AST.getSourceManager(), AST.getLangOpts());
347    if (Range.isInvalid())
348      return;
349    llvm::StringRef Code = clang::Lexer::getSourceText(
350        Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
351    if (Invalid)
352      return;
353
354    // TokenInfo stores the BB and set of elements that a token is part of.
355    struct TokenInfo {
356      enum : unsigned { Missing = static_cast<unsigned>(-1) };
357
358      // The basic block this is part of.
359      // This is the BB of the stmt with the smallest containing range.
360      unsigned BB = Missing;
361      unsigned BBPriority = 0;
362      // The most specific stmt this is part of (smallest range).
363      unsigned Elt = Missing;
364      unsigned EltPriority = 0;
365      // All stmts this is part of.
366      SmallVector<unsigned> Elts;
367
368      // Mark this token as being part of BB.Elt.
369      // RangeLen is the character length of the element's range, used to
370      // distinguish inner vs outer statements.
371      // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
372      // However "a" has a smaller range, so is more specific. Clicking on the
373      // token "a" should select the stmt "a".
374      void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
375        // A worse BB (larger range) => ignore.
376        if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
377          return;
378        if (BB != this->BB) {
379          this->BB = BB;
380          Elts.clear();
381          BBPriority = RangeLen;
382        }
383        BBPriority = std::min(BBPriority, RangeLen);
384        Elts.push_back(Elt);
385        if (this->Elt == Missing || EltPriority > RangeLen)
386          this->Elt = Elt;
387      }
388      bool operator==(const TokenInfo &Other) const {
389        return std::tie(BB, Elt, Elts) ==
390               std::tie(Other.BB, Other.Elt, Other.Elts);
391      }
392      // Write the attributes for the <span> on this token.
393      void write(llvm::raw_ostream &OS) const {
394        OS << "class='c";
395        if (BB != Missing)
396          OS << " " << blockID(BB);
397        for (unsigned Elt : Elts)
398          OS << " " << eltID(BB, Elt);
399        OS << "'";
400
401        if (Elt != Missing)
402          OS << " data-elt='" << eltID(BB, Elt) << "'";
403        if (BB != Missing)
404          OS << " data-bb='" << blockID(BB) << "'";
405      }
406    };
407
408    // Construct one TokenInfo per character in a flat array.
409    // This is inefficient (chars in a token all have the same info) but simple.
410    std::vector<TokenInfo> State(Code.size());
411    for (const auto *Block : CFG->getCFG()) {
412      unsigned EltIndex = 0;
413      for (const auto& Elt : *Block) {
414        ++EltIndex;
415        if (const auto S = Elt.getAs<CFGStmt>()) {
416          auto EltRange = clang::Lexer::makeFileCharRange(
417              CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
418              AST.getSourceManager(), AST.getLangOpts());
419          if (EltRange.isInvalid())
420            continue;
421          if (EltRange.getBegin() < Range.getBegin() ||
422              EltRange.getEnd() >= Range.getEnd() ||
423              EltRange.getEnd() < Range.getBegin() ||
424              EltRange.getEnd() >= Range.getEnd())
425            continue;
426          unsigned Off = EltRange.getBegin().getRawEncoding() -
427                         Range.getBegin().getRawEncoding();
428          unsigned Len = EltRange.getEnd().getRawEncoding() -
429                         EltRange.getBegin().getRawEncoding();
430          for (unsigned I = 0; I < Len; ++I)
431            State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
432        }
433      }
434    }
435
436    // Finally, write the code with the correct <span>s.
437    unsigned Line =
438        AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
439    *OS << "<template data-copy='code'>\n";
440    *OS << "<code class='filename'>";
441    llvm::printHTMLEscaped(
442        llvm::sys::path::filename(
443            AST.getSourceManager().getFilename(Range.getBegin())),
444        *OS);
445    *OS << "</code>";
446    *OS << "<code class='line' data-line='" << Line++ << "'>";
447    for (unsigned I = 0; I < Code.size(); ++I) {
448      // Don't actually write a <span> around each character, only break spans
449      // when the TokenInfo changes.
450      bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
451      bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
452      if (NeedOpen) {
453        *OS << "<span ";
454        State[I].write(*OS);
455        *OS << ">";
456      }
457      if (Code[I] == '\n')
458        *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
459      else
460        llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
461      if (NeedClose) *OS << "</span>";
462    }
463    *OS << "</code>\n";
464    *OS << "</template>";
465  }
466
467  // Write the CFG diagram, a graph of basic blocks.
468  // Laying out graphs is hard, so we construct a graphviz description and shell
469  // out to `dot` to turn it into an SVG.
470  void writeCFG() {
471    *OS << "<template data-copy='cfg'>\n";
472    if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG())))
473      *OS << *SVG;
474    else
475      *OS << "Can't draw CFG: " << toString(SVG.takeError());
476    *OS << "</template>\n";
477  }
478
479  // Produce a graphviz description of a CFG.
480  static std::string buildCFGDot(const clang::CFG &CFG) {
481    std::string Graph;
482    llvm::raw_string_ostream GraphS(Graph);
483    // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
484    GraphS << R"(digraph {
485      tooltip=" "
486      node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
487      edge[tooltip = " "]
488)";
489    for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I)
490      GraphS << "  " << blockID(I) << " [id=" << blockID(I) << "]\n";
491    for (const auto *Block : CFG) {
492      for (const auto &Succ : Block->succs()) {
493        if (Succ.getReachableBlock())
494          GraphS << "  " << blockID(Block->getBlockID()) << " -> "
495                 << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
496      }
497    }
498    GraphS << "}\n";
499    return Graph;
500  }
501};
502
503// Nothing interesting here, just subprocess/temp-file plumbing.
504llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
505  std::string DotPath;
506  if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))
507    DotPath = FromEnv;
508  else {
509    auto FromPath = llvm::sys::findProgramByName("dot");
510    if (!FromPath)
511      return llvm::createStringError(FromPath.getError(),
512                                     "'dot' not found on PATH");
513    DotPath = FromPath.get();
514  }
515
516  // Create input and output files for `dot` subprocess.
517  // (We create the output file as empty, to reserve the temp filename).
518  llvm::SmallString<256> Input, Output;
519  int InputFD;
520  if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
521                                                   Input))
522    return llvm::createStringError(EC, "failed to create `dot` temp input");
523  llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
524  auto DeleteInput =
525      llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
526  if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
527    return llvm::createStringError(EC, "failed to create `dot` temp output");
528  auto DeleteOutput =
529      llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
530
531  std::vector<std::optional<llvm::StringRef>> Redirects = {
532      Input, Output,
533      /*stderr=*/std::nullopt};
534  std::string ErrMsg;
535  int Code = llvm::sys::ExecuteAndWait(
536      DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
537      /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
538  if (!ErrMsg.empty())
539    return llvm::createStringError(llvm::inconvertibleErrorCode(),
540                                   "'dot' failed: " + ErrMsg);
541  if (Code != 0)
542    return llvm::createStringError(llvm::inconvertibleErrorCode(),
543                                   "'dot' failed (" + llvm::Twine(Code) + ")");
544
545  auto Buf = llvm::MemoryBuffer::getFile(Output);
546  if (!Buf)
547    return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
548
549  // Output has <?xml> prefix we don't want. Skip to <svg> tag.
550  llvm::StringRef Result = Buf.get()->getBuffer();
551  auto Pos = Result.find("<svg");
552  if (Pos == llvm::StringRef::npos)
553    return llvm::createStringError(llvm::inconvertibleErrorCode(),
554                                   "Can't find <svg> tag in `dot` output");
555  return Result.substr(Pos).str();
556}
557
558} // namespace
559
560std::unique_ptr<Logger>
561Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
562  return std::make_unique<HTMLLogger>(std::move(Streams));
563}
564
565} // namespace clang::dataflow
566