clang 18.0.0git
HTMLLogger.cpp
Go to the documentation of this file.
1//===-- HTMLLogger.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the HTML logger. Given a directory dir/, we write
10// dir/0.html for the first analysis, etc.
11// These files contain a visualization that allows inspecting the CFG and the
12// state of the analysis at each point.
13// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14// so each output file is self-contained.
15//
16// VIEWS
17//
18// The timeline and function view are always shown. These allow selecting basic
19// blocks, statements within them, and processing iterations (BBs are visited
20// multiple times when e.g. loops are involved).
21// These are written directly into the HTML body.
22//
23// There are also listings of particular basic blocks, and dumps of the state
24// at particular analysis points (i.e. BB2 iteration 3 statement 2).
25// These are only shown when the relevant BB/analysis point is *selected*.
26//
27// DATA AND TEMPLATES
28//
29// The HTML proper is mostly static.
30// The analysis data is in a JSON object HTMLLoggerData which is embedded as
31// a <script> in the <head>.
32// This gets rendered into DOM by a simple template processor which substitutes
33// the data into <template> tags embedded in the HTML. (see inflate() in JS).
34//
35// SELECTION
36//
37// This is the only real interactive mechanism.
38//
39// At any given time, there are several named selections, e.g.:
40// bb: B2 (basic block 2 is selected)
41// elt: B2.4 (statement 4 is selected)
42// iter: B2:1 (iteration 1 of the basic block is selected)
43// hover: B3 (hovering over basic block 3)
44//
45// The selection is updated by mouse events: hover by moving the mouse and
46// others by clicking. Elements that are click targets generally have attributes
47// (id or data-foo) that define what they should select.
48// See watchSelection() in JS for the exact logic.
49//
50// When the "bb" selection is set to "B2":
51// - sections <section data-selection="bb"> get shown
52// - templates under such sections get re-rendered
53// - elements with class/id "B2" get class "bb-select"
54//
55//===----------------------------------------------------------------------===//
56
63#include "clang/Lex/Lexer.h"
64#include "llvm/ADT/DenseMap.h"
65#include "llvm/ADT/ScopeExit.h"
66#include "llvm/Support/Error.h"
67#include "llvm/Support/FormatVariadic.h"
68#include "llvm/Support/JSON.h"
69#include "llvm/Support/Program.h"
70#include "llvm/Support/ScopedPrinter.h"
71#include "llvm/Support/raw_ostream.h"
72// Defines assets: HTMLLogger_{html_js,css}
73#include "HTMLLogger.inc"
74
75namespace clang::dataflow {
76namespace {
77
78// Render a graphviz graph specification to SVG using the `dot` tool.
79llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
80
81using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
82
83// Recursively dumps Values/StorageLocations as JSON
84class ModelDumper {
85public:
// Binds the dumper to the JSON stream it writes to and to the environment
// whose values and storage locations it looks up. Both are held by reference.
86 ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87 : JOS(JOS), Env(Env) {}
88
// Writes attributes describing V into the JSON object currently open on JOS.
// "value_id" is always written; everything else is written only the first
// time this dumper sees V (tracked in Visited), so a value that is reachable
// more than once is expanded only once.
89 void dump(Value &V) {
90 JOS.attribute("value_id", llvm::to_string(&V));
91 if (!Visited.insert(&V).second)
92 return;
93
94 JOS.attribute("kind", debugString(V.getKind()));
95
// NOTE(review): the `case` labels of this switch (original lines 97-101 and
// 103) are not visible in this listing; restore them from the upstream file.
// The visible arm dumps the pointee location of a PointerValue.
96 switch (V.getKind()) {
102 break;
104 JOS.attributeObject(
105 "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
106 break;
107 }
108
// Each property of V becomes a nested object keyed "p:<property name>".
109 for (const auto& Prop : V.properties())
110 JOS.attributeObject(("p:" + Prop.first()).str(),
111 [&] { dump(*Prop.second); });
112
113 // Running the SAT solver is expensive, but knowing which booleans are
114 // guaranteed true/false here is valuable and hard to determine by hand.
// "truth" is "true" if the flow condition implies the formula, "false" if it
// implies the negation, and "unknown" otherwise.
115 if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
116 JOS.attribute("formula", llvm::to_string(B->formula()));
117 JOS.attribute(
118 "truth", Env.flowConditionImplies(B->formula()) ? "true"
119 : Env.flowConditionImplies(Env.arena().makeNot(B->formula()))
120 ? "false"
121 : "unknown");
122 }
123 }
124 void dump(const StorageLocation &L) {
125 JOS.attribute("location", llvm::to_string(&L));
126 if (!Visited.insert(&L).second)
127 return;
128
129 JOS.attribute("type", L.getType().getAsString());
130 if (auto *V = Env.getValue(L))
131 dump(*V);
132
133 if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) {
134 for (const auto &Child : RLoc->children())
135 JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
136 if (Child.second)
137 if (Value *Val = Env.getValue(*Child.second))
138 dump(*Val);
139 });
140 }
141 }
142
144 llvm::json::OStream &JOS;
145 const Environment &Env;
146};
147
148class HTMLLogger : public Logger {
// One visit of a CFG block by the analysis.
// NOTE(review): other code in this class also reads a `PostVisit` member
// (original line 152) that is not visible in this listing.
149 struct Iteration {
// The block that was visited.
150 const CFGBlock *Block;
// 1-based count of visits to Block, this one included (see enterBlock()).
151 unsigned Iter;
153 };
154
155 StreamFactory Streams;
156 std::unique_ptr<llvm::raw_ostream> OS;
157 std::optional<llvm::json::OStream> JOS;
158
159 const ControlFlowContext *CFG;
160 // Timeline of iterations of CFG block visitation.
161 std::vector<Iteration> Iters;
162 // Number of times each CFG block has been seen.
163 llvm::DenseMap<const CFGBlock *, llvm::SmallVector<Iteration>> BlockIters;
164 // The messages logged in the current context but not yet written.
165 std::string ContextLogs;
166 // The number of elements we have visited within the current CFG block.
167 unsigned ElementIndex;
168
169public:
// Takes ownership of the stream factory; beginAnalysis() calls it to obtain
// the output stream for each analysis.
170 explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
171 void beginAnalysis(const ControlFlowContext &CFG,
172 TypeErasedDataflowAnalysis &A) override {
173 OS = Streams();
174 this->CFG = &CFG;
175 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
176
177 const auto &D = CFG.getDecl();
178 const auto &SM = A.getASTContext().getSourceManager();
179 *OS << "<title>";
180 if (const auto *ND = dyn_cast<NamedDecl>(&D))
181 *OS << ND->getNameAsString() << " at ";
182 *OS << SM.getFilename(D.getLocation()) << ":"
183 << SM.getSpellingLineNumber(D.getLocation());
184 *OS << "</title>\n";
185
186 *OS << "<style>" << HTMLLogger_css << "</style>\n";
187 *OS << "<script>" << HTMLLogger_js << "</script>\n";
188
189 writeCode();
190 writeCFG();
191
192 *OS << "<script>var HTMLLoggerData = \n";
193 JOS.emplace(*OS, /*Indent=*/2);
194 JOS->objectBegin();
195 JOS->attributeBegin("states");
196 JOS->objectBegin();
197 }
198 // Between beginAnalysis() and endAnalysis() we write all the states for
199 // particular analysis points into the `states` object.
200 void endAnalysis() override {
201 JOS->objectEnd();
202 JOS->attributeEnd();
203
204 JOS->attributeArray("timeline", [&] {
205 for (const auto &E : Iters) {
206 JOS->object([&] {
207 JOS->attribute("block", blockID(E.Block->getBlockID()));
208 JOS->attribute("iter", E.Iter);
209 JOS->attribute("post_visit", E.PostVisit);
210 });
211 }
212 });
213 JOS->attributeObject("cfg", [&] {
214 for (const auto &E : BlockIters)
215 writeBlock(*E.first, E.second);
216 });
217
218 JOS->objectEnd();
219 JOS.reset();
220 *OS << ";\n</script>\n";
221 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
222 }
223
224 void enterBlock(const CFGBlock &B, bool PostVisit) override {
225 llvm::SmallVector<Iteration> &BIter = BlockIters[&B];
226 unsigned IterNum = BIter.size() + 1;
227 BIter.push_back({&B, IterNum, PostVisit});
228 Iters.push_back({&B, IterNum, PostVisit});
229 ElementIndex = 0;
230 }
231 void enterElement(const CFGElement &E) override {
232 ++ElementIndex;
233 }
234
235 static std::string blockID(unsigned Block) {
236 return llvm::formatv("B{0}", Block);
237 }
238 static std::string eltID(unsigned Block, unsigned Element) {
239 return llvm::formatv("B{0}.{1}", Block, Element);
240 }
241 static std::string iterID(unsigned Block, unsigned Iter) {
242 return llvm::formatv("B{0}:{1}", Block, Iter);
243 }
244 static std::string elementIterID(unsigned Block, unsigned Iter,
245 unsigned Element) {
246 return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
247 }
248
249 // Write the analysis state associated with a particular analysis point.
250 // FIXME: this dump is fairly opaque. We should show:
251 // - values associated with the current Stmt
252 // - values associated with its children
253 // - meaningful names for values
254 // - which boolean values are implied true/false by the flow condition
255 void recordState(TypeErasedDataflowAnalysisState &State) override {
256 unsigned Block = Iters.back().Block->getBlockID();
257 unsigned Iter = Iters.back().Iter;
258 bool PostVisit = Iters.back().PostVisit;
259 JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
260 JOS->attribute("block", blockID(Block));
261 JOS->attribute("iter", Iter);
262 JOS->attribute("post_visit", PostVisit);
263 JOS->attribute("element", ElementIndex);
264
265 // If this state immediately follows an Expr, show its built-in model.
266 if (ElementIndex > 0) {
267 auto S =
268 Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>();
269 if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {
270 if (E->isPRValue()) {
271 if (auto *V = State.Env.getValue(*E))
272 JOS->attributeObject(
273 "value", [&] { ModelDumper(*JOS, State.Env).dump(*V); });
274 } else {
275 if (auto *Loc = State.Env.getStorageLocation(*E))
276 JOS->attributeObject(
277 "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); });
278 }
279 }
280 }
281 if (!ContextLogs.empty()) {
282 JOS->attribute("logs", ContextLogs);
283 ContextLogs.clear();
284 }
285 {
286 std::string BuiltinLattice;
287 llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
288 State.Env.dump(BuiltinLatticeS);
289 JOS->attribute("builtinLattice", BuiltinLattice);
290 }
291 });
292 }
293 void blockConverged() override { logText("Block converged"); }
294
295 void logText(llvm::StringRef S) override {
296 ContextLogs.append(S.begin(), S.end());
297 ContextLogs.push_back('\n');
298 }
299
300private:
301 // Write the CFG block details.
302 // Currently this is just the list of elements in execution order.
303 // FIXME: an AST dump would be a useful view, too.
304 void writeBlock(const CFGBlock &B, llvm::ArrayRef<Iteration> ItersForB) {
305 JOS->attributeObject(blockID(B.getBlockID()), [&] {
306 JOS->attributeArray("iters", [&] {
307 for (const auto &Iter : ItersForB) {
308 JOS->object([&] {
309 JOS->attribute("iter", Iter.Iter);
310 JOS->attribute("post_visit", Iter.PostVisit);
311 });
312 }
313 });
314 JOS->attributeArray("elements", [&] {
315 for (const auto &Elt : B.Elements) {
316 std::string Dump;
317 llvm::raw_string_ostream DumpS(Dump);
318 Elt.dumpToStream(DumpS);
319 JOS->value(Dump);
320 }
321 });
322 });
323 }
324
325 // Write the code of function being examined.
326 // We want to overlay the code with <span>s that mark which BB particular
327 // tokens are associated with, and even which BB element (so that clicking
328 // can select the right element).
329 void writeCode() {
330 const auto &AST = CFG->getDecl().getASTContext();
331 bool Invalid = false;
332
333 // Extract the source code from the original file.
334 // Pretty-printing from the AST would probably be nicer (no macros or
335 // indentation to worry about), but we need the boundaries of particular
336 // AST nodes and the printer doesn't provide this.
338 CharSourceRange::getTokenRange(CFG->getDecl().getSourceRange()),
339 AST.getSourceManager(), AST.getLangOpts());
340 if (Range.isInvalid())
341 return;
342 llvm::StringRef Code = clang::Lexer::getSourceText(
343 Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
344 if (Invalid)
345 return;
346
347 static constexpr unsigned Missing = -1;
348 // TokenInfo stores the BB and set of elements that a token is part of.
349 struct TokenInfo {
350 // The basic block this is part of.
351 // This is the BB of the stmt with the smallest containing range.
352 unsigned BB = Missing;
353 unsigned BBPriority = 0;
354 // The most specific stmt this is part of (smallest range).
355 unsigned Elt = Missing;
356 unsigned EltPriority = 0;
357 // All stmts this is part of.
358 SmallVector<unsigned> Elts;
359
360 // Mark this token as being part of BB.Elt.
361 // RangeLen is the character length of the element's range, used to
362 // distinguish inner vs outer statements.
363 // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
364 // However "a" has a smaller range, so is more specific. Clicking on the
365 // token "a" should select the stmt "a".
366 void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
367 // A worse BB (larger range) => ignore.
368 if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
369 return;
370 if (BB != this->BB) {
371 this->BB = BB;
372 Elts.clear();
373 BBPriority = RangeLen;
374 }
375 BBPriority = std::min(BBPriority, RangeLen);
376 Elts.push_back(Elt);
377 if (this->Elt == Missing || EltPriority > RangeLen)
378 this->Elt = Elt;
379 }
380 bool operator==(const TokenInfo &Other) const {
381 return std::tie(BB, Elt, Elts) ==
382 std::tie(Other.BB, Other.Elt, Other.Elts);
383 }
384 // Write the attributes for the <span> on this token.
385 void write(llvm::raw_ostream &OS) const {
386 OS << "class='c";
387 if (BB != Missing)
388 OS << " " << blockID(BB);
389 for (unsigned Elt : Elts)
390 OS << " " << eltID(BB, Elt);
391 OS << "'";
392
393 if (Elt != Missing)
394 OS << " data-elt='" << eltID(BB, Elt) << "'";
395 if (BB != Missing)
396 OS << " data-bb='" << blockID(BB) << "'";
397 }
398 };
399
400 // Construct one TokenInfo per character in a flat array.
401 // This is inefficient (chars in a token all have the same info) but simple.
402 std::vector<TokenInfo> State(Code.size());
403 for (const auto *Block : CFG->getCFG()) {
404 unsigned EltIndex = 0;
405 for (const auto& Elt : *Block) {
406 ++EltIndex;
407 if (const auto S = Elt.getAs<CFGStmt>()) {
408 auto EltRange = clang::Lexer::makeFileCharRange(
409 CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
410 AST.getSourceManager(), AST.getLangOpts());
411 if (EltRange.isInvalid())
412 continue;
413 if (EltRange.getBegin() < Range.getBegin() ||
414 EltRange.getEnd() >= Range.getEnd() ||
415 EltRange.getEnd() < Range.getBegin() ||
416 EltRange.getEnd() >= Range.getEnd())
417 continue;
418 unsigned Off = EltRange.getBegin().getRawEncoding() -
419 Range.getBegin().getRawEncoding();
420 unsigned Len = EltRange.getEnd().getRawEncoding() -
421 EltRange.getBegin().getRawEncoding();
422 for (unsigned I = 0; I < Len; ++I)
423 State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
424 }
425 }
426 }
427
428 // Finally, write the code with the correct <span>s.
429 unsigned Line =
430 AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
431 *OS << "<template data-copy='code'>\n";
432 *OS << "<code class='filename'>";
433 llvm::printHTMLEscaped(
434 llvm::sys::path::filename(
435 AST.getSourceManager().getFilename(Range.getBegin())),
436 *OS);
437 *OS << "</code>";
438 *OS << "<code class='line' data-line='" << Line++ << "'>";
439 for (unsigned I = 0; I < Code.size(); ++I) {
440 // Don't actually write a <span> around each character, only break spans
441 // when the TokenInfo changes.
442 bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
443 bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
444 if (NeedOpen) {
445 *OS << "<span ";
446 State[I].write(*OS);
447 *OS << ">";
448 }
449 if (Code[I] == '\n')
450 *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
451 else
452 llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
453 if (NeedClose) *OS << "</span>";
454 }
455 *OS << "</code>\n";
456 *OS << "</template>";
457 }
458
459 // Write the CFG diagram, a graph of basic blocks.
460 // Laying out graphs is hard, so we construct a graphviz description and shell
461 // out to `dot` to turn it into an SVG.
462 void writeCFG() {
463 *OS << "<template data-copy='cfg'>\n";
464 if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG())))
465 *OS << *SVG;
466 else
467 *OS << "Can't draw CFG: " << toString(SVG.takeError());
468 *OS << "</template>\n";
469 }
470
471 // Produce a graphviz description of a CFG.
472 static std::string buildCFGDot(const clang::CFG &CFG) {
473 std::string Graph;
474 llvm::raw_string_ostream GraphS(Graph);
475 // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
476 GraphS << R"(digraph {
477 tooltip=" "
478 node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
479 edge[tooltip = " "]
480)";
481 for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I)
482 GraphS << " " << blockID(I) << " [id=" << blockID(I) << "]\n";
483 for (const auto *Block : CFG) {
484 for (const auto &Succ : Block->succs()) {
485 if (Succ.getReachableBlock())
486 GraphS << " " << blockID(Block->getBlockID()) << " -> "
487 << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
488 }
489 }
490 GraphS << "}\n";
491 return Graph;
492 }
493};
494
495// Nothing interesting here, just subprocess/temp-file plumbing.
496llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
497 std::string DotPath;
498 if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))
499 DotPath = FromEnv;
500 else {
501 auto FromPath = llvm::sys::findProgramByName("dot");
502 if (!FromPath)
503 return llvm::createStringError(FromPath.getError(),
504 "'dot' not found on PATH");
505 DotPath = FromPath.get();
506 }
507
508 // Create input and output files for `dot` subprocess.
509 // (We create the output file as empty, to reserve the temp filename).
510 llvm::SmallString<256> Input, Output;
511 int InputFD;
512 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
513 Input))
514 return llvm::createStringError(EC, "failed to create `dot` temp input");
515 llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
516 auto DeleteInput =
517 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
518 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
519 return llvm::createStringError(EC, "failed to create `dot` temp output");
520 auto DeleteOutput =
521 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
522
523 std::vector<std::optional<llvm::StringRef>> Redirects = {
524 Input, Output,
525 /*stderr=*/std::nullopt};
526 std::string ErrMsg;
527 int Code = llvm::sys::ExecuteAndWait(
528 DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
529 /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
530 if (!ErrMsg.empty())
531 return llvm::createStringError(llvm::inconvertibleErrorCode(),
532 "'dot' failed: " + ErrMsg);
533 if (Code != 0)
534 return llvm::createStringError(llvm::inconvertibleErrorCode(),
535 "'dot' failed (" + llvm::Twine(Code) + ")");
536
537 auto Buf = llvm::MemoryBuffer::getFile(Output);
538 if (!Buf)
539 return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
540
541 // Output has <?xml> prefix we don't want. Skip to <svg> tag.
542 llvm::StringRef Result = Buf.get()->getBuffer();
543 auto Pos = Result.find("<svg");
544 if (Pos == llvm::StringRef::npos)
545 return llvm::createStringError(llvm::inconvertibleErrorCode(),
546 "Can't find <svg> tag in `dot` output");
547 return Result.substr(Pos).str();
548}
549
550} // namespace
551
552std::unique_ptr<Logger>
553Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
554 return std::make_unique<HTMLLogger>(std::move(Streams));
555}
556
557} // namespace clang::dataflow
#define V(N, I)
Definition: ASTContext.h:3233
#define SM(sm)
Definition: Cuda.cpp:80
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
llvm::json::OStream & JOS
Definition: HTMLLogger.cpp:144
const CFGBlock * Block
Definition: HTMLLogger.cpp:150
const Environment & Env
Definition: HTMLLogger.cpp:145
llvm::DenseSet< const void * > Visited
Definition: HTMLLogger.cpp:143
unsigned Iter
Definition: HTMLLogger.cpp:151
bool PostVisit
Definition: HTMLLogger.cpp:152
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
Defines the SourceManager interface.
Represents a source-level, intra-procedural CFG that represents the control-flow of a Stmt.
Definition: CFG.h:1211
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
Definition: Lexer.cpp:976
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
Definition: Lexer.cpp:907
Dataflow Directional Tag Classes.
Definition: Arena.h:17
llvm::StringRef debugString(Value::Kind Kind)
Returns a string representation of a value kind.
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition: CallGraph.h:207
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
Definition: Format.h:5078