clang  16.0.0git
Sarif.cpp
Go to the documentation of this file.
1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the definition of the SarifDocumentWriter class, and
11 /// associated builders such as:
12 /// - \ref SarifArtifact
13 /// - \ref SarifArtifactLocation
14 /// - \ref SarifRule
15 /// - \ref SarifResult
16 //===----------------------------------------------------------------------===//
17 #include "clang/Basic/Sarif.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/ConvertUTF.h"
25 #include "llvm/Support/JSON.h"
26 #include "llvm/Support/Path.h"
27 
28 #include <string>
29 #include <utility>
30 
31 using namespace clang;
32 using namespace llvm;
33 
36 
37 static StringRef getFileName(const FileEntry &FE) {
38  StringRef Filename = FE.tryGetRealPathName();
39  if (Filename.empty())
40  Filename = FE.getName();
41  return Filename;
42 }
43 /// \name URI
44 /// @{
45 
46 /// \internal
47 /// \brief
48 /// Return the RFC3986 encoding of the input character.
49 ///
50 /// \param C Character to encode to RFC3986.
51 ///
52 /// \return The RFC3986 representation of \c C.
54  // RFC 3986 claims alpha, numeric, and this handful of
55  // characters are not reserved for the path component and
56  // should be written out directly. Otherwise, percent
57  // encode the character and write that out instead of the
58  // reserved character.
59  if (llvm::isAlnum(C) ||
60  StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
61  return std::string(&C, 1);
62  return "%" + llvm::toHex(StringRef(&C, 1));
63 }
64 
65 /// \internal
66 /// \brief Return a URI representing the given file name.
67 ///
68 /// \param Filename The filename to be represented as URI.
69 ///
70 /// \return RFC3986 URI representing the input file name.
71 static std::string fileNameToURI(StringRef Filename) {
72  SmallString<32> Ret = StringRef("file://");
73 
74  // Get the root name to see if it has a URI authority.
75  StringRef Root = sys::path::root_name(Filename);
76  if (Root.startswith("//")) {
77  // There is an authority, so add it to the URI.
78  Ret += Root.drop_front(2).str();
79  } else if (!Root.empty()) {
80  // There is no authority, so end the component and add the root to the URI.
81  Ret += Twine("/" + Root).str();
82  }
83 
84  auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
85  assert(Iter != End && "Expected there to be a non-root path component.");
86  // Add the rest of the path components, encoding any reserved characters;
87  // we skip past the first path component, as it was handled it above.
88  std::for_each(++Iter, End, [&Ret](StringRef Component) {
89  // For reasons unknown to me, we may get a backslash with Windows native
90  // paths for the initial backslash following the drive component, which
91  // we need to ignore as a URI path part.
92  if (Component == "\\")
93  return;
94 
95  // Add the separator between the previous path part and the one being
96  // currently processed.
97  Ret += "/";
98 
99  // URI encode the part.
100  for (char C : Component) {
102  }
103  });
104 
105  return std::string(Ret);
106 }
107 /// @}
108 
109 /// \brief Calculate the column position expressed in the number of UTF-8 code
110 /// points from column start to the source location
111 ///
112 /// \param Loc The source location whose column needs to be calculated.
113 /// \param TokenLen Optional hint for when the token is multiple bytes long.
114 ///
115 /// \return The column number as a UTF-8 aware byte offset from column start to
116 /// the effective source location.
117 static unsigned int adjustColumnPos(FullSourceLoc Loc,
118  unsigned int TokenLen = 0) {
119  assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
120 
121  std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
123  Loc.getManager().getBufferOrNone(LocInfo.first);
124  assert(Buf && "got an invalid buffer for the location's file");
125  assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
126  "token extends past end of buffer?");
127 
128  // Adjust the offset to be the start of the line, since we'll be counting
129  // Unicode characters from there until our column offset.
130  unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
131  unsigned int Ret = 1;
132  while (Off < (LocInfo.second + TokenLen)) {
133  Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
134  Ret++;
135  }
136 
137  return Ret;
138 }
139 
140 /// \name SARIF Utilities
141 /// @{
142 
143 /// \internal
144 json::Object createMessage(StringRef Text) {
145  return json::Object{{"text", Text.str()}};
146 }
147 
148 /// \internal
149 /// \pre CharSourceRange must be a token range
150 static json::Object createTextRegion(const SourceManager &SM,
151  const CharSourceRange &R) {
152  FullSourceLoc BeginCharLoc{R.getBegin(), SM};
153  FullSourceLoc EndCharLoc{R.getEnd(), SM};
154  json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
155  {"startColumn", adjustColumnPos(BeginCharLoc)}};
156 
157  if (BeginCharLoc == EndCharLoc) {
158  Region["endColumn"] = adjustColumnPos(BeginCharLoc);
159  } else {
160  Region["endLine"] = EndCharLoc.getExpansionLineNumber();
161  Region["endColumn"] = adjustColumnPos(EndCharLoc);
162  }
163  return Region;
164 }
165 
166 static json::Object createLocation(json::Object &&PhysicalLocation,
167  StringRef Message = "") {
168  json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
169  if (!Message.empty())
170  Ret.insert({"message", createMessage(Message)});
171  return Ret;
172 }
173 
175  switch (I) {
177  return "important";
179  return "essential";
181  return "unimportant";
182  }
183  llvm_unreachable("Fully covered switch is not so fully covered");
184 }
185 
186 static StringRef resultLevelToStr(SarifResultLevel R) {
187  switch (R) {
189  return "none";
191  return "note";
193  return "warning";
195  return "error";
196  }
197  llvm_unreachable("Potentially un-handled SarifResultLevel. "
198  "Is the switch not fully covered?");
199 }
200 
201 static json::Object
202 createThreadFlowLocation(json::Object &&Location,
203  const ThreadFlowImportance &Importance) {
204  return json::Object{{"location", std::move(Location)},
205  {"importance", importanceToStr(Importance)}};
206 }
207 /// @}
208 
209 json::Object
210 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
211  assert(R.isValid() &&
212  "Cannot create a physicalLocation from invalid SourceRange!");
213  assert(R.isCharRange() &&
214  "Cannot create a physicalLocation from a token range!");
215  FullSourceLoc Start{R.getBegin(), SourceMgr};
216  const FileEntry *FE = Start.getExpansionLoc().getFileEntry();
217  assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
218 
219  const std::string &FileURI = fileNameToURI(getFileName(*FE));
220  auto I = CurrentArtifacts.find(FileURI);
221 
222  if (I == CurrentArtifacts.end()) {
223  uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
224  const SarifArtifactLocation &Location =
225  SarifArtifactLocation::create(FileURI).setIndex(Idx);
226  const SarifArtifact &Artifact = SarifArtifact::create(Location)
227  .setRoles({"resultFile"})
228  .setLength(FE->getSize())
229  .setMimeType("text/plain");
230  auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
231  // If inserted, ensure the original iterator points to the newly inserted
232  // element, so it can be used downstream.
233  if (StatusIter.second)
234  I = StatusIter.first;
235  }
236  assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
237  const SarifArtifactLocation &Location = I->second.Location;
238  json::Object ArtifactLocationObject{{"uri", Location.URI}};
239  if (Location.Index.has_value())
240  ArtifactLocationObject["index"] = Location.Index.value();
241  return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
242  {"region", createTextRegion(SourceMgr, R)}}};
243 }
244 
245 json::Object &SarifDocumentWriter::getCurrentTool() {
246  assert(!Closed && "SARIF Document is closed. "
247  "Need to call createRun() before using getcurrentTool!");
248 
249  // Since Closed = false here, expect there to be at least 1 Run, anything
250  // else is an invalid state.
251  assert(!Runs.empty() && "There are no runs associated with the document!");
252 
253  return *Runs.back().getAsObject()->get("tool")->getAsObject();
254 }
255 
256 void SarifDocumentWriter::reset() {
257  CurrentRules.clear();
258  CurrentArtifacts.clear();
259 }
260 
262  // Exit early if trying to close a closed Document.
263  if (Closed) {
264  reset();
265  return;
266  }
267 
268  // Since Closed = false here, expect there to be at least 1 Run, anything
269  // else is an invalid state.
270  assert(!Runs.empty() && "There are no runs associated with the document!");
271 
272  // Flush all the rules.
273  json::Object &Tool = getCurrentTool();
274  json::Array Rules;
275  for (const SarifRule &R : CurrentRules) {
276  json::Object Config{
277  {"enabled", R.DefaultConfiguration.Enabled},
278  {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
279  {"rank", R.DefaultConfiguration.Rank}};
280  json::Object Rule{
281  {"name", R.Name},
282  {"id", R.Id},
283  {"fullDescription", json::Object{{"text", R.Description}}},
284  {"defaultConfiguration", std::move(Config)}};
285  if (!R.HelpURI.empty())
286  Rule["helpUri"] = R.HelpURI;
287  Rules.emplace_back(std::move(Rule));
288  }
289  json::Object &Driver = *Tool.getObject("driver");
290  Driver["rules"] = std::move(Rules);
291 
292  // Flush all the artifacts.
293  json::Object &Run = getCurrentRun();
294  json::Array *Artifacts = Run.getArray("artifacts");
295  for (const auto &Pair : CurrentArtifacts) {
296  const SarifArtifact &A = Pair.getValue();
297  json::Object Loc{{"uri", A.Location.URI}};
298  if (A.Location.Index.has_value()) {
299  Loc["index"] = static_cast<int64_t>(A.Location.Index.value());
300  }
301  json::Object Artifact;
302  Artifact["location"] = std::move(Loc);
303  if (A.Length.has_value())
304  Artifact["length"] = static_cast<int64_t>(A.Length.value());
305  if (!A.Roles.empty())
306  Artifact["roles"] = json::Array(A.Roles);
307  if (!A.MimeType.empty())
308  Artifact["mimeType"] = A.MimeType;
309  if (A.Offset.has_value())
310  Artifact["offset"] = A.Offset;
311  Artifacts->push_back(json::Value(std::move(Artifact)));
312  }
313 
314  // Clear, reset temporaries before next run.
315  reset();
316 
317  // Mark the document as closed.
318  Closed = true;
319 }
320 
321 json::Array
322 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
323  json::Object Ret{{"locations", json::Array{}}};
324  json::Array Locs;
325  for (const auto &ThreadFlow : ThreadFlows) {
326  json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
327  json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
328  Locs.emplace_back(
329  createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
330  }
331  Ret["locations"] = std::move(Locs);
332  return json::Array{std::move(Ret)};
333 }
334 
335 json::Object
336 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
337  return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
338 }
339 
340 void SarifDocumentWriter::createRun(StringRef ShortToolName,
341  StringRef LongToolName,
342  StringRef ToolVersion) {
343  // Clear resources associated with a previous run.
344  endRun();
345 
346  // Signify a new run has begun.
347  Closed = false;
348 
349  json::Object Tool{
350  {"driver",
351  json::Object{{"name", ShortToolName},
352  {"fullName", LongToolName},
353  {"language", "en-US"},
354  {"version", ToolVersion},
355  {"informationUri",
356  "https://clang.llvm.org/docs/UsersManual.html"}}}};
357  json::Object TheRun{{"tool", std::move(Tool)},
358  {"results", {}},
359  {"artifacts", {}},
360  {"columnKind", "unicodeCodePoints"}};
361  Runs.emplace_back(std::move(TheRun));
362 }
363 
364 json::Object &SarifDocumentWriter::getCurrentRun() {
365  assert(!Closed &&
366  "SARIF Document is closed. "
367  "Can only getCurrentRun() if document is opened via createRun(), "
368  "create a run first");
369 
370  // Since Closed = false here, expect there to be at least 1 Run, anything
371  // else is an invalid state.
372  assert(!Runs.empty() && "There are no runs associated with the document!");
373  return *Runs.back().getAsObject();
374 }
375 
377  size_t Ret = CurrentRules.size();
378  CurrentRules.emplace_back(Rule);
379  return Ret;
380 }
381 
383  size_t RuleIdx = Result.RuleIdx;
384  assert(RuleIdx < CurrentRules.size() &&
385  "Trying to reference a rule that doesn't exist");
386  const SarifRule &Rule = CurrentRules[RuleIdx];
387  assert(Rule.DefaultConfiguration.Enabled &&
388  "Cannot add a result referencing a disabled Rule");
389  json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
390  {"ruleIndex", static_cast<int64_t>(RuleIdx)},
391  {"ruleId", Rule.Id}};
392  if (!Result.Locations.empty()) {
393  json::Array Locs;
394  for (auto &Range : Result.Locations) {
395  Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
396  }
397  Ret["locations"] = std::move(Locs);
398  }
399  if (!Result.ThreadFlows.empty())
400  Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
401 
402  Ret["level"] = resultLevelToStr(
403  Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
404 
405  json::Object &Run = getCurrentRun();
406  json::Array *Results = Run.getArray("results");
407  Results->emplace_back(std::move(Ret));
408 }
409 
411  // Flush all temporaries to their destinations if needed.
412  endRun();
413 
414  json::Object Doc{
415  {"$schema", SchemaURI},
416  {"version", SchemaVersion},
417  };
418  if (!Runs.empty())
419  Doc["runs"] = json::Array(Runs);
420  return Doc;
421 }
fileNameToURI
static std::string fileNameToURI(StringRef Filename)
Definition: Sarif.cpp:71
clang::FullSourceLoc::getManager
const SourceManager & getManager() const
Definition: SourceLocation.h:382
llvm
YAML serialization mapping.
Definition: Dominators.h:30
clang::detail::SarifArtifactLocation
Definition: Sarif.h:71
clang::CharSourceRange::isCharRange
bool isCharRange() const
Definition: SourceLocation.h:281
resultLevelToStr
static StringRef resultLevelToStr(SarifResultLevel R)
Definition: Sarif.cpp:186
clang::SarifResultLevel::Warning
@ Warning
clang::CharSourceRange::getBegin
SourceLocation getBegin() const
Definition: SourceLocation.h:283
clang::FullSourceLoc
A SourceLocation and its associated SourceManager.
Definition: SourceLocation.h:368
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::ThreadFlowImportance::Important
@ Important
Ret
static bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.cpp:34
clang::SarifDocumentWriter::createRun
void createRun(const llvm::StringRef ShortToolName, const llvm::StringRef LongToolName, const llvm::StringRef ToolVersion=CLANG_VERSION_STRING)
Create a new run with which any upcoming analysis will be associated.
Definition: Sarif.cpp:340
clang::if
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
Definition: RecursiveASTVisitor.h:1081
clang::SourceManager::getBufferOrNone
llvm::Optional< llvm::MemoryBufferRef > getBufferOrNone(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
Definition: SourceManager.h:1024
Filename
StringRef Filename
Definition: Format.cpp:2715
llvm::Optional
Definition: LLVM.h:40
SourceManager.h
clang::SarifResultLevel::Error
@ Error
getFileName
static StringRef getFileName(const FileEntry &FE)
Definition: Sarif.cpp:37
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:636
clang::SarifResultLevel::None
@ None
clang::SarifDocumentWriter::createRule
size_t createRule(const SarifRule &Rule)
Associate the given rule with the current run.
Definition: Sarif.cpp:376
clang::SarifResult
A SARIF result (also called a "reporting item") is a unit of output produced when one of the tool's r...
Definition: Sarif.h:315
createThreadFlowLocation
static json::Object createThreadFlowLocation(json::Object &&Location, const ThreadFlowImportance &Importance)
Definition: Sarif.cpp:202
clang::ThreadFlowImportance::Essential
@ Essential
llvm::SmallString< 32 >
percentEncodeURICharacter
static std::string percentEncodeURICharacter(char C)
Definition: Sarif.cpp:53
clang::FileEntry::tryGetRealPathName
StringRef tryGetRealPathName() const
Definition: FileEntry.h:400
clang::SarifRule
A SARIF rule (reportingDescriptor object) contains information that describes a reporting item genera...
Definition: Sarif.h:257
clang::FileEntry
Cached information about one file (either on disk or in the virtual file system).
Definition: FileEntry.h:366
SourceLocation.h
clang::serialized_diags::create
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
Definition: SerializedDiagnosticPrinter.cpp:301
clang::FullSourceLoc::getDecomposedExpansionLoc
std::pair< FileID, unsigned > getDecomposedExpansionLoc() const
Decompose the underlying SourceLocation into a raw (FileID + Offset) pair, after walking through all ...
Definition: SourceLocation.cpp:169
importanceToStr
static StringRef importanceToStr(ThreadFlowImportance I)
Definition: Sarif.cpp:174
llvm::ArrayRef
Definition: LLVM.h:34
createThreadFlows
static SmallVector< ThreadFlow, 8 > createThreadFlows(const PathDiagnostic *Diag, const LangOptions &LO)
Definition: SarifDiagnostics.cpp:129
Value
Value
Definition: UninitializedValues.cpp:103
clang::CharSourceRange
Represents a character-granular source range.
Definition: SourceLocation.h:253
clang::CharSourceRange::getEnd
SourceLocation getEnd() const
Definition: SourceLocation.h:284
clang::SarifResultLevel::Note
@ Note
clang::ThreadFlowImportance
ThreadFlowImportance
Definition: Sarif.h:146
clang::CharSourceRange::isValid
bool isValid() const
Definition: SourceLocation.h:291
clang::SourceLocation::isInvalid
bool isInvalid() const
Definition: SourceLocation.h:111
clang
Definition: CalledOnceCheck.h:17
Text
StringRef Text
Definition: Format.cpp:2716
hlsl::int64_t
long int64_t
Definition: hlsl_basic_types.h:26
createTextRegion
static json::Object createTextRegion(const SourceManager &SM, const CharSourceRange &R)
Definition: Sarif.cpp:150
clang::FileEntry::getName
StringRef getName() const
Definition: FileEntry.h:397
Sarif.h
adjustColumnPos
static unsigned int adjustColumnPos(FullSourceLoc Loc, unsigned int TokenLen=0)
Calculate the column position expressed in the number of UTF-8 code points from column start to the s...
Definition: Sarif.cpp:117
createLocation
static json::Object createLocation(json::Object &&PhysicalLocation, StringRef Message="")
Definition: Sarif.cpp:166
clang::SarifDocumentWriter::appendResult
void appendResult(const SarifResult &SarifResult)
Append a new result to the currently in-flight run.
Definition: Sarif.cpp:382
clang::SarifResultLevel
SarifResultLevel
The level of severity associated with a SarifResult.
Definition: Sarif.h:165
clang::ThreadFlowImportance::Unimportant
@ Unimportant
clang::detail::SarifArtifact
Since every clang artifact MUST have a location (there being no nested artifacts),...
Definition: Sarif.h:104
SM
#define SM(sm)
Definition: Cuda.cpp:79
clang::FileEntry::getSize
off_t getSize() const
Definition: FileEntry.h:401
clang::SarifDocumentWriter::createDocument
llvm::json::Object createDocument()
Return the SARIF document in its current state.
Definition: Sarif.cpp:410
clang::ThreadFlow
A thread flow is a sequence of code locations that specify a possible path through a single thread of...
Definition: Sarif.h:175
createMessage
json::Object createMessage(StringRef Text)
Definition: Sarif.cpp:144
clang::FullSourceLoc::getExpansionColumnNumber
unsigned getExpansionColumnNumber(bool *Invalid=nullptr) const
Definition: SourceLocation.cpp:235
clang::SarifDocumentWriter::endRun
void endRun()
If there is a current run, end it.
Definition: Sarif.cpp:261