clang 18.0.0git
Sarif.cpp
Go to the documentation of this file.
1//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the definition of the SarifDocumentWriter class, and
11/// associated builders such as:
12/// - \ref SarifArtifact
13/// - \ref SarifArtifactLocation
14/// - \ref SarifRule
15/// - \ref SarifResult
16//===----------------------------------------------------------------------===//
17#include "clang/Basic/Sarif.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/ADT/StringMap.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/Support/ConvertUTF.h"
26#include "llvm/Support/JSON.h"
27#include "llvm/Support/Path.h"
28
29#include <optional>
30#include <string>
31#include <utility>
32
33using namespace clang;
34using namespace llvm;
35
38
39static StringRef getFileName(FileEntryRef FE) {
40 StringRef Filename = FE.getFileEntry().tryGetRealPathName();
41 if (Filename.empty())
42 Filename = FE.getName();
43 return Filename;
44}
45/// \name URI
46/// @{
47
48/// \internal
49/// \brief
50/// Return the RFC3986 encoding of the input character.
51///
52/// \param C Character to encode to RFC3986.
53///
54/// \return The RFC3986 representation of \c C.
55static std::string percentEncodeURICharacter(char C) {
56 // RFC 3986 claims alpha, numeric, and this handful of
57 // characters are not reserved for the path component and
58 // should be written out directly. Otherwise, percent
59 // encode the character and write that out instead of the
60 // reserved character.
61 if (llvm::isAlnum(C) ||
62 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
63 return std::string(&C, 1);
64 return "%" + llvm::toHex(StringRef(&C, 1));
65}
66
67/// \internal
68/// \brief Return a URI representing the given file name.
69///
70/// \param Filename The filename to be represented as URI.
71///
72/// \return RFC3986 URI representing the input file name.
73static std::string fileNameToURI(StringRef Filename) {
74 SmallString<32> Ret = StringRef("file://");
75
76 // Get the root name to see if it has a URI authority.
77 StringRef Root = sys::path::root_name(Filename);
78 if (Root.startswith("//")) {
79 // There is an authority, so add it to the URI.
80 Ret += Root.drop_front(2).str();
81 } else if (!Root.empty()) {
82 // There is no authority, so end the component and add the root to the URI.
83 Ret += Twine("/" + Root).str();
84 }
85
86 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
87 assert(Iter != End && "Expected there to be a non-root path component.");
88 // Add the rest of the path components, encoding any reserved characters;
89 // we skip past the first path component, as it was handled it above.
90 for (StringRef Component : llvm::make_range(++Iter, End)) {
91 // For reasons unknown to me, we may get a backslash with Windows native
92 // paths for the initial backslash following the drive component, which
93 // we need to ignore as a URI path part.
94 if (Component == "\\")
95 continue;
96
97 // Add the separator between the previous path part and the one being
98 // currently processed.
99 Ret += "/";
100
101 // URI encode the part.
102 for (char C : Component) {
104 }
105 }
106
107 return std::string(Ret);
108}
109/// @}
110
111/// \brief Calculate the column position expressed in the number of UTF-8 code
112/// points from column start to the source location
113///
114/// \param Loc The source location whose column needs to be calculated.
115/// \param TokenLen Optional hint for when the token is multiple bytes long.
116///
117/// \return The column number as a UTF-8 aware byte offset from column start to
118/// the effective source location.
119static unsigned int adjustColumnPos(FullSourceLoc Loc,
120 unsigned int TokenLen = 0) {
121 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
122
123 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
124 std::optional<MemoryBufferRef> Buf =
125 Loc.getManager().getBufferOrNone(LocInfo.first);
126 assert(Buf && "got an invalid buffer for the location's file");
127 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
128 "token extends past end of buffer?");
129
130 // Adjust the offset to be the start of the line, since we'll be counting
131 // Unicode characters from there until our column offset.
132 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
133 unsigned int Ret = 1;
134 while (Off < (LocInfo.second + TokenLen)) {
135 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
136 Ret++;
137 }
138
139 return Ret;
140}
141
142/// \name SARIF Utilities
143/// @{
144
145/// \internal
146json::Object createMessage(StringRef Text) {
147 return json::Object{{"text", Text.str()}};
148}
149
150/// \internal
151/// \pre CharSourceRange must be a token range
152static json::Object createTextRegion(const SourceManager &SM,
153 const CharSourceRange &R) {
154 FullSourceLoc BeginCharLoc{R.getBegin(), SM};
155 FullSourceLoc EndCharLoc{R.getEnd(), SM};
156 json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
157 {"startColumn", adjustColumnPos(BeginCharLoc)}};
158
159 if (BeginCharLoc == EndCharLoc) {
160 Region["endColumn"] = adjustColumnPos(BeginCharLoc);
161 } else {
162 Region["endLine"] = EndCharLoc.getExpansionLineNumber();
163 Region["endColumn"] = adjustColumnPos(EndCharLoc);
164 }
165 return Region;
166}
167
168static json::Object createLocation(json::Object &&PhysicalLocation,
169 StringRef Message = "") {
170 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
171 if (!Message.empty())
172 Ret.insert({"message", createMessage(Message)});
173 return Ret;
174}
175
177 switch (I) {
178 case ThreadFlowImportance::Important:
179 return "important";
180 case ThreadFlowImportance::Essential:
181 return "essential";
182 case ThreadFlowImportance::Unimportant:
183 return "unimportant";
184 }
185 llvm_unreachable("Fully covered switch is not so fully covered");
186}
187
189 switch (R) {
190 case SarifResultLevel::None:
191 return "none";
192 case SarifResultLevel::Note:
193 return "note";
194 case SarifResultLevel::Warning:
195 return "warning";
196 case SarifResultLevel::Error:
197 return "error";
198 }
199 llvm_unreachable("Potentially un-handled SarifResultLevel. "
200 "Is the switch not fully covered?");
201}
202
203static json::Object
204createThreadFlowLocation(json::Object &&Location,
205 const ThreadFlowImportance &Importance) {
206 return json::Object{{"location", std::move(Location)},
207 {"importance", importanceToStr(Importance)}};
208}
209/// @}
210
211json::Object
212SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
213 assert(R.isValid() &&
214 "Cannot create a physicalLocation from invalid SourceRange!");
215 assert(R.isCharRange() &&
216 "Cannot create a physicalLocation from a token range!");
217 FullSourceLoc Start{R.getBegin(), SourceMgr};
218 OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef();
219 assert(FE && "Diagnostic does not exist within a valid file!");
220
221 const std::string &FileURI = fileNameToURI(getFileName(*FE));
222 auto I = CurrentArtifacts.find(FileURI);
223
224 if (I == CurrentArtifacts.end()) {
225 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
226 const SarifArtifactLocation &Location =
227 SarifArtifactLocation::create(FileURI).setIndex(Idx);
228 const SarifArtifact &Artifact = SarifArtifact::create(Location)
229 .setRoles({"resultFile"})
230 .setLength(FE->getSize())
231 .setMimeType("text/plain");
232 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
233 // If inserted, ensure the original iterator points to the newly inserted
234 // element, so it can be used downstream.
235 if (StatusIter.second)
236 I = StatusIter.first;
237 }
238 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
239 const SarifArtifactLocation &Location = I->second.Location;
240 json::Object ArtifactLocationObject{{"uri", Location.URI}};
241 if (Location.Index.has_value())
242 ArtifactLocationObject["index"] = *Location.Index;
243 return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
244 {"region", createTextRegion(SourceMgr, R)}}};
245}
246
247json::Object &SarifDocumentWriter::getCurrentTool() {
248 assert(!Closed && "SARIF Document is closed. "
249 "Need to call createRun() before using getcurrentTool!");
250
251 // Since Closed = false here, expect there to be at least 1 Run, anything
252 // else is an invalid state.
253 assert(!Runs.empty() && "There are no runs associated with the document!");
254
255 return *Runs.back().getAsObject()->get("tool")->getAsObject();
256}
257
258void SarifDocumentWriter::reset() {
259 CurrentRules.clear();
260 CurrentArtifacts.clear();
261}
262
264 // Exit early if trying to close a closed Document.
265 if (Closed) {
266 reset();
267 return;
268 }
269
270 // Since Closed = false here, expect there to be at least 1 Run, anything
271 // else is an invalid state.
272 assert(!Runs.empty() && "There are no runs associated with the document!");
273
274 // Flush all the rules.
275 json::Object &Tool = getCurrentTool();
276 json::Array Rules;
277 for (const SarifRule &R : CurrentRules) {
278 json::Object Config{
279 {"enabled", R.DefaultConfiguration.Enabled},
280 {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
281 {"rank", R.DefaultConfiguration.Rank}};
282 json::Object Rule{
283 {"name", R.Name},
284 {"id", R.Id},
285 {"fullDescription", json::Object{{"text", R.Description}}},
286 {"defaultConfiguration", std::move(Config)}};
287 if (!R.HelpURI.empty())
288 Rule["helpUri"] = R.HelpURI;
289 Rules.emplace_back(std::move(Rule));
290 }
291 json::Object &Driver = *Tool.getObject("driver");
292 Driver["rules"] = std::move(Rules);
293
294 // Flush all the artifacts.
295 json::Object &Run = getCurrentRun();
296 json::Array *Artifacts = Run.getArray("artifacts");
298 for (const auto &[K, V] : CurrentArtifacts)
299 Vec.emplace_back(K, V);
300 llvm::sort(Vec, llvm::less_first());
301 for (const auto &[_, A] : Vec) {
302 json::Object Loc{{"uri", A.Location.URI}};
303 if (A.Location.Index.has_value()) {
304 Loc["index"] = static_cast<int64_t>(*A.Location.Index);
305 }
306 json::Object Artifact;
307 Artifact["location"] = std::move(Loc);
308 if (A.Length.has_value())
309 Artifact["length"] = static_cast<int64_t>(*A.Length);
310 if (!A.Roles.empty())
311 Artifact["roles"] = json::Array(A.Roles);
312 if (!A.MimeType.empty())
313 Artifact["mimeType"] = A.MimeType;
314 if (A.Offset.has_value())
315 Artifact["offset"] = *A.Offset;
316 Artifacts->push_back(json::Value(std::move(Artifact)));
317 }
318
319 // Clear, reset temporaries before next run.
320 reset();
321
322 // Mark the document as closed.
323 Closed = true;
324}
325
326json::Array
327SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
328 json::Object Ret{{"locations", json::Array{}}};
329 json::Array Locs;
330 for (const auto &ThreadFlow : ThreadFlows) {
331 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
332 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
333 Locs.emplace_back(
334 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
335 }
336 Ret["locations"] = std::move(Locs);
337 return json::Array{std::move(Ret)};
338}
339
340json::Object
341SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
342 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
343}
344
345void SarifDocumentWriter::createRun(StringRef ShortToolName,
346 StringRef LongToolName,
347 StringRef ToolVersion) {
348 // Clear resources associated with a previous run.
349 endRun();
350
351 // Signify a new run has begun.
352 Closed = false;
353
354 json::Object Tool{
355 {"driver",
356 json::Object{{"name", ShortToolName},
357 {"fullName", LongToolName},
358 {"language", "en-US"},
359 {"version", ToolVersion},
360 {"informationUri",
361 "https://clang.llvm.org/docs/UsersManual.html"}}}};
362 json::Object TheRun{{"tool", std::move(Tool)},
363 {"results", {}},
364 {"artifacts", {}},
365 {"columnKind", "unicodeCodePoints"}};
366 Runs.emplace_back(std::move(TheRun));
367}
368
369json::Object &SarifDocumentWriter::getCurrentRun() {
370 assert(!Closed &&
371 "SARIF Document is closed. "
372 "Can only getCurrentRun() if document is opened via createRun(), "
373 "create a run first");
374
375 // Since Closed = false here, expect there to be at least 1 Run, anything
376 // else is an invalid state.
377 assert(!Runs.empty() && "There are no runs associated with the document!");
378 return *Runs.back().getAsObject();
379}
380
382 size_t Ret = CurrentRules.size();
383 CurrentRules.emplace_back(Rule);
384 return Ret;
385}
386
388 size_t RuleIdx = Result.RuleIdx;
389 assert(RuleIdx < CurrentRules.size() &&
390 "Trying to reference a rule that doesn't exist");
391 const SarifRule &Rule = CurrentRules[RuleIdx];
392 assert(Rule.DefaultConfiguration.Enabled &&
393 "Cannot add a result referencing a disabled Rule");
394 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
395 {"ruleIndex", static_cast<int64_t>(RuleIdx)},
396 {"ruleId", Rule.Id}};
397 if (!Result.Locations.empty()) {
398 json::Array Locs;
399 for (auto &Range : Result.Locations) {
400 Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
401 }
402 Ret["locations"] = std::move(Locs);
403 }
404 if (!Result.ThreadFlows.empty())
405 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
406
407 Ret["level"] = resultLevelToStr(
408 Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
409
410 json::Object &Run = getCurrentRun();
411 json::Array *Results = Run.getArray("results");
412 Results->emplace_back(std::move(Ret));
413}
414
416 // Flush all temporaries to their destinations if needed.
417 endRun();
418
419 json::Object Doc{
420 {"$schema", SchemaURI},
421 {"version", SchemaVersion},
422 };
423 if (!Runs.empty())
424 Doc["runs"] = json::Array(Runs);
425 return Doc;
426}
#define V(N, I)
Definition: ASTContext.h:3233
#define SM(sm)
Definition: Cuda.cpp:80
StringRef Text
Definition: Format.cpp:2937
StringRef Filename
Definition: Format.cpp:2936
unsigned Iter
Definition: HTMLLogger.cpp:151
static StringRef importanceToStr(ThreadFlowImportance I)
Definition: Sarif.cpp:176
static StringRef getFileName(FileEntryRef FE)
Definition: Sarif.cpp:39
json::Object createMessage(StringRef Text)
Definition: Sarif.cpp:146
static unsigned int adjustColumnPos(FullSourceLoc Loc, unsigned int TokenLen=0)
Calculate the column position expressed in the number of UTF-8 code points from column start to the s...
Definition: Sarif.cpp:119
static std::string percentEncodeURICharacter(char C)
Definition: Sarif.cpp:55
static json::Object createThreadFlowLocation(json::Object &&Location, const ThreadFlowImportance &Importance)
Definition: Sarif.cpp:204
static json::Object createLocation(json::Object &&PhysicalLocation, StringRef Message="")
Definition: Sarif.cpp:168
static json::Object createTextRegion(const SourceManager &SM, const CharSourceRange &R)
Definition: Sarif.cpp:152
static std::string fileNameToURI(StringRef Filename)
Definition: Sarif.cpp:73
static StringRef resultLevelToStr(SarifResultLevel R)
Definition: Sarif.cpp:188
Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Represents a character-granular source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition: FileEntry.h:57
const FileEntry & getFileEntry() const
Definition: FileEntry.h:70
off_t getSize() const
Definition: FileEntry.h:447
StringRef getName() const
The name of this FileEntry.
Definition: FileEntry.h:61
StringRef tryGetRealPathName() const
Definition: FileEntry.h:431
A SourceLocation and its associated SourceManager.
std::pair< FileID, unsigned > getDecomposedExpansionLoc() const
Decompose the underlying SourceLocation into a raw (FileID + Offset) pair, after walking through all ...
unsigned getExpansionColumnNumber(bool *Invalid=nullptr) const
const SourceManager & getManager() const
void createRun(const llvm::StringRef ShortToolName, const llvm::StringRef LongToolName, const llvm::StringRef ToolVersion=CLANG_VERSION_STRING)
Create a new run with which any upcoming analysis will be associated.
Definition: Sarif.cpp:345
size_t createRule(const SarifRule &Rule)
Associate the given rule with the current run.
Definition: Sarif.cpp:381
llvm::json::Object createDocument()
Return the SARIF document in its current state.
Definition: Sarif.cpp:415
void endRun()
If there is a current run, end it.
Definition: Sarif.cpp:263
void appendResult(const SarifResult &SarifResult)
Append a new result to the currently in-flight run.
Definition: Sarif.cpp:387
A SARIF result (also called a "reporting item") is a unit of output produced when one of the tool's r...
Definition: Sarif.h:315
A SARIF rule (reportingDescriptor object) contains information that describes a reporting item genera...
Definition: Sarif.h:257
This class handles loading and caching of source files into memory.
std::optional< llvm::MemoryBufferRef > getBufferOrNone(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
A thread flow is a sequence of code locations that specify a possible path through a single thread of...
Definition: Sarif.h:175
SarifArtifactLocation setIndex(uint32_t Idx)
Definition: Sarif.h:86
Since every clang artifact MUST have a location (there being no nested artifacts),...
Definition: Sarif.h:104
SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType)
Definition: Sarif.h:138
SarifArtifact setRoles(std::initializer_list< llvm::StringRef > ArtifactRoles)
Definition: Sarif.h:133
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:207
SarifResultLevel
The level of severity associated with a SarifResult.
Definition: Sarif.h:165
ThreadFlowImportance
Definition: Sarif.h:146
@ C
Languages that the frontend can parse and compile.
@ Result
The result type of a method or function.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
YAML serialization mapping.
Definition: Dominators.h:30