clang-tools 20.0.0git
SerializationTests.cpp
Go to the documentation of this file.
1//===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Headers.h"
10#include "RIFF.h"
11#include "index/Serialization.h"
12#include "support/Logger.h"
13#include "clang/Tooling/CompilationDatabase.h"
14#include "llvm/ADT/StringExtras.h"
15#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
16#include "llvm/Support/Compression.h"
17#include "llvm/Support/Error.h"
18#include "llvm/Support/ScopedPrinter.h"
19#include "gmock/gmock.h"
20#include "gtest/gtest.h"
21#ifdef LLVM_ON_UNIX
22#include <sys/resource.h>
23#endif
24
25using ::testing::ElementsAre;
26using ::testing::Pair;
27using ::testing::UnorderedElementsAre;
28using ::testing::UnorderedElementsAreArray;
29
30namespace clang {
31namespace clangd {
32namespace {
33
// YAML fixture shared by the tests below. It contains two symbols, one
// reference list, one relation, one compile command and one source node.
// Nested mappings and list items must be indented, otherwise the nested keys
// (Start/End, Line/Column, list item fields) collapse into their parents.
const char *YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2D
Name: 'Foo1'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/foo.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 129
Documentation: 'Foo doc'
ReturnType: 'int'
IncludeHeaders:
  - Header: 'include1'
    References: 7
    Directives: [ Include ]
  - Header: 'include2'
    References: 3
    Directives: [ Import ]
  - Header: 'include3'
    References: 2
    Directives: [ Include, Import ]
  - Header: 'include4'
    References: 1
    Directives: [ ]
...
---
!Symbol
ID: 057557CEBF6E6B2E
Name: 'Foo2'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/bar.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 2
Signature: '-sig'
CompletionSnippetSuffix: '-snippet'
...
!Refs
ID: 057557CEBF6E6B2D
References:
  - Kind: 4
    Location:
      FileURI: file:///path/foo.cc
      Start:
        Line: 5
        Column: 3
      End:
        Line: 5
        Column: 8
...
--- !Relations
Subject:
  ID: 6481EE7AF2841756
Predicate: 0
Object:
  ID: 6512AEC512EA3A2D
...
--- !Cmd
Directory: 'testdir'
CommandLine:
  - 'cmd1'
  - 'cmd2'
...
--- !Source
URI: 'file:///path/source1.cpp'
Flags: 1
Digest: EED8F5EAF25C453C
DirectIncludes:
  - 'file:///path/inc1.h'
  - 'file:///path/inc2.h'
...
)";
123
124MATCHER_P(id, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
125MATCHER_P(qName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
126MATCHER_P3(IncludeHeaderWithRefAndDirectives, IncludeHeader, References,
127 SupportedDirectives, "") {
128 return (arg.IncludeHeader == IncludeHeader) &&
129 (arg.References == References) &&
130 (arg.SupportedDirectives == SupportedDirectives);
131}
132
133auto readIndexFile(llvm::StringRef Text) {
135}
136
137TEST(SerializationTest, NoCrashOnEmptyYAML) {
138 EXPECT_TRUE(bool(readIndexFile("")));
139}
140
141TEST(SerializationTest, YAMLConversions) {
142 auto ParsedYAML = readIndexFile(YAML);
143 ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
144 ASSERT_TRUE(bool(ParsedYAML->Symbols));
145 EXPECT_THAT(
146 *ParsedYAML->Symbols,
147 UnorderedElementsAre(id("057557CEBF6E6B2D"), id("057557CEBF6E6B2E")));
148
149 auto Sym1 = *ParsedYAML->Symbols->find(
150 cantFail(SymbolID::fromStr("057557CEBF6E6B2D")));
151 auto Sym2 = *ParsedYAML->Symbols->find(
152 cantFail(SymbolID::fromStr("057557CEBF6E6B2E")));
153
154 EXPECT_THAT(Sym1, qName("clang::Foo1"));
155 EXPECT_EQ(Sym1.Signature, "");
156 EXPECT_EQ(Sym1.Documentation, "Foo doc");
157 EXPECT_EQ(Sym1.ReturnType, "int");
158 EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h");
159 EXPECT_EQ(Sym1.Origin, SymbolOrigin::Static);
160 EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129);
161 EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion);
162 EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated);
163 EXPECT_THAT(
164 Sym1.IncludeHeaders,
165 UnorderedElementsAre(
166 IncludeHeaderWithRefAndDirectives("include1", 7u, Symbol::Include),
167 IncludeHeaderWithRefAndDirectives("include2", 3u, Symbol::Import),
168 IncludeHeaderWithRefAndDirectives("include3", 2u,
170 IncludeHeaderWithRefAndDirectives("include4", 1u, Symbol::Invalid)));
171
172 EXPECT_THAT(Sym2, qName("clang::Foo2"));
173 EXPECT_EQ(Sym2.Signature, "-sig");
174 EXPECT_EQ(Sym2.ReturnType, "");
175 EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI),
176 "file:///path/bar.h");
177 EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
178 EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
179
180 ASSERT_TRUE(bool(ParsedYAML->Refs));
181 EXPECT_THAT(
182 *ParsedYAML->Refs,
183 UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")),
184 ::testing::SizeIs(1))));
185 auto Ref1 = ParsedYAML->Refs->begin()->second.front();
186 EXPECT_EQ(Ref1.Kind, RefKind::Reference);
187 EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
188
189 SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756"));
190 SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
191 ASSERT_TRUE(bool(ParsedYAML->Relations));
192 EXPECT_THAT(
193 *ParsedYAML->Relations,
194 UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived}));
195
196 ASSERT_TRUE(bool(ParsedYAML->Cmd));
197 auto &Cmd = *ParsedYAML->Cmd;
198 ASSERT_EQ(Cmd.Directory, "testdir");
199 EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2"));
200
201 ASSERT_TRUE(bool(ParsedYAML->Sources));
202 const auto *URI = "file:///path/source1.cpp";
203 ASSERT_TRUE(ParsedYAML->Sources->count(URI));
204 auto IGNDeserialized = ParsedYAML->Sources->lookup(URI);
205 EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C");
206 EXPECT_THAT(IGNDeserialized.DirectIncludes,
207 ElementsAre("file:///path/inc1.h", "file:///path/inc2.h"));
208 EXPECT_EQ(IGNDeserialized.URI, URI);
209 EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1));
210}
211
212std::vector<std::string> yamlFromSymbols(const SymbolSlab &Slab) {
213 std::vector<std::string> Result;
214 for (const auto &Sym : Slab)
215 Result.push_back(toYAML(Sym));
216 return Result;
217}
218std::vector<std::string> yamlFromRefs(const RefSlab &Slab) {
219 std::vector<std::string> Result;
220 for (const auto &Refs : Slab)
221 Result.push_back(toYAML(Refs));
222 return Result;
223}
224
225std::vector<std::string> yamlFromRelations(const RelationSlab &Slab) {
226 std::vector<std::string> Result;
227 for (const auto &Rel : Slab)
228 Result.push_back(toYAML(Rel));
229 return Result;
230}
231
232TEST(SerializationTest, BinaryConversions) {
233 auto In = readIndexFile(YAML);
234 EXPECT_TRUE(bool(In)) << In.takeError();
235
236 // Write to binary format, and parse again.
237 IndexFileOut Out(*In);
238 Out.Format = IndexFileFormat::RIFF;
239 std::string Serialized = llvm::to_string(Out);
240
241 auto In2 = readIndexFile(Serialized);
242 ASSERT_TRUE(bool(In2)) << In2.takeError();
243 ASSERT_TRUE(In2->Symbols);
244 ASSERT_TRUE(In2->Refs);
245 ASSERT_TRUE(In2->Relations);
246
247 // Assert the YAML serializations match, for nice comparisons and diffs.
248 EXPECT_THAT(yamlFromSymbols(*In2->Symbols),
249 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols)));
250 EXPECT_THAT(yamlFromRefs(*In2->Refs),
251 UnorderedElementsAreArray(yamlFromRefs(*In->Refs)));
252 EXPECT_THAT(yamlFromRelations(*In2->Relations),
253 UnorderedElementsAreArray(yamlFromRelations(*In->Relations)));
254}
255
256TEST(SerializationTest, SrcsTest) {
257 auto In = readIndexFile(YAML);
258 EXPECT_TRUE(bool(In)) << In.takeError();
259
260 std::string TestContent("TestContent");
261 IncludeGraphNode IGN;
262 IGN.Digest = digest(TestContent);
263 IGN.DirectIncludes = {"inc1", "inc2"};
264 IGN.URI = "URI";
267 IncludeGraph Sources;
268 Sources[IGN.URI] = IGN;
269 // Write to binary format, and parse again.
270 IndexFileOut Out(*In);
271 Out.Format = IndexFileFormat::RIFF;
272 Out.Sources = &Sources;
273 {
274 std::string Serialized = llvm::to_string(Out);
275
276 auto In = readIndexFile(Serialized);
277 ASSERT_TRUE(bool(In)) << In.takeError();
278 ASSERT_TRUE(In->Symbols);
279 ASSERT_TRUE(In->Refs);
280 ASSERT_TRUE(In->Sources);
281 ASSERT_TRUE(In->Sources->count(IGN.URI));
282 // Assert the YAML serializations match, for nice comparisons and diffs.
283 EXPECT_THAT(yamlFromSymbols(*In->Symbols),
284 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols)));
285 EXPECT_THAT(yamlFromRefs(*In->Refs),
286 UnorderedElementsAreArray(yamlFromRefs(*In->Refs)));
287 auto IGNDeserialized = In->Sources->lookup(IGN.URI);
288 EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
289 EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
290 EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
291 EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags);
292 }
293}
294
295TEST(SerializationTest, CmdlTest) {
296 auto In = readIndexFile(YAML);
297 EXPECT_TRUE(bool(In)) << In.takeError();
298
299 tooling::CompileCommand Cmd;
300 Cmd.Directory = "testdir";
301 Cmd.CommandLine.push_back("cmd1");
302 Cmd.CommandLine.push_back("cmd2");
303 Cmd.Filename = "ignored";
304 Cmd.Heuristic = "ignored";
305 Cmd.Output = "ignored";
306
307 IndexFileOut Out(*In);
308 Out.Format = IndexFileFormat::RIFF;
309 Out.Cmd = &Cmd;
310 {
311 std::string Serialized = llvm::to_string(Out);
312
313 auto In = readIndexFile(Serialized);
314 ASSERT_TRUE(bool(In)) << In.takeError();
315 ASSERT_TRUE(In->Cmd);
316
317 const tooling::CompileCommand &SerializedCmd = *In->Cmd;
318 EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine);
319 EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory);
320 EXPECT_NE(SerializedCmd.Filename, Cmd.Filename);
321 EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic);
322 EXPECT_NE(SerializedCmd.Output, Cmd.Output);
323 }
324}
325
326// rlimit is part of POSIX. RLIMIT_AS does not exist in OpenBSD.
327// Sanitizers use a lot of address space, so we can't apply strict limits.
328#if LLVM_ON_UNIX && defined(RLIMIT_AS) && !LLVM_ADDRESS_SANITIZER_BUILD && \
329 !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_THREAD_SANITIZER_BUILD
330class ScopedMemoryLimit {
331 struct rlimit OriginalLimit;
332 bool Succeeded = false;
333
334public:
335 ScopedMemoryLimit(rlim_t Bytes) {
336 if (!getrlimit(RLIMIT_AS, &OriginalLimit)) {
337 struct rlimit NewLimit = OriginalLimit;
338 NewLimit.rlim_cur = Bytes;
339 Succeeded = !setrlimit(RLIMIT_AS, &NewLimit);
340 }
341 if (!Succeeded)
342 log("Failed to set rlimit");
343 }
344
345 ~ScopedMemoryLimit() {
346 if (Succeeded)
347 setrlimit(RLIMIT_AS, &OriginalLimit);
348 }
349};
350#else
351class ScopedMemoryLimit {
352public:
353 ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); }
354};
355#endif
356
357// Test that our deserialization detects invalid array sizes without allocating.
358// If this detection fails, the test should allocate a huge array and crash.
359TEST(SerializationTest, NoCrashOnBadArraySize) {
360 // This test is tricky because we need to construct a subtly invalid file.
361 // First, create a valid serialized file.
362 auto In = readIndexFile(YAML);
363 ASSERT_FALSE(!In) << In.takeError();
364 IndexFileOut Out(*In);
365 Out.Format = IndexFileFormat::RIFF;
366 std::string Serialized = llvm::to_string(Out);
367
368 // Low-level parse it again and find the `srcs` chunk we're going to corrupt.
369 auto Parsed = riff::readFile(Serialized);
370 ASSERT_FALSE(!Parsed) << Parsed.takeError();
371 auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
372 return C.ID == riff::fourCC("srcs");
373 });
374 ASSERT_NE(Srcs, Parsed->Chunks.end());
375
376 // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one.
377 // The node has:
378 // - 1 byte: flags (1)
379 // - varint(stringID): URI
380 // - 8 byte: file digest
381 // - varint: DirectIncludes.length
382 // - repeated varint(stringID): DirectIncludes
383 // We want to set DirectIncludes.length to a huge number.
384 // The offset isn't trivial to find, so we use the file digest.
385 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
386 unsigned Pos = Srcs->Data.find_first_of(FileDigest);
387 ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest";
388 Pos += FileDigest.size();
389
390 // Varints are little-endian base-128 numbers, where the top-bit of each byte
391 // indicates whether there are more. ffffffff0f -> 0xffffffff.
392 std::string CorruptSrcs =
393 (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") +
394 "some_random_garbage")
395 .str();
396 Srcs->Data = CorruptSrcs;
397
398 // Try to crash rather than hang on large allocation.
399 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
400
401 std::string CorruptFile = llvm::to_string(*Parsed);
402 auto CorruptParsed = readIndexFile(CorruptFile);
403 ASSERT_TRUE(!CorruptParsed);
404 EXPECT_EQ(llvm::toString(CorruptParsed.takeError()),
405 "malformed or truncated include uri");
406}
407
408// Check we detect invalid string table size size without allocating it first.
409// If this detection fails, the test should allocate a huge array and crash.
410TEST(SerializationTest, NoCrashOnBadStringTableSize) {
411 if (!llvm::compression::zlib::isAvailable()) {
412 log("skipping test, no zlib");
413 return;
414 }
415
416 // First, create a valid serialized file.
417 auto In = readIndexFile(YAML);
418 ASSERT_FALSE(!In) << In.takeError();
419 IndexFileOut Out(*In);
420 Out.Format = IndexFileFormat::RIFF;
421 std::string Serialized = llvm::to_string(Out);
422
423 // Low-level parse it again, we're going to replace the `stri` chunk.
424 auto Parsed = riff::readFile(Serialized);
425 ASSERT_FALSE(!Parsed) << Parsed.takeError();
426 auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
427 return C.ID == riff::fourCC("stri");
428 });
429 ASSERT_NE(Stri, Parsed->Chunks.end());
430
431 // stri consists of an 8 byte uncompressed-size, and then compressed data.
432 // We'll claim our small amount of data expands to 4GB
433 std::string CorruptStri =
434 (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str();
435 Stri->Data = CorruptStri;
436 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
437
438 // Try to crash rather than hang on large allocation.
439 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
440
441 std::string CorruptFile = llvm::to_string(*Parsed);
442 auto CorruptParsed = readIndexFile(CorruptFile);
443 ASSERT_TRUE(!CorruptParsed);
444 EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
445 testing::HasSubstr("bytes is implausible"));
446}
447
448} // namespace
449} // namespace clangd
450} // namespace clang
Bracket::Index Pair
Definition: Bracket.cpp:80
llvm::SmallString< 256U > Name
CompiledFragmentImpl & Out
const Criteria C
size_t Pos
static llvm::Expected< SymbolID > fromStr(llvm::StringRef)
Definition: SymbolID.cpp:37
llvm::Expected< File > readFile(llvm::StringRef Stream)
Definition: RIFF.cpp:48
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data, SymbolOrigin Origin)
FileDigest digest(llvm::StringRef Content)
Definition: SourceCode.cpp:565
std::array< uint8_t, 8 > FileDigest
Definition: SourceCode.h:42
MATCHER_P(named, N, "")
TEST(BackgroundQueueTest, Priority)
llvm::StringMap< IncludeGraphNode > IncludeGraph
Definition: Headers.h:101
void log(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
std::string toYAML(const Symbol &)
std::array< uint8_t, 20 > SymbolID
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
@ IndexedForCodeCompletion
Whether or not this symbol is meant to be used for the code completion.
Definition: Symbol.h:141
@ Deprecated
Indicates if the symbol is deprecated.
Definition: Symbol.h:143
@ Include
#include "header.h"
Definition: Symbol.h:93
@ Import
#import "header.h"
Definition: Symbol.h:95