clang-tools 19.0.0git
SourceCode.h
Go to the documentation of this file.
1//===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Various code that examines C++ source code without using heavy AST machinery
10// (and often not even the lexer). To be used sparingly!
11//
12//===----------------------------------------------------------------------===//
13#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
14#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
15
16#include "Protocol.h"
17#include "support/Context.h"
19#include "clang/Basic/CharInfo.h"
20#include "clang/Basic/Diagnostic.h"
21#include "clang/Basic/LangOptions.h"
22#include "clang/Basic/SourceLocation.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Format/Format.h"
25#include "clang/Lex/HeaderSearch.h"
26#include "clang/Tooling/Core/Replacement.h"
27#include "clang/Tooling/Syntax/Tokens.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSet.h"
30#include "llvm/Support/Error.h"
31#include <optional>
32#include <string>
33
34namespace clang {
35class SourceManager;
36
37namespace clangd {
38
39// We tend to generate digests for source codes in a lot of different places.
40// This represents the type for those digests to prevent us hard coding details
41// of hashing function at every place that needs to store this information.
42using FileDigest = std::array<uint8_t, 8>;
43FileDigest digest(StringRef Content);
44std::optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
45
46// This context variable controls the behavior of functions in this file
47// that convert between LSP offsets and native clang byte offsets.
48// If not set, defaults to UTF-16 for backwards-compatibility.
50
51// Counts the number of UTF-16 code units needed to represent a string (LSP
52// specifies string lengths in UTF-16 code units).
53// Use of UTF-16 may be overridden by kCurrentOffsetEncoding.
54size_t lspLength(StringRef Code);
55
56/// Turn a [line, column] pair into an offset in Code.
57///
58/// If P.character exceeds the line length, returns the offset at end-of-line.
59/// (If !AllowColumnsBeyondLineLength, then returns an error instead).
60/// If the line number is out of range, returns an error.
61///
62/// The returned value is in the range [0, Code.size()].
63llvm::Expected<size_t>
64positionToOffset(llvm::StringRef Code, Position P,
65 bool AllowColumnsBeyondLineLength = true);
66
67/// Turn an offset in Code into a [line, column] pair.
68/// The offset must be in range [0, Code.size()].
69Position offsetToPosition(llvm::StringRef Code, size_t Offset);
70
71/// Turn a SourceLocation into a [line, column] pair.
72/// FIXME: This should return an error if the location is invalid.
73Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc);
74
75/// Return the file location, corresponding to \p P. Note that one should take
76/// care to avoid comparing the result with expansion locations.
77llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
78 Position P);
79
80/// Returns true iff \p Loc is inside the main file. This function handles
81/// file & macro locations. For macro locations, returns true iff the macro is
82/// being expanded inside the main file.
83///
84/// The function is usually used to check whether a declaration is inside the
85/// main file.
86bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM);
87
88/// Returns the #include location through which IncludedFile was loaded.
89/// Where SM.getIncludeLoc() returns the location of the *filename*, which may
90/// be in a macro, includeHashLoc() returns the location of the #.
91SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM);
92
93/// Returns true if the token at Loc is spelled in the source code.
94/// This is not the case for:
95/// * symbols formed via macro concatenation, the spelling location will
96/// be "<scratch space>"
97/// * symbols controlled and defined by a compile command-line option
98/// `-DName=foo`, the spelling location will be "<command line>".
99bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM);
100
101/// Turns a token range into a half-open range and checks its correctness.
102/// The resulting range will have only valid source location on both sides, both
103/// of which are file locations.
104///
105/// File locations always point to a particular offset in a file, i.e. they
106/// never refer to a location inside a macro expansion. Turning locations from
107/// macro expansions into file locations is ambiguous - one can use
108/// SourceManager::{getExpansion|getFile|getSpelling}Loc. This function
109/// calls SourceManager::getFileLoc on both ends of \p R to do the conversion.
110///
111/// User input (e.g. cursor position) is expressed as a file location, so this
112/// function can be viewed as a way to normalize the ranges used in the clang
113/// AST so that they are comparable with ranges coming from the user input.
114std::optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr,
115 const LangOptions &LangOpts,
116 SourceRange R);
117
118/// Returns true iff all of the following conditions hold:
119/// - start and end locations are valid,
120/// - start and end locations are file locations from the same file
121/// (i.e. expansion locations are not taken into account).
122/// - start offset <= end offset.
123/// FIXME: introduce a type for source range with this invariant.
124bool isValidFileRange(const SourceManager &Mgr, SourceRange R);
125
126/// Returns the source code covered by the source range.
127/// EXPECTS: isValidFileRange(R) == true.
128llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R);
129
130// Converts a half-open clang source range to an LSP range.
131// Note that clang also uses closed source ranges, which this can't handle!
132Range halfOpenToRange(const SourceManager &SM, CharSourceRange R);
133
134// Expand range `A` to also contain `B`.
135void unionRanges(Range &A, Range B);
136
137// Converts an offset to a clang line/column (1-based, columns are bytes).
138// The offset must be in range [0, Code.size()].
139// Prefer to use SourceManager if one is available.
140std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
141 size_t Offset);
142
143/// From "a::b::c", return {"a::b::", "c"}. Scope is empty if there's no
144/// qualifier.
145std::pair<llvm::StringRef, llvm::StringRef>
146splitQualifiedName(llvm::StringRef QName);
147
148TextEdit replacementToEdit(StringRef Code, const tooling::Replacement &R);
149
150std::vector<TextEdit> replacementsToEdits(StringRef Code,
151 const tooling::Replacements &Repls);
152
153TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
154 const LangOptions &L);
155
156/// Get the canonical path of \p F. This means:
157///
158/// - Absolute path
159/// - Symlinks resolved
160/// - No "." or ".." component
161/// - No duplicate or trailing directory separator
162///
163/// This function should be used when paths need to be used outside the
164/// component that generates them, so that paths are normalized as much as
165/// possible.
166std::optional<std::string> getCanonicalPath(const FileEntryRef F,
167 FileManager &FileMgr);
168
169/// Choose the clang-format style we should apply to a certain file.
170/// This will usually use FS to look for .clang-format directories.
171/// FIXME: should we be caching the .clang-format file search?
172/// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
173/// though the latter may have been overridden in main()!
174format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
175 llvm::StringRef Content,
176 const ThreadsafeFS &TFS);
177
178/// Cleanup and format the given replacements.
179llvm::Expected<tooling::Replacements>
180cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
181 const format::FormatStyle &Style);
182
183/// A set of edits generated for a single file. Can verify whether it is safe to
184/// apply these edits to a code block (see canApplyTo()).
185struct Edit {
186 tooling::Replacements Replacements; // The replacements that make up this edit.
187 std::string InitialCode; // Copy of the Code passed to the constructor; presumably the text the Replacements were computed against (confirm in SourceCode.cpp).
188
189 Edit() = default; // Default-constructs both members.
190
191 Edit(llvm::StringRef Code, tooling::Replacements Reps) // Reps is taken by value and moved in; Code is copied into InitialCode.
192 : Replacements(std::move(Reps)), InitialCode(Code) {}
193
194 /// Returns the file contents after the changes are applied, or an error.
195 llvm::Expected<std::string> apply() const;
196
197 /// Represents Replacements as TextEdits that are available for use in LSP.
198 std::vector<TextEdit> asTextEdits() const;
199
200 /// Checks whether the Replacements are applicable to the given Code.
201 bool canApplyTo(llvm::StringRef Code) const;
202};
203/// A mapping from absolute file path (the one used for accessing the underlying
204/// VFS) to edits.
205using FileEdits = llvm::StringMap<Edit>;
206
207/// Formats the edits and code around it according to Style. Changes
208/// Replacements to formatted ones if succeeds.
209llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style);
210
211/// Apply an incremental update to a text document.
212llvm::Error applyChange(std::string &Contents,
213 const TextDocumentContentChangeEvent &Change);
214
215/// Collects identifiers with counts in the source code.
216llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
217 const format::FormatStyle &Style);
218
219/// Collects all ranges of the given identifier in the source code.
220std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
221 llvm::StringRef Content,
222 const LangOptions &LangOpts);
223
224/// Collects words from the source code.
225/// Unlike collectIdentifiers:
226/// - also finds text in comments:
227/// - splits text into words
228/// - drops stopwords like "get" and "for"
229llvm::StringSet<> collectWords(llvm::StringRef Content);
230
231// Something that looks like a word in the source code.
232// Could be a "real" token that's "live" in the AST, a spelled token consumed by
233// the preprocessor, or part of a spelled token (e.g. word in a comment).
235 // (Spelling) location of the start of the word.
236 SourceLocation Location;
237 // The range of the word itself, excluding any quotes.
238 // This is a subrange of the file buffer.
239 llvm::StringRef Text;
240 // Whether this word is likely to refer to an identifier. True if:
241 // - the word is a spelled identifier token
242 // - Text is identifier-like (e.g. "foo_bar")
243 // - Text is surrounded by backticks (e.g. Foo in "// returns `Foo`")
244 bool LikelyIdentifier = false;
245 // Set if the word is contained in a token spelled in the file.
246 // (This should always be true, but comments aren't retained by TokenBuffer).
247 const syntax::Token *PartOfSpelledToken = nullptr;
248 // Set if the word is exactly a token spelled in the file.
249 const syntax::Token *SpelledToken = nullptr;
250 // Set if the word is a token spelled in the file, and that token survives
251 // preprocessing to emit an expanded token spelled the same way.
252 const syntax::Token *ExpandedToken = nullptr;
253
254 // Find the unique word that contains SpelledLoc or starts/ends there.
255 static std::optional<SpelledWord> touching(SourceLocation SpelledLoc,
256 const syntax::TokenBuffer &TB,
257 const LangOptions &LangOpts);
258};
259
260/// Return true if the \p TokenName is in the list of reserved keywords of the
261/// language.
262bool isKeyword(llvm::StringRef TokenName, const LangOptions &LangOpts);
263
264/// Heuristically determine namespaces visible at a point, without parsing Code.
265/// This considers using-directives and enclosing namespace-declarations that
266/// are visible (and not obfuscated) in the file itself (not headers).
267/// Code should be truncated at the point of interest.
268///
269/// The returned vector is always non-empty.
270/// - The first element is the namespace that encloses the point: a declaration
271/// near the point would be within this namespace.
272/// - The elements are the namespaces in scope at the point: an unqualified
273/// lookup would search within these namespaces.
274///
275/// Using directives are resolved against all enclosing scopes, but no other
276/// namespace directives.
277///
278/// example:
279/// using namespace a;
280/// namespace foo {
281/// using namespace b;
282///
283/// visibleNamespaces are {"foo::", "", "a::", "b::", "foo::b::"}, not "a::b::".
284std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
285 const LangOptions &LangOpts);
286
287/// Represents locations that can accept a definition.
289 /// Namespace that owns all of the EligiblePoints, e.g.
290 /// namespace a{ namespace b {^ void foo();^} }
291 /// It will be “a::b” for both caret locations.
293 /// Offsets into the code marking eligible points to insert a function
294 /// definition.
295 std::vector<Position> EligiblePoints;
296};
297
298/// Returns most eligible region to insert a definition for \p
299/// FullyQualifiedName in the \p Code.
300/// Pseudo-parses \p Code under the hood to determine namespace decls and
301/// possible insertion points. Chooses the region that matches the longest prefix
302/// of \p FullyQualifiedName. Returns EOF if there are no shared namespaces.
303/// \p FullyQualifiedName should not contain anonymous namespaces.
305 llvm::StringRef FullyQualifiedName,
306 const LangOptions &LangOpts);
307
309 llvm::StringRef Name;
310 const MacroInfo *Info;
311 /// Location of the identifier that names the macro.
312 /// Unlike Info->Location, this translates preamble-patch locations to
313 /// main-file locations.
314 SourceLocation NameLoc;
315};
316/// Gets the macro referenced by \p SpelledTok. It must be a spelled token
317/// aligned to the beginning of an identifier.
318std::optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
319 Preprocessor &PP);
320
321/// Infers whether this is a header from the FileName and LangOpts (if
322/// present).
323bool isHeaderFile(llvm::StringRef FileName,
324 std::optional<LangOptions> LangOpts = std::nullopt);
325
326/// Returns true if the given location is in a generated protobuf file.
327bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr);
328
329/// Heuristic check for reserved identifiers such as _Foo or __Vector_base.
330inline bool isReservedName(llvm::StringRef Name) {
331 // Only the most common spellings are caught: '_' then an uppercase letter or '_'.
332 const bool LeadingUnderscore = Name.size() >= 2 && Name[0] == '_';
333 return LeadingUnderscore && (Name[1] == '_' || isUppercase(Name[1]));
334}
335
336/// Translates locations inside preamble patch to their main-file equivalent
337/// using presumed locations. Returns \p Loc if it isn't inside preamble patch.
338SourceLocation translatePreamblePatchLocation(SourceLocation Loc,
339 const SourceManager &SM);
340
341/// Returns the range starting at offset and spanning the whole line. Escaped
342/// newlines are not handled.
343clangd::Range rangeTillEOL(llvm::StringRef Code, unsigned HashOffset);
344} // namespace clangd
345} // namespace clang
346#endif
const Expr * E
llvm::SmallString< 256U > Name
Replacements Replaces
Definition: ClangTidy.cpp:306
size_t Offset
std::string Code
StringRef FileName
SourceLocation Loc
size_t HashOffset
const google::protobuf::Message & M
Definition: Server.cpp:309
std::optional< FixItHint > FixIt
syntax::Token SpelledTok
Definition: XRefs.cpp:872
Values in a Context are indexed by typed keys.
Definition: Context.h:40
Wrapper for vfs::FileSystem for use in multithreaded programs like clangd.
Definition: ThreadsafeFS.h:26
bool isValidFileRange(const SourceManager &Mgr, SourceRange R)
Returns true iff all of the following conditions hold:
Definition: SourceCode.cpp:249
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:497
std::optional< SourceRange > toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R)
Turns a token range into a half-open range and checks its correctness.
Definition: SourceCode.cpp:430
Range halfOpenToRange(const SourceManager &SM, CharSourceRange R)
Definition: SourceCode.cpp:472
SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM)
Returns the #include location through which IncludedFile was loaded.
Definition: SourceCode.cpp:264
llvm::Error applyChange(std::string &Contents, const TextDocumentContentChangeEvent &Change)
Apply an incremental update to a text document.
TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, const LangOptions &L)
Definition: SourceCode.cpp:556
Position offsetToPosition(llvm::StringRef Code, size_t Offset)
Turn an offset in Code into a [line, column] pair.
Definition: SourceCode.cpp:202
size_t lspLength(llvm::StringRef Code)
Definition: SourceCode.cpp:149
bool isReservedName(llvm::StringRef Name)
Returns true if Name is reserved, like _Foo or __Vector_base.
Definition: SourceCode.h:330
format::FormatStyle getFormatStyleForFile(llvm::StringRef File, llvm::StringRef Content, const ThreadsafeFS &TFS)
Choose the clang-format style we should apply to a certain file.
Definition: SourceCode.cpp:583
Key< OffsetEncoding > kCurrentOffsetEncoding
Definition: SourceCode.cpp:142
FileDigest digest(llvm::StringRef Content)
Definition: SourceCode.cpp:565
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM)
Returns true iff Loc is inside the main file.
Definition: SourceCode.cpp:423
llvm::StringMap< Edit > FileEdits
A mapping from absolute file path (the one used for accessing the underlying VFS) to edits.
Definition: SourceCode.h:205
llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style)
Formats the edits and code around it according to Style.
void unionRanges(Range &A, Range B)
Definition: SourceCode.cpp:480
std::array< uint8_t, 8 > FileDigest
Definition: SourceCode.h:42
SourceLocation translatePreamblePatchLocation(SourceLocation Loc, const SourceManager &SM)
Translates locations inside preamble patch to their main-file equivalent using presumed locations.
Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc)
Turn a SourceLocation into a [line, column] pair.
Definition: SourceCode.cpp:214
llvm::StringMap< unsigned > collectIdentifiers(llvm::StringRef Content, const format::FormatStyle &Style)
Collects identifiers with counts in the source code.
Definition: SourceCode.cpp:618
std::optional< DefinedMacro > locateMacroAt(const syntax::Token &SpelledTok, Preprocessor &PP)
Gets the macro referenced by SpelledTok.
Definition: SourceCode.cpp:985
std::optional< FileDigest > digestFile(const SourceManager &SM, FileID FID)
Definition: SourceCode.cpp:575
std::vector< std::string > visibleNamespaces(llvm::StringRef Code, const LangOptions &LangOpts)
Heuristically determine namespaces visible at a point, without parsing Code.
Definition: SourceCode.cpp:809
std::optional< std::string > getCanonicalPath(const FileEntryRef F, FileManager &FileMgr)
Get the canonical path of F.
Definition: SourceCode.cpp:520
EligibleRegion getEligiblePoints(llvm::StringRef Code, llvm::StringRef FullyQualifiedName, const LangOptions &LangOpts)
Returns most eligible region to insert a definition for FullyQualifiedName in the Code.
llvm::Expected< size_t > positionToOffset(llvm::StringRef Code, Position P, bool AllowColumnsBeyondLineLength)
Turn a [line, column] pair into an offset in Code.
Definition: SourceCode.cpp:173
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R)
Returns the source code covered by the source range.
Definition: SourceCode.cpp:452
std::vector< Range > collectIdentifierRanges(llvm::StringRef Identifier, llvm::StringRef Content, const LangOptions &LangOpts)
Collects all ranges of the given identifier in the source code.
Definition: SourceCode.cpp:632
llvm::StringSet collectWords(llvm::StringRef Content)
Collects words from the source code.
Definition: SourceCode.cpp:857
llvm::Expected< SourceLocation > sourceLocationInMainFile(const SourceManager &SM, Position P)
Return the file location, corresponding to P.
Definition: SourceCode.cpp:462
bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts)
Return true if the TokenName is in the list of reserved keywords of the language.
Definition: SourceCode.cpp:645
llvm::Expected< tooling::Replacements > cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces, const format::FormatStyle &Style)
Cleanup and format the given replacements.
Definition: SourceCode.cpp:598
std::pair< size_t, size_t > offsetToClangLineColumn(llvm::StringRef Code, size_t Offset)
Definition: SourceCode.cpp:487
bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM)
Returns true if the token at Loc is spelled in the source code.
Definition: SourceCode.cpp:231
clangd::Range rangeTillEOL(llvm::StringRef Code, unsigned HashOffset)
Returns the range starting at offset and spanning the whole line.
bool isProtoFile(SourceLocation Loc, const SourceManager &SM)
Returns true if the given location is in a generated protobuf file.
bool isHeaderFile(llvm::StringRef FileName, std::optional< LangOptions > LangOpts)
Infers whether this is a header from the FileName and LangOpts (if present).
std::vector< TextEdit > replacementsToEdits(llvm::StringRef Code, const tooling::Replacements &Repls)
Definition: SourceCode.cpp:512
TextEdit replacementToEdit(llvm::StringRef Code, const tooling::Replacement &R)
Definition: SourceCode.cpp:504
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
SourceLocation NameLoc
Location of the identifier that names the macro.
Definition: SourceCode.h:314
const MacroInfo * Info
Definition: SourceCode.h:310
A set of edits generated for a single file.
Definition: SourceCode.h:185
tooling::Replacements Replacements
Definition: SourceCode.h:186
Edit(llvm::StringRef Code, tooling::Replacements Reps)
Definition: SourceCode.h:191
std::string InitialCode
Definition: SourceCode.h:187
std::vector< TextEdit > asTextEdits() const
Represents Replacements as TextEdits that are available for use in LSP.
llvm::Expected< std::string > apply() const
Returns the file contents after changes are applied.
bool canApplyTo(llvm::StringRef Code) const
Checks whether the Replacements are applicable to given Code.
Represents locations that can accept a definition.
Definition: SourceCode.h:288
std::vector< Position > EligiblePoints
Offsets into the code marking eligible points to insert a function definition.
Definition: SourceCode.h:295
std::string EnclosingNamespace
Namespace that owns all of the EligiblePoints, e.g.
Definition: SourceCode.h:292
llvm::StringRef Text
Definition: SourceCode.h:239
static std::optional< SpelledWord > touching(SourceLocation SpelledLoc, const syntax::TokenBuffer &TB, const LangOptions &LangOpts)
Definition: SourceCode.cpp:935
const syntax::Token * ExpandedToken
Definition: SourceCode.h:252
const syntax::Token * PartOfSpelledToken
Definition: SourceCode.h:247
const syntax::Token * SpelledToken
Definition: SourceCode.h:249
SourceLocation Location
Definition: SourceCode.h:236