clang-tools  11.0.0git
SourceCode.h
Go to the documentation of this file.
1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Various code that examines C++ source code without using heavy AST machinery
10 // (and often not even the lexer). To be used sparingly!
11 //
12 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
14 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
15 
16 #include "Protocol.h"
17 #include "support/Context.h"
18 #include "support/ThreadsafeFS.h"
19 #include "clang/Basic/Diagnostic.h"
20 #include "clang/Basic/LangOptions.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Format/Format.h"
24 #include "clang/Tooling/Core/Replacement.h"
25 #include "clang/Tooling/Syntax/Tokens.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/StringSet.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/SHA1.h"
30 #include <string>
31 
32 namespace clang {
33 class SourceManager;
34 
35 namespace clangd {
36 
37 // We tend to generate digests for source codes in a lot of different places.
38 // This represents the type for those digests to prevent us hard coding details
39 // of hashing function at every place that needs to store this information.
40 using FileDigest = std::array<uint8_t, 8>;
41 FileDigest digest(StringRef Content);
42 Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
43 
44 // This context variable controls the behavior of functions in this file
45 // that convert between LSP offsets and native clang byte offsets.
46 // If not set, defaults to UTF-16 for backwards-compatibility.
47 extern Key<OffsetEncoding> kCurrentOffsetEncoding;
48 
49 // Counts the number of UTF-16 code units needed to represent a string (LSP
50 // specifies string lengths in UTF-16 code units).
51 // Use of UTF-16 may be overridden by kCurrentOffsetEncoding.
52 size_t lspLength(StringRef Code);
53 
54 /// Turn a [line, column] pair into an offset in Code.
55 ///
56 /// If P.character exceeds the line length, returns the offset at end-of-line.
57 /// (If !AllowColumnsBeyondLineLength, then returns an error instead).
58 /// If the line number is out of range, returns an error.
59 ///
60 /// The returned value is in the range [0, Code.size()].
61 llvm::Expected<size_t>
62 positionToOffset(llvm::StringRef Code, Position P,
63  bool AllowColumnsBeyondLineLength = true);
64 
65 /// Turn an offset in Code into a [line, column] pair.
66 /// The offset must be in range [0, Code.size()].
67 Position offsetToPosition(llvm::StringRef Code, size_t Offset);
68 
69 /// Turn a SourceLocation into a [line, column] pair.
70 /// FIXME: This should return an error if the location is invalid.
71 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc);
72 
73 /// Return the file location, corresponding to \p P. Note that one should take
74 /// care to avoid comparing the result with expansion locations.
75 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
76  Position P);
77 
78 /// Returns true iff \p Loc is inside the main file. This function handles
79 /// file & macro locations. For macro locations, returns true iff the macro is being
80 /// expanded inside the main file.
81 ///
82 /// The function is usually used to check whether a declaration is inside the
83 /// main file.
84 bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM);
85 
86 /// Returns the #include location through which IncludedFile was loaded.
87 /// Where SM.getIncludeLoc() returns the location of the *filename*, which may
88 /// be in a macro, includeHashLoc() returns the location of the #.
89 SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM);
90 
91 /// Returns true if the token at Loc is spelled in the source code.
92 /// This is not the case for:
93 /// * symbols formed via macro concatenation, the spelling location will
94 /// be "<scratch space>"
95 /// * symbols controlled and defined by a compile command-line option
96 /// `-DName=foo`, the spelling location will be "<command line>".
97 bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM);
98 
99 /// Turns a token range into a half-open range and checks its correctness.
100 /// The resulting range will have only valid source location on both sides, both
101 /// of which are file locations.
102 ///
103 /// File locations always point to a particular offset in a file, i.e. they
104 /// never refer to a location inside a macro expansion. Turning locations from
105 /// macro expansions into file locations is ambiguous - one can use
106 /// SourceManager::{getExpansion|getFile|getSpelling}Loc. This function
107 /// calls SourceManager::getFileLoc on both ends of \p R to do the conversion.
108 ///
109 /// User input (e.g. cursor position) is expressed as a file location, so this
110 /// function can be viewed as a way to normalize the ranges used in the clang
111 /// AST so that they are comparable with ranges coming from the user input.
112 llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr,
113  const LangOptions &LangOpts,
114  SourceRange R);
115 
116 /// Returns true iff all of the following conditions hold:
117 /// - start and end locations are valid,
118 /// - start and end locations are file locations from the same file
119 /// (i.e. expansion locations are not taken into account).
120 /// - start offset <= end offset.
121 /// FIXME: introduce a type for source range with this invariant.
122 bool isValidFileRange(const SourceManager &Mgr, SourceRange R);
123 
124 /// Returns the source code covered by the source range.
125 /// EXPECTS: isValidFileRange(R) == true.
126 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R);
127 
128 // Converts a half-open clang source range to an LSP range.
129 // Note that clang also uses closed source ranges, which this can't handle!
130 Range halfOpenToRange(const SourceManager &SM, CharSourceRange R);
131 
132 // Converts an offset to a clang line/column (1-based, columns are bytes).
133 // The offset must be in range [0, Code.size()].
134 // Prefer to use SourceManager if one is available.
135 std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
136  size_t Offset);
137 
138 /// From "a::b::c", return {"a::b::", "c"}. Scope is empty if there's no
139 /// qualifier.
140 std::pair<llvm::StringRef, llvm::StringRef>
141 splitQualifiedName(llvm::StringRef QName);
142 
143 TextEdit replacementToEdit(StringRef Code, const tooling::Replacement &R);
144 
145 std::vector<TextEdit> replacementsToEdits(StringRef Code,
146  const tooling::Replacements &Repls);
147 
148 TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
149  const LangOptions &L);
150 
151 /// Get the canonical path of \p F. This means:
152 ///
153 /// - Absolute path
154 /// - Symlinks resolved
155 /// - No "." or ".." component
156 /// - No duplicate or trailing directory separator
157 ///
158 /// This function should be used when paths need to be used outside the
159 /// component that generates them, so that paths are normalized as much as
160 /// possible.
161 llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
162  const SourceManager &SourceMgr);
163 
164 /// Choose the clang-format style we should apply to a certain file.
165 /// This will usually use FS to look for .clang-format directories.
166 /// FIXME: should we be caching the .clang-format file search?
167 /// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
168 /// though the latter may have been overridden in main()!
169 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
170  llvm::StringRef Content,
171  const ThreadsafeFS &TFS);
172 
173 /// Cleanup and format the given replacements.
174 llvm::Expected<tooling::Replacements>
175 cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
176  const format::FormatStyle &Style);
177 
178 /// A set of edits generated for a single file, stored together with a copy of
179 /// the code passed at construction. Can verify whether it is safe to apply these edits to a code block.
180 struct Edit {
181  tooling::Replacements Replacements; // The replacements; moved in from the constructor's Reps.
182  std::string InitialCode; // Owned copy of the constructor's Code argument.
183 
184  Edit(llvm::StringRef Code, tooling::Replacements Reps)
185  : Replacements(std::move(Reps)), InitialCode(Code) {}
186 
187  /// Returns the file contents after changes are applied, or an error (llvm::Expected).
188  llvm::Expected<std::string> apply() const;
189 
190  /// Represents Replacements as TextEdits that are available for use in LSP.
191  std::vector<TextEdit> asTextEdits() const;
192 
193  /// Checks whether the Replacements are applicable to the given Code.
194  bool canApplyTo(llvm::StringRef Code) const;
195 };
196 /// A mapping from absolute file path (the one used for accessing the underlying
197 /// VFS) to edits.
198 using FileEdits = llvm::StringMap<Edit>;
199 
200 /// Formats the edits and code around it according to Style. Changes
201 /// Replacements to formatted ones if it succeeds.
202 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style);
203 
204 /// Collects identifiers with counts in the source code.
205 llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
206  const format::FormatStyle &Style);
207 
208 /// Collects all ranges of the given identifier in the source code.
209 std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
210  llvm::StringRef Content,
211  const LangOptions &LangOpts);
212 
213 /// Collects words from the source code.
214 /// Unlike collectIdentifiers:
215 /// - also finds text in comments:
216 /// - splits text into words
217 /// - drops stopwords like "get" and "for"
218 llvm::StringSet<> collectWords(llvm::StringRef Content);
219 
220 // Something that looks like a word in the source code.
221 // Could be a "real" token that's "live" in the AST, a spelled token consumed by
222 // the preprocessor, or part of a spelled token (e.g. word in a comment).
223 struct SpelledWord {
224  // (Spelling) location of the start of the word.
225  SourceLocation Location;
226  // The range of the word itself, excluding any quotes.
227  // This is a subrange of the file buffer (a non-owning StringRef).
228  llvm::StringRef Text;
229  // Whether this word is likely to refer to an identifier (defaults to false). True if:
230  // - the word is a spelled identifier token
231  // - Text is identifier-like (e.g. "foo_bar")
232  // - Text is surrounded by backticks (e.g. Foo in "// returns `Foo`")
233  bool LikelyIdentifier = false;
234  // Set if the word is contained in a token spelled in the file; defaults to nullptr.
235  // (This should always be true, but comments aren't retained by TokenBuffer).
236  const syntax::Token *PartOfSpelledToken = nullptr;
237  // Set if the word is exactly a token spelled in the file; defaults to nullptr.
238  const syntax::Token *SpelledToken = nullptr;
239  // Set if the word is a token spelled in the file, and that token survives
240  // preprocessing to emit an expanded token spelled the same way; defaults to nullptr.
241  const syntax::Token *ExpandedToken = nullptr;
242 
243  // Find the unique word that contains SpelledLoc or starts/ends there.
244  static llvm::Optional<SpelledWord> touching(SourceLocation SpelledLoc,
245  const syntax::TokenBuffer &TB,
246  const LangOptions &LangOpts); // NOTE(review): presumably llvm::None when no word touches SpelledLoc — confirm in SourceCode.cpp.
247 };
248 
249 /// Heuristically determine namespaces visible at a point, without parsing Code.
250 /// This considers using-directives and enclosing namespace-declarations that
251 /// are visible (and not obfuscated) in the file itself (not headers).
252 /// Code should be truncated at the point of interest.
253 ///
254 /// The returned vector is always non-empty.
255 /// - The first element is the namespace that encloses the point: a declaration
256 /// near the point would be within this namespace.
257 /// - The elements are the namespaces in scope at the point: an unqualified
258 /// lookup would search within these namespaces.
259 ///
260 /// Using directives are resolved against all enclosing scopes, but no other
261 /// namespace directives.
262 ///
263 /// example:
264 /// using namespace a;
265 /// namespace foo {
266 /// using namespace b;
267 ///
268 /// visibleNamespaces are {"foo::", "", "a::", "b::", "foo::b::"}, not "a::b::".
269 std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
270  const LangOptions &LangOpts);
271 
272 /// Represents locations that can accept a definition.
273 struct EligibleRegion {
274  /// Namespace that owns all of the EligiblePoints, e.g.
275  /// namespace a{ namespace b {^ void foo();^} }
276  /// It will be "a::b" for both caret locations.
277  std::string EnclosingNamespace;
278  /// Positions in the code marking eligible points to insert a function
279  /// definition.
280  std::vector<Position> EligiblePoints;
281 };
282 
283 /// Returns most eligible region to insert a definition for \p
284 /// FullyQualifiedName in the \p Code.
285 /// Pseudo parses \p Code under the hood to determine namespace decls and
286 /// possible insertion points. Chooses the region that matches the longest prefix
287 /// of \p FullyQualifiedName. Returns EOF if there are no shared namespaces.
288 /// \p FullyQualifiedName should not contain anonymous namespaces.
289 EligibleRegion getEligiblePoints(llvm::StringRef Code,
290  llvm::StringRef FullyQualifiedName,
291  const LangOptions &LangOpts);
292 
293 struct DefinedMacro { // Result type of locateMacroAt(): describes the macro a spelled token refers to.
294  llvm::StringRef Name; // Macro name; StringRef is a non-owning view, so the storage lives elsewhere.
295  const MacroInfo *Info; // Non-owning pointer to the macro's MacroInfo; no default initializer.
296  /// Location of the identifier that names the macro.
297  /// Unlike Info->Location, this translates preamble-patch locations to
298  /// main-file locations.
299  SourceLocation NameLoc;
300 };
301 /// Gets the macro referenced by \p SpelledTok. It must be a spelled token
302 /// aligned to the beginning of an identifier.
303 llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
304  Preprocessor &PP);
305 
306 /// Infers whether this is a header from the FileName and LangOpts (if
307 /// present).
308 bool isHeaderFile(llvm::StringRef FileName,
309  llvm::Optional<LangOptions> LangOpts = llvm::None);
310 
311 /// Returns true if the given location is in a generated protobuf file.
312 bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr);
313 
314 } // namespace clangd
315 } // namespace clang
316 #endif
SourceLocation Loc
'#' location in the include directive
llvm::StringSet collectWords(llvm::StringRef Content)
Collects words from the source code.
Definition: SourceCode.cpp:840
bool isProtoFile(SourceLocation Loc, const SourceManager &SM)
Returns true if the given location is in a generated protobuf file.
std::string Code
llvm::Expected< tooling::Replacements > cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces, const format::FormatStyle &Style)
Cleanup and format the given replacements.
Definition: SourceCode.cpp:593
SourceLocation NameLoc
Location of the identifier that names the macro.
Definition: SourceCode.h:299
size_t lspLength(llvm::StringRef Code)
Definition: SourceCode.cpp:151
const MacroInfo * Info
Definition: SourceCode.h:295
std::array< uint8_t, 8 > FileDigest
Definition: SourceCode.h:40
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:489
llvm::StringRef Text
Definition: SourceCode.h:228
llvm::SourceMgr * SourceMgr
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM)
Returns true iff Loc is inside the main file.
Definition: SourceCode.cpp:421
llvm::Expected< std::string > apply() const
Returns the file contents after changes are applied.
Definition: SourceCode.cpp:990
std::vector< std::string > visibleNamespaces(llvm::StringRef Code, const LangOptions &LangOpts)
Heuristically determine namespaces visible at a point, without parsing Code.
Definition: SourceCode.cpp:792
Values in a Context are indexed by typed keys.
Definition: Context.h:40
Documents should not be synced at all.
std::string InitialCode
Definition: SourceCode.h:182
bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM)
Returns true if the token at Loc is spelled in the source code.
Definition: SourceCode.cpp:237
llvm::Expected< SourceLocation > sourceLocationInMainFile(const SourceManager &SM, Position P)
Return the file location, corresponding to P.
Definition: SourceCode.cpp:461
bool canApplyTo(llvm::StringRef Code) const
Checks whether the Replacements are applicable to given Code.
Definition: SourceCode.cpp:998
bool isValidFileRange(const SourceManager &Mgr, SourceRange R)
Returns true iff all of the following conditions hold:
Definition: SourceCode.cpp:247
std::pair< size_t, size_t > offsetToClangLineColumn(llvm::StringRef Code, size_t Offset)
Definition: SourceCode.cpp:479
TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, const LangOptions &L)
Definition: SourceCode.cpp:551
std::string QName
Position offsetToPosition(llvm::StringRef Code, size_t Offset)
Turn an offset in Code into a [line, column] pair.
Definition: SourceCode.cpp:208
llvm::Expected< size_t > positionToOffset(llvm::StringRef Code, Position P, bool AllowColumnsBeyondLineLength)
Turn a [line, column] pair into an offset in Code.
Definition: SourceCode.cpp:175
std::string EnclosingNamespace
Namespace that owns all of the EligiblePoints, e.g.
Definition: SourceCode.h:277
syntax::Token SpelledTok
Definition: XRefs.cpp:651
Key< OffsetEncoding > kCurrentOffsetEncoding
Definition: SourceCode.cpp:144
format::FormatStyle getFormatStyleForFile(llvm::StringRef File, llvm::StringRef Content, const ThreadsafeFS &TFS)
Choose the clang-format style we should apply to a certain file.
Definition: SourceCode.cpp:578
PathRef FileName
Represents locations that can accept a definition.
Definition: SourceCode.h:273
llvm::Optional< FileDigest > digestFile(const SourceManager &SM, FileID FID)
Definition: SourceCode.cpp:570
Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc)
Turn a SourceLocation into a [line, column] pair.
Definition: SourceCode.cpp:220
tooling::Replacements Replacements
Definition: SourceCode.h:181
llvm::StringMap< Edit > FileEdits
A mapping from absolute file path (the one used for accessing the underlying VFS) to edits...
Definition: SourceCode.h:198
llvm::Optional< DefinedMacro > locateMacroAt(const syntax::Token &SpelledTok, Preprocessor &PP)
Gets the macro referenced by SpelledTok.
Definition: SourceCode.cpp:968
FileDigest digest(llvm::StringRef Content)
Definition: SourceCode.cpp:560
llvm::Optional< SourceRange > toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R)
Turns a token range into a half-open range and checks its correctness.
Definition: SourceCode.cpp:428
SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM)
Returns the #include location through which IncludedFile was loaded.
Definition: SourceCode.cpp:262
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R)
Returns the source code covered by the source range.
Definition: SourceCode.cpp:450
size_t Offset
Edit(llvm::StringRef Code, tooling::Replacements Reps)
Definition: SourceCode.h:184
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Wrapper for vfs::FileSystem for use in multithreaded programs like clangd.
Definition: ThreadsafeFS.h:28
TextEdit replacementToEdit(llvm::StringRef Code, const tooling::Replacement &R)
Definition: SourceCode.cpp:496
llvm::Optional< std::string > getCanonicalPath(const FileEntry *F, const SourceManager &SourceMgr)
Get the canonical path of F.
Definition: SourceCode.cpp:512
EligibleRegion getEligiblePoints(llvm::StringRef Code, llvm::StringRef FullyQualifiedName, const LangOptions &LangOpts)
Returns most eligible region to insert a definition for FullyQualifiedName in the Code...
std::vector< TextEdit > replacementsToEdits(llvm::StringRef Code, const tooling::Replacements &Repls)
Definition: SourceCode.cpp:504
std::vector< TextEdit > asTextEdits() const
Represents Replacements as TextEdits that are available for use in LSP.
Definition: SourceCode.cpp:994
bool isHeaderFile(llvm::StringRef FileName, llvm::Optional< LangOptions > LangOpts)
Infers whether this is a header from the FileName and LangOpts (if present).
const Expr * E
llvm::Optional< FixItHint > FixIt
std::vector< Range > collectIdentifierRanges(llvm::StringRef Identifier, llvm::StringRef Content, const LangOptions &LangOpts)
Collects all ranges of the given identifier in the source code.
Definition: SourceCode.cpp:627
llvm::StringMap< unsigned > collectIdentifiers(llvm::StringRef Content, const format::FormatStyle &Style)
Collects identifiers with counts in the source code.
Definition: SourceCode.cpp:613
A set of edits generated for a single file.
Definition: SourceCode.h:180
llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style)
Formats the edits and code around it according to Style.
std::vector< Position > EligiblePoints
Offsets into the code marking eligible points to insert a function definition.
Definition: SourceCode.h:280
Range halfOpenToRange(const SourceManager &SM, CharSourceRange R)
Definition: SourceCode.cpp:471
SourceLocation Location
Definition: SourceCode.h:225
static cl::opt< std::string > FormatStyle("format-style", cl::desc(R"( Style for formatting code around applied fixes: - 'none' (default) turns off formatting - 'file' (literally 'file', not a placeholder) uses .clang-format file in the closest parent directory - '{ <json> }' specifies options inline, e.g. -format-style='{BasedOnStyle: llvm, IndentWidth: 8}' - 'llvm', 'google', 'webkit', 'mozilla' See clang-format documentation for the up-to-date information about formatting styles and options. This option overrides the 'FormatStyle` option in .clang-tidy file, if any. )"), cl::init("none"), cl::cat(ClangTidyCategory))