clang-tools 22.0.0git
SemanticSelection.cpp
Go to the documentation of this file.
1//===--- SemanticSelection.cpp -----------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SemanticSelection.h"
10#include "ParsedAST.h"
11#include "Protocol.h"
12#include "Selection.h"
13#include "SourceCode.h"
14#include "support/Bracket.h"
16#include "support/Token.h"
17#include "clang/AST/DeclBase.h"
18#include "clang/Basic/SourceLocation.h"
19#include "clang/Basic/SourceManager.h"
20#include "clang/Basic/TokenKinds.h"
21#include "clang/Tooling/Syntax/BuildTree.h"
22#include "clang/Tooling/Syntax/Nodes.h"
23#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
24#include "clang/Tooling/Syntax/Tree.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/Support/Casting.h"
28#include "llvm/Support/Error.h"
29#include <optional>
30#include <queue>
31#include <vector>
32
33namespace clang {
34namespace clangd {
35namespace {
36
37// Adds Range \p R to the Result if it is distinct from the last added Range.
38// Assumes that only consecutive ranges can coincide.
39void addIfDistinct(const Range &R, std::vector<Range> &Result) {
40 if (Result.empty() || Result.back() != R) {
41 Result.push_back(R);
42 }
43}
44
45} // namespace
46
47llvm::Expected<SelectionRange> getSemanticRanges(ParsedAST &AST, Position Pos) {
48 std::vector<Range> Ranges;
49 const auto &SM = AST.getSourceManager();
50 const auto &LangOpts = AST.getLangOpts();
51
52 auto FID = SM.getMainFileID();
53 auto Offset = positionToOffset(SM.getBufferData(FID), Pos);
54 if (!Offset) {
55 return Offset.takeError();
56 }
57
58 // Get node under the cursor.
60 AST.getASTContext(), AST.getTokens(), *Offset, *Offset);
61 for (const auto *Node = ST.commonAncestor(); Node != nullptr;
62 Node = Node->Parent) {
63 if (const Decl *D = Node->ASTNode.get<Decl>()) {
64 if (llvm::isa<TranslationUnitDecl>(D)) {
65 break;
66 }
67 }
68
69 auto SR = toHalfOpenFileRange(SM, LangOpts, Node->ASTNode.getSourceRange());
70 if (!SR || SM.getFileID(SR->getBegin()) != SM.getMainFileID()) {
71 continue;
72 }
73 Range R;
74 R.start = sourceLocToPosition(SM, SR->getBegin());
75 R.end = sourceLocToPosition(SM, SR->getEnd());
76 addIfDistinct(R, Ranges);
77 }
78
79 if (Ranges.empty()) {
80 // LSP provides no way to signal "the point is not within a semantic range".
81 // Return an empty range at the point.
83 Empty.range.start = Empty.range.end = Pos;
84 return std::move(Empty);
85 }
86
87 // Convert to the LSP linked-list representation.
89 Head.range = std::move(Ranges.front());
91 for (auto &Range :
92 llvm::MutableArrayRef(Ranges.data(), Ranges.size()).drop_front()) {
93 Tail->parent = std::make_unique<SelectionRange>();
94 Tail = Tail->parent.get();
95 Tail->range = std::move(Range);
96 }
97
98 return std::move(Head);
99}
100
102 // Record the token range of a region:
103 //
104 // #pragma region name[[
105 // ...
106 // ]]#pragma endregion
107 std::vector<Token::Range> &Ranges;
108 const TokenStream &Code;
109 // Stack of starting token (the name of the region) indices for nested #pragma
110 // region.
111 std::vector<unsigned> Stack;
112
113public:
114 PragmaRegionFinder(std::vector<Token::Range> &Ranges, const TokenStream &Code)
115 : Ranges(Ranges), Code(Code) {}
116
117 void walk(const DirectiveTree &T) {
118 for (const auto &C : T.Chunks)
119 std::visit(*this, C);
120 }
121
122 void operator()(const DirectiveTree::Code &C) {}
123
124 void operator()(const DirectiveTree::Directive &D) {
125 // Get the tokens that make up this directive.
126 auto Tokens = Code.tokens(D.Tokens);
127 if (Tokens.empty())
128 return;
129 const Token &HashToken = Tokens.front();
130 assert(HashToken.Kind == tok::hash);
131 const Token &Pragma = HashToken.nextNC();
132 if (Pragma.text() != "pragma")
133 return;
134 const Token &Value = Pragma.nextNC();
135
136 // Handle "#pragma region name"
137 if (Value.text() == "region") {
138 // Find the last token at the same line.
139 const Token *T = &Value.next();
140 while (T < Tokens.end() && T->Line == Pragma.Line)
141 T = &T->next();
142 --T;
143 Stack.push_back(T->OriginalIndex);
144 return;
145 }
146
147 // Handle "#pragma endregion"
148 if (Value.text() == "endregion") {
149 if (Stack.empty())
150 return; // unmatched end region; ignore.
151
152 unsigned StartIdx = Stack.back();
153 Stack.pop_back();
154 Ranges.push_back(Token::Range{StartIdx, HashToken.OriginalIndex});
155 }
156 }
157
158 void operator()(const DirectiveTree::Conditional &C) {
159 // C.Branches needs to see the DirectiveTree definition, otherwise build
160 // fails in C++20.
161 [[maybe_unused]] DirectiveTree Dummy;
162 for (const auto &[_, SubTree] : C.Branches)
163 walk(SubTree);
164 }
165};
166
167// FIXME( usaxena95): Collect includes and other code regions (e.g.
168// public/private/protected sections of classes, control flow statement bodies).
169// Related issue: https://github.com/clangd/clangd/issues/310
170llvm::Expected<std::vector<FoldingRange>>
171getFoldingRanges(const std::string &Code, bool LineFoldingOnly) {
172 auto OrigStream = lex(Code, genericLangOpts());
173
174 auto DirectiveStructure = DirectiveTree::parse(OrigStream);
175 chooseConditionalBranches(DirectiveStructure, OrigStream);
176
177 std::vector<FoldingRange> Result;
178 auto AddFoldingRange = [&](Position Start, Position End,
179 llvm::StringLiteral Kind) {
180 if (Start.line >= End.line)
181 return;
182 FoldingRange FR;
183 FR.startLine = Start.line;
184 FR.startCharacter = Start.character;
185 FR.endLine = End.line;
186 FR.endCharacter = End.character;
187 FR.kind = Kind.str();
188 Result.push_back(FR);
189 };
190 auto OriginalToken = [&](const Token &T) {
191 return OrigStream.tokens()[T.OriginalIndex];
192 };
193 auto StartOffset = [&](const Token &T) {
194 return OriginalToken(T).text().data() - Code.data();
195 };
196 auto StartPosition = [&](const Token &T) {
197 return offsetToPosition(Code, StartOffset(T));
198 };
199 auto EndOffset = [&](const Token &T) {
200 return StartOffset(T) + OriginalToken(T).Length;
201 };
202 auto EndPosition = [&](const Token &T) {
203 return offsetToPosition(Code, EndOffset(T));
204 };
205
206 // Preprocessor directives
207 auto PPRanges = pairDirectiveRanges(DirectiveStructure, OrigStream);
208 for (const auto &R : PPRanges) {
209 auto BTok = OrigStream.tokens()[R.Begin];
210 auto ETok = OrigStream.tokens()[R.End];
211 if (ETok.Kind == tok::eof)
212 continue;
213 if (BTok.Line >= ETok.Line)
214 continue;
215
216 Position Start = EndPosition(BTok);
217 Position End = StartPosition(ETok);
218 if (LineFoldingOnly)
219 End.line--;
220 AddFoldingRange(Start, End, FoldingRange::REGION_KIND);
221 }
222
223 // FIXME: Provide ranges in the disabled-PP regions as well.
224 auto Preprocessed = DirectiveStructure.stripDirectives(OrigStream);
225
226 auto ParseableStream = cook(Preprocessed, genericLangOpts());
227 pairBrackets(ParseableStream);
228
229 auto Tokens = ParseableStream.tokens();
230
231 // Brackets.
232 for (const auto &Tok : Tokens) {
233 if (auto *Paired = Tok.pair()) {
234 // Process only token at the start of the range. Avoid ranges on a single
235 // line.
236 if (Tok.Line < Paired->Line) {
237 Position Start = offsetToPosition(Code, 1 + StartOffset(Tok));
238 Position End = StartPosition(*Paired);
239 if (LineFoldingOnly)
240 End.line--;
241 AddFoldingRange(Start, End, FoldingRange::REGION_KIND);
242 }
243 }
244 }
245 auto IsBlockComment = [&](const Token &T) {
246 assert(T.Kind == tok::comment);
247 return OriginalToken(T).Length >= 2 &&
248 Code.substr(StartOffset(T), 2) == "/*";
249 };
250
251 // Multi-line comments.
252 for (auto *T = Tokens.begin(); T != Tokens.end();) {
253 if (T->Kind != tok::comment) {
254 T++;
255 continue;
256 }
257 Token *FirstComment = T;
258 // Show starting sentinals (// and /*) of the comment.
259 Position Start = offsetToPosition(Code, 2 + StartOffset(*FirstComment));
260 Token *LastComment = T;
261 Position End = EndPosition(*T);
262 while (T != Tokens.end() && T->Kind == tok::comment &&
263 StartPosition(*T).line <= End.line + 1) {
264 End = EndPosition(*T);
265 LastComment = T;
266 T++;
267 }
268 if (IsBlockComment(*FirstComment)) {
269 if (LineFoldingOnly)
270 // Show last line of a block comment.
271 End.line--;
272 if (IsBlockComment(*LastComment))
273 // Show ending sentinal "*/" of the block comment.
274 End.character -= 2;
275 }
276 AddFoldingRange(Start, End, FoldingRange::COMMENT_KIND);
277 }
278
279 // #pragma region
280 std::vector<Token::Range> Ranges;
281 PragmaRegionFinder(Ranges, OrigStream).walk(DirectiveStructure);
282 auto Ts = OrigStream.tokens();
283 for (const auto &R : Ranges) {
284 auto End = StartPosition(Ts[R.End]);
285 if (LineFoldingOnly)
286 End.line--;
287 AddFoldingRange(EndPosition(Ts[R.Begin]), End, FoldingRange::REGION_KIND);
288 }
289 return Result;
290}
291
292} // namespace clangd
293} // namespace clang
Stores and provides access to parsed AST.
Definition ParsedAST.h:46
PragmaRegionFinder(std::vector< Token::Range > &Ranges, const TokenStream &Code)
void walk(const DirectiveTree &T)
void operator()(const DirectiveTree::Directive &D)
void operator()(const DirectiveTree::Code &C)
void operator()(const DirectiveTree::Conditional &C)
static SelectionTree createRight(ASTContext &AST, const syntax::TokenBuffer &Tokens, unsigned Begin, unsigned End)
const Node * commonAncestor() const
A complete sequence of Tokens representing a source file.
FIXME: Skip testing on windows temporarily due to the different escaping code mode.
Definition AST.cpp:45
std::optional< SourceRange > toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R)
Turns a token range into a half-open range and checks its correctness.
Position offsetToPosition(llvm::StringRef Code, size_t Offset)
Turn an offset in Code into a [line, column] pair.
static void lex(llvm::StringRef Code, const LangOptions &LangOpts, llvm::function_ref< void(const syntax::Token &, const SourceManager &SM)> Action)
std::vector< Token::Range > pairDirectiveRanges(const DirectiveTree &Tree, const TokenStream &Code)
Pairs preprocessor conditional directives and computes their token ranges.
llvm::Expected< std::vector< FoldingRange > > getFoldingRanges(const std::string &Code, bool LineFoldingOnly)
Returns a list of ranges whose contents might be collapsible in an editor.
Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc)
Turn a SourceLocation into a [line, column] pair.
llvm::Expected< SelectionRange > getSemanticRanges(ParsedAST &AST, Position Pos)
Returns the list of all interesting ranges around the Position Pos.
clang::LangOptions genericLangOpts(clang::Language Lang, clang::LangStandard::Kind Standard)
Generic lang options suitable for lexing/parsing a language.
Definition Token.cpp:96
void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code)
Describes the structure of a source file, as seen by the preprocessor.
llvm::Expected< size_t > positionToOffset(llvm::StringRef Code, Position P, bool AllowColumnsBeyondLineLength)
Turn a [line, column] pair into an offset in Code.
TokenStream cook(const TokenStream &Code, const LangOptions &LangOpts)
Definition Lex.cpp:79
void pairBrackets(TokenStream &Stream)
Identifies bracket token in the stream which should be paired.
Definition Bracket.cpp:148
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
Stores information about a region of code that can be folded.
Definition Protocol.h:1977
static const llvm::StringLiteral REGION_KIND
Definition Protocol.h:1983
static const llvm::StringLiteral COMMENT_KIND
Definition Protocol.h:1984
int line
Line position in a document (zero-based).
Definition Protocol.h:158
int character
Character offset on a line in a document (zero-based).
Definition Protocol.h:163
Position start
The range's start position.
Definition Protocol.h:187
Position end
The range's end position.
Definition Protocol.h:190
A half-open range of tokens within a stream.
A single C++ or preprocessor token.
Index OriginalIndex
Index into the original token stream (as raw-lexed from the source code).
StringRef text() const
The token text.
uint32_t Line
Zero-based line number for the start of the token.
clang::tok::TokenKind Kind
The type of token as determined by clang's lexer.
const Token & nextNC() const
Returns the next token in the stream, skipping over comments.