clang-tools 20.0.0git
SemanticSelection.cpp
Go to the documentation of this file.
1//===--- SemanticSelection.cpp -----------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SemanticSelection.h"
10#include "ParsedAST.h"
11#include "Protocol.h"
12#include "Selection.h"
13#include "SourceCode.h"
14#include "clang/AST/DeclBase.h"
15#include "clang/Basic/SourceLocation.h"
16#include "clang/Basic/SourceManager.h"
17#include "clang/Tooling/Syntax/BuildTree.h"
18#include "clang/Tooling/Syntax/Nodes.h"
19#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
20#include "clang/Tooling/Syntax/Tree.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Casting.h"
24#include "llvm/Support/Error.h"
25#include "support/Bracket.h"
27#include "support/Token.h"
28#include <optional>
29#include <queue>
30#include <vector>
31
32namespace clang {
33namespace clangd {
34namespace {
35
36// Adds Range \p R to the Result if it is distinct from the last added Range.
37// Assumes that only consecutive ranges can coincide.
38void addIfDistinct(const Range &R, std::vector<Range> &Result) {
39 if (Result.empty() || Result.back() != R) {
40 Result.push_back(R);
41 }
42}
43
44std::optional<FoldingRange> toFoldingRange(SourceRange SR,
45 const SourceManager &SM) {
46 const auto Begin = SM.getDecomposedLoc(SR.getBegin()),
47 End = SM.getDecomposedLoc(SR.getEnd());
48 // Do not produce folding ranges if either range ends is not within the main
49 // file. Macros have their own FileID so this also checks if locations are not
50 // within the macros.
51 if ((Begin.first != SM.getMainFileID()) || (End.first != SM.getMainFileID()))
52 return std::nullopt;
53 FoldingRange Range;
54 Range.startCharacter = SM.getColumnNumber(Begin.first, Begin.second) - 1;
55 Range.startLine = SM.getLineNumber(Begin.first, Begin.second) - 1;
56 Range.endCharacter = SM.getColumnNumber(End.first, End.second) - 1;
57 Range.endLine = SM.getLineNumber(End.first, End.second) - 1;
58 return Range;
59}
60
61std::optional<FoldingRange>
62extractFoldingRange(const syntax::Node *Node,
63 const syntax::TokenBufferTokenManager &TM) {
64 if (const auto *Stmt = dyn_cast<syntax::CompoundStatement>(Node)) {
65 const auto *LBrace = cast_or_null<syntax::Leaf>(
66 Stmt->findChild(syntax::NodeRole::OpenParen));
67 // FIXME(kirillbobyrev): This should find the last child. Compound
68 // statements have only one pair of braces so this is valid but for other
69 // node kinds it might not be correct.
70 const auto *RBrace = cast_or_null<syntax::Leaf>(
71 Stmt->findChild(syntax::NodeRole::CloseParen));
72 if (!LBrace || !RBrace)
73 return std::nullopt;
74 // Fold the entire range within braces, including whitespace.
75 const SourceLocation LBraceLocInfo =
76 TM.getToken(LBrace->getTokenKey())->endLocation(),
77 RBraceLocInfo =
78 TM.getToken(RBrace->getTokenKey())->location();
79 auto Range = toFoldingRange(SourceRange(LBraceLocInfo, RBraceLocInfo),
80 TM.sourceManager());
81 // Do not generate folding range for compound statements without any
82 // nodes and newlines.
83 if (Range && Range->startLine != Range->endLine)
84 return Range;
85 }
86 return std::nullopt;
87}
88
89// Traverse the tree and collect folding ranges along the way.
90std::vector<FoldingRange>
91collectFoldingRanges(const syntax::Node *Root,
92 const syntax::TokenBufferTokenManager &TM) {
93 std::queue<const syntax::Node *> Nodes;
94 Nodes.push(Root);
95 std::vector<FoldingRange> Result;
96 while (!Nodes.empty()) {
97 const syntax::Node *Node = Nodes.front();
98 Nodes.pop();
99 const auto Range = extractFoldingRange(Node, TM);
100 if (Range)
101 Result.push_back(*Range);
102 if (const auto *T = dyn_cast<syntax::Tree>(Node))
103 for (const auto *NextNode = T->getFirstChild(); NextNode;
104 NextNode = NextNode->getNextSibling())
105 Nodes.push(NextNode);
106 }
107 return Result;
108}
109
110} // namespace
111
112llvm::Expected<SelectionRange> getSemanticRanges(ParsedAST &AST, Position Pos) {
113 std::vector<Range> Ranges;
114 const auto &SM = AST.getSourceManager();
115 const auto &LangOpts = AST.getLangOpts();
116
117 auto FID = SM.getMainFileID();
118 auto Offset = positionToOffset(SM.getBufferData(FID), Pos);
119 if (!Offset) {
120 return Offset.takeError();
121 }
122
123 // Get node under the cursor.
125 AST.getASTContext(), AST.getTokens(), *Offset, *Offset);
126 for (const auto *Node = ST.commonAncestor(); Node != nullptr;
127 Node = Node->Parent) {
128 if (const Decl *D = Node->ASTNode.get<Decl>()) {
129 if (llvm::isa<TranslationUnitDecl>(D)) {
130 break;
131 }
132 }
133
134 auto SR = toHalfOpenFileRange(SM, LangOpts, Node->ASTNode.getSourceRange());
135 if (!SR || SM.getFileID(SR->getBegin()) != SM.getMainFileID()) {
136 continue;
137 }
138 Range R;
139 R.start = sourceLocToPosition(SM, SR->getBegin());
140 R.end = sourceLocToPosition(SM, SR->getEnd());
141 addIfDistinct(R, Ranges);
142 }
143
144 if (Ranges.empty()) {
145 // LSP provides no way to signal "the point is not within a semantic range".
146 // Return an empty range at the point.
148 Empty.range.start = Empty.range.end = Pos;
149 return std::move(Empty);
150 }
151
152 // Convert to the LSP linked-list representation.
154 Head.range = std::move(Ranges.front());
156 for (auto &Range :
157 llvm::MutableArrayRef(Ranges.data(), Ranges.size()).drop_front()) {
158 Tail->parent = std::make_unique<SelectionRange>();
159 Tail = Tail->parent.get();
160 Tail->range = std::move(Range);
161 }
162
163 return std::move(Head);
164}
165
166// FIXME(kirillbobyrev): Collect comments, PP conditional regions, includes and
167// other code regions (e.g. public/private/protected sections of classes,
168// control flow statement bodies).
169// Related issue: https://github.com/clangd/clangd/issues/310
170llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST) {
171 syntax::Arena A;
172 syntax::TokenBufferTokenManager TM(AST.getTokens(), AST.getLangOpts(),
173 AST.getSourceManager());
174 const auto *SyntaxTree = syntax::buildSyntaxTree(A, TM, AST.getASTContext());
175 return collectFoldingRanges(SyntaxTree, TM);
176}
177
178// FIXME( usaxena95): Collect PP conditional regions, includes and other code
179// regions (e.g. public/private/protected sections of classes, control flow
180// statement bodies).
181// Related issue: https://github.com/clangd/clangd/issues/310
182llvm::Expected<std::vector<FoldingRange>>
183getFoldingRanges(const std::string &Code, bool LineFoldingOnly) {
184 auto OrigStream = lex(Code, genericLangOpts());
185
186 auto DirectiveStructure = DirectiveTree::parse(OrigStream);
187 chooseConditionalBranches(DirectiveStructure, OrigStream);
188
189 // FIXME: Provide ranges in the disabled-PP regions as well.
190 auto Preprocessed = DirectiveStructure.stripDirectives(OrigStream);
191
192 auto ParseableStream = cook(Preprocessed, genericLangOpts());
193 pairBrackets(ParseableStream);
194
195 std::vector<FoldingRange> Result;
196 auto AddFoldingRange = [&](Position Start, Position End,
197 llvm::StringLiteral Kind) {
198 if (Start.line >= End.line)
199 return;
200 FoldingRange FR;
201 FR.startLine = Start.line;
202 FR.startCharacter = Start.character;
203 FR.endLine = End.line;
204 FR.endCharacter = End.character;
205 FR.kind = Kind.str();
206 Result.push_back(FR);
207 };
208 auto OriginalToken = [&](const Token &T) {
209 return OrigStream.tokens()[T.OriginalIndex];
210 };
211 auto StartOffset = [&](const Token &T) {
212 return OriginalToken(T).text().data() - Code.data();
213 };
214 auto StartPosition = [&](const Token &T) {
215 return offsetToPosition(Code, StartOffset(T));
216 };
217 auto EndOffset = [&](const Token &T) {
218 return StartOffset(T) + OriginalToken(T).Length;
219 };
220 auto EndPosition = [&](const Token &T) {
221 return offsetToPosition(Code, EndOffset(T));
222 };
223 auto Tokens = ParseableStream.tokens();
224 // Brackets.
225 for (const auto &Tok : Tokens) {
226 if (auto *Paired = Tok.pair()) {
227 // Process only token at the start of the range. Avoid ranges on a single
228 // line.
229 if (Tok.Line < Paired->Line) {
230 Position Start = offsetToPosition(Code, 1 + StartOffset(Tok));
231 Position End = StartPosition(*Paired);
232 if (LineFoldingOnly)
233 End.line--;
234 AddFoldingRange(Start, End, FoldingRange::REGION_KIND);
235 }
236 }
237 }
238 auto IsBlockComment = [&](const Token &T) {
239 assert(T.Kind == tok::comment);
240 return OriginalToken(T).Length >= 2 &&
241 Code.substr(StartOffset(T), 2) == "/*";
242 };
243 // Multi-line comments.
244 for (auto *T = Tokens.begin(); T != Tokens.end();) {
245 if (T->Kind != tok::comment) {
246 T++;
247 continue;
248 }
249 Token *FirstComment = T;
250 // Show starting sentinals (// and /*) of the comment.
251 Position Start = offsetToPosition(Code, 2 + StartOffset(*FirstComment));
252 Token *LastComment = T;
253 Position End = EndPosition(*T);
254 while (T != Tokens.end() && T->Kind == tok::comment &&
255 StartPosition(*T).line <= End.line + 1) {
256 End = EndPosition(*T);
257 LastComment = T;
258 T++;
259 }
260 if (IsBlockComment(*FirstComment)) {
261 if (LineFoldingOnly)
262 // Show last line of a block comment.
263 End.line--;
264 if (IsBlockComment(*LastComment))
265 // Show ending sentinal "*/" of the block comment.
266 End.character -= 2;
267 }
268 AddFoldingRange(Start, End, FoldingRange::COMMENT_KIND);
269 }
270 return Result;
271}
272
273} // namespace clangd
274} // namespace clang
const FunctionDecl * Decl
BindArgumentKind Kind
size_t Offset
ASTNode Root
Definition: DumpAST.cpp:343
CharSourceRange Range
SourceRange for the file name.
size_t Pos
::clang::DynTypedNode Node
Stores and provides access to parsed AST.
Definition: ParsedAST.h:46
static SelectionTree createRight(ASTContext &AST, const syntax::TokenBuffer &Tokens, unsigned Begin, unsigned End)
Definition: Selection.cpp:1067
const Node * commonAncestor() const
Definition: Selection.cpp:1097
std::optional< SourceRange > toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R)
Turns a token range into a half-open range and checks its correctness.
Definition: SourceCode.cpp:430
Position offsetToPosition(llvm::StringRef Code, size_t Offset)
Turn an offset in Code into a [line, column] pair.
Definition: SourceCode.cpp:202
static void lex(llvm::StringRef Code, const LangOptions &LangOpts, llvm::function_ref< void(const syntax::Token &, const SourceManager &SM)> Action)
Definition: SourceCode.cpp:621
llvm::Expected< std::vector< FoldingRange > > getFoldingRanges(ParsedAST &AST)
Returns a list of ranges whose contents might be collapsible in an editor.
Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc)
Turn a SourceLocation into a [line, column] pair.
Definition: SourceCode.cpp:214
llvm::Expected< SelectionRange > getSemanticRanges(ParsedAST &AST, Position Pos)
Returns the list of all interesting ranges around the Position Pos.
clang::LangOptions genericLangOpts(clang::Language Lang, clang::LangStandard::Kind Standard)
A generic lang options suitable for lexing/parsing a langage.
Definition: Token.cpp:96
void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code)
Selects a "taken" branch for each conditional directive in the file.
llvm::Expected< size_t > positionToOffset(llvm::StringRef Code, Position P, bool AllowColumnsBeyondLineLength)
Turn a [line, column] pair into an offset in Code.
Definition: SourceCode.cpp:173
TokenStream cook(const TokenStream &Code, const LangOptions &LangOpts)
Definition: Lex.cpp:79
void pairBrackets(TokenStream &Stream)
Identifies bracket token in the stream which should be paired.
Definition: Bracket.cpp:148
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static DirectiveTree parse(const TokenStream &)
Extract preprocessor structure by examining the raw tokens.
Stores information about a region of code that can be folded.
Definition: Protocol.h:1960
static const llvm::StringLiteral REGION_KIND
Definition: Protocol.h:1966
static const llvm::StringLiteral COMMENT_KIND
Definition: Protocol.h:1967
Position start
The range's start position.
Definition: Protocol.h:187
Position end
The range's end position.
Definition: Protocol.h:190
A single C++ or preprocessor token.
Definition: support/Token.h:50
const Token * pair() const
Returns the bracket paired with this one, if any.
uint32_t Line
Zero-based line number for the start of the token.
Definition: support/Token.h:66