clang 19.0.0git
RangeSelector.cpp
Go to the documentation of this file.
1//===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
#include "clang/Tooling/Transformer/RangeSelector.h"
#include "clang/AST/Expr.h"
#include "clang/AST/TypeLoc.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/Transformer/SourceCode.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include <string>
#include <utility>
#include <vector>

using namespace clang;
using namespace transformer;

using ast_matchers::MatchFinder;
using llvm::Error;
using llvm::StringError;

using MatchResult = MatchFinder::MatchResult;

32static Error invalidArgumentError(Twine Message) {
33 return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
34}
35
36static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
37 return invalidArgumentError("mismatched type (node id=" + ID +
38 " kind=" + Kind.asStringRef() + ")");
39}
40
41static Error typeError(StringRef ID, const ASTNodeKind &Kind,
42 Twine ExpectedType) {
43 return invalidArgumentError("mismatched type: expected one of " +
44 ExpectedType + " (node id=" + ID +
45 " kind=" + Kind.asStringRef() + ")");
46}
47
48static Error missingPropertyError(StringRef ID, Twine Description,
49 StringRef Property) {
50 return invalidArgumentError(Description + " requires property '" + Property +
51 "' (node id=" + ID + ")");
52}
53
55 StringRef ID) {
56 auto &NodesMap = Nodes.getMap();
57 auto It = NodesMap.find(ID);
58 if (It == NodesMap.end())
59 return invalidArgumentError("ID not bound: " + ID);
60 return It->second;
61}
62
63// FIXME: handling of macros should be configurable.
65 const SourceManager &SM,
66 const LangOptions &LangOpts) {
67 if (Start.isInvalid() || Start.isMacroID())
68 return SourceLocation();
69
70 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
71 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
72 return SourceLocation();
73
74 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
75}
76
77// Finds the start location of the previous token of kind \p TK.
78// FIXME: handling of macros should be configurable.
80 const SourceManager &SM,
81 const LangOptions &LangOpts,
82 tok::TokenKind TK) {
83 while (true) {
84 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
85 if (L.isInvalid() || L.isMacroID())
86 return SourceLocation();
87
88 Token T;
89 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
90 return SourceLocation();
91
92 if (T.is(TK))
93 return T.getLocation();
94
95 Start = L;
96 }
97}
98
100 const LangOptions &LangOpts) {
101 SourceLocation EndLoc =
102 E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
103 return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
104}
105
107 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
108 Expected<CharSourceRange> SelectedRange = Selector(Result);
109 if (!SelectedRange)
110 return SelectedRange.takeError();
111 return CharSourceRange::getCharRange(SelectedRange->getBegin());
112 };
113}
114
116 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
117 Expected<CharSourceRange> SelectedRange = Selector(Result);
118 if (!SelectedRange)
119 return SelectedRange.takeError();
120 SourceLocation End = SelectedRange->getEnd();
121 if (SelectedRange->isTokenRange()) {
122 // We need to find the actual (exclusive) end location from which to
123 // create a new source range. However, that's not guaranteed to be valid,
124 // even if the token location itself is valid. So, we create a token range
125 // consisting only of the last token, then map that range back to the
126 // source file. If that succeeds, we have a valid location for the end of
127 // the generated range.
129 CharSourceRange::getTokenRange(SelectedRange->getEnd()),
130 *Result.SourceManager, Result.Context->getLangOpts());
131 if (Range.isInvalid())
133 "after: can't resolve sub-range to valid source range");
134 End = Range.getEnd();
135 }
136
138 };
139}
140
142 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
143 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
144 if (!Node)
145 return Node.takeError();
146 return (Node->get<Decl>() != nullptr ||
147 (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
148 ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
149 *Result.Context)
151 };
152}
153
155 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
156 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
157 if (!Node)
158 return Node.takeError();
159 return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
160 *Result.Context);
161 };
162}
163
165 return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
166 Expected<CharSourceRange> BeginRange = Begin(Result);
167 if (!BeginRange)
168 return BeginRange.takeError();
169 Expected<CharSourceRange> EndRange = End(Result);
170 if (!EndRange)
171 return EndRange.takeError();
172 SourceLocation B = BeginRange->getBegin();
173 SourceLocation E = EndRange->getEnd();
174 // Note: we are precluding the possibility of sub-token ranges in the case
175 // that EndRange is a token range.
176 if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
177 return invalidArgumentError("Bad range: out of order");
178 }
179 return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
180 };
181}
182
184 std::string EndID) {
185 return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
186}
187
189 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
190 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
191 if (!Node)
192 return Node.takeError();
193 if (auto *M = Node->get<clang::MemberExpr>())
195 M->getMemberNameInfo().getSourceRange());
196 return typeError(ID, Node->getNodeKind(), "MemberExpr");
197 };
198}
199
201 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
202 Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
203 if (!N)
204 return N.takeError();
205 auto &Node = *N;
206 if (const auto *D = Node.get<NamedDecl>()) {
207 if (!D->getDeclName().isIdentifier())
208 return missingPropertyError(ID, "name", "identifier");
209 SourceLocation L = D->getLocation();
210 auto R = CharSourceRange::getTokenRange(L, L);
211 // Verify that the range covers exactly the name.
212 // FIXME: extend this code to support cases like `operator +` or
213 // `foo<int>` for which this range will be too short. Doing so will
214 // require subcasing `NamedDecl`, because it doesn't provide virtual
215 // access to the \c DeclarationNameInfo.
216 if (tooling::getText(R, *Result.Context) != D->getName())
217 return CharSourceRange();
218 return R;
219 }
220 if (const auto *E = Node.get<DeclRefExpr>()) {
221 if (!E->getNameInfo().getName().isIdentifier())
222 return missingPropertyError(ID, "name", "identifier");
223 SourceLocation L = E->getLocation();
225 }
226 if (const auto *I = Node.get<CXXCtorInitializer>()) {
227 if (!I->isMemberInitializer() && I->isWritten())
228 return missingPropertyError(ID, "name", "explicit member initializer");
229 SourceLocation L = I->getMemberLocation();
231 }
232 if (const auto *T = Node.get<TypeLoc>()) {
233 TypeLoc Loc = *T;
234 auto ET = Loc.getAs<ElaboratedTypeLoc>();
235 if (!ET.isNull())
236 Loc = ET.getNamedTypeLoc();
237 if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>();
238 !SpecLoc.isNull())
239 return CharSourceRange::getTokenRange(SpecLoc.getTemplateNameLoc());
241 }
242 return typeError(ID, Node.getNodeKind(),
243 "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
244 };
245}
246
247namespace {
248// FIXME: make this available in the public API for users to easily create their
249// own selectors.
250
251// Creates a selector from a range-selection function \p Func, which selects a
252// range that is relative to a bound node id. \c T is the node type expected by
253// \p Func.
254template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
255class RelativeSelector {
256 std::string ID;
257
258public:
259 RelativeSelector(std::string ID) : ID(std::move(ID)) {}
260
261 Expected<CharSourceRange> operator()(const MatchResult &Result) {
262 Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
263 if (!N)
264 return N.takeError();
265 if (const auto *Arg = N->get<T>())
266 return Func(Result, *Arg);
267 return typeError(ID, N->getNodeKind());
268 }
269};
270} // namespace
271
272// FIXME: Change the following functions from being in an anonymous namespace
273// to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
274// (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
275// namespace works around a bug in earlier versions.
276namespace {
277// Returns the range of the statements (all source between the braces).
278CharSourceRange getStatementsRange(const MatchResult &,
279 const CompoundStmt &CS) {
281 CS.getRBracLoc());
282}
283} // namespace
284
286 return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
287}
288
289namespace {
290// Returns the range of the source between the call's parentheses.
291CharSourceRange getCallArgumentsRange(const MatchResult &Result,
292 const CallExpr &CE) {
294 findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
296 CE.getRParenLoc());
297}
298} // namespace
299
301 return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
302}
303
304namespace {
305// Returns the range of the elements of the initializer list. Includes all
306// source between the braces.
307CharSourceRange getElementsRange(const MatchResult &,
308 const InitListExpr &E) {
310 E.getRBraceLoc());
311}
312} // namespace
313
315 return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
316}
317
318namespace {
319// Returns the range of the else branch, including the `else` keyword.
320CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
322 CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
323 tok::TokenKind::semi, *Result.Context);
324}
325} // namespace
326
328 return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
329}
330
332 return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
333 Expected<CharSourceRange> SRange = S(Result);
334 if (!SRange)
335 return SRange.takeError();
336 return Result.SourceManager->getExpansionRange(*SRange);
337 };
338}
BoundNodesTreeBuilder Nodes
DynTypedNode Node
#define SM(sm)
Definition: Cuda.cpp:82
static Error invalidArgumentError(Twine Message)
static SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
static SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
static Error missingPropertyError(StringRef ID, Twine Description, StringRef Property)
static Error typeError(StringRef ID, const ASTNodeKind &Kind)
static Expected< DynTypedNode > getNode(const ast_matchers::BoundNodes &Nodes, StringRef ID)
static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM, const LangOptions &LangOpts)
Defines a combinator library supporting the definition of selectors, which select source ranges based...
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TypeLoc interface and its subclasses.
SourceLocation Begin
Kind identifier.
Definition: ASTTypeTraits.h:51
Represents a C++ base or member initializer.
Definition: DeclCXX.h:2293
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2819
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:3010
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2997
SourceLocation getRParenLoc() const
Definition: Expr.h:3129
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
static CharSourceRange getTokenRange(SourceRange R)
CompoundStmt - This represents a group of statements like { stmt stmt }.
Definition: Stmt.h:1604
SourceLocation getLBracLoc() const
Definition: Stmt.h:1736
SourceLocation getRBracLoc() const
Definition: Stmt.h:1737
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1260
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:85
const LangOptions & getLangOpts() const LLVM_READONLY
Helper to get the language options from the ASTContext.
Definition: DeclBase.cpp:507
ASTNodeKind getNodeKind() const
SourceRange getSourceRange() const
For nodes which represent textual entities in the source code, return their SourceRange.
const T * get() const
Retrieve the stored node as type T.
This represents one expression.
Definition: Expr.h:110
IfStmt - This represents an if/then/else.
Definition: Stmt.h:2136
Describes a C or C++ initializer list.
Definition: Expr.h:4841
SourceLocation getLBraceLoc() const
Definition: Expr.h:4995
SourceLocation getRBraceLoc() const
Definition: Expr.h:4997
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:418
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
Definition: Lexer.cpp:955
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location any where in a source buffer, find the location that corresponds to the beginning of...
Definition: Lexer.cpp:609
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
Definition: Lexer.cpp:510
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3182
This represents a decl that may have a name.
Definition: Decl.h:249
Smart pointer class that efficiently represents Objective-C method names.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
Base wrapper for a particular "section" of type source info.
Definition: TypeLoc.h:59
T getAs() const
Convert to the specified TypeLoc type, returning a null TypeLoc if this TypeLoc is not of the desired...
Definition: TypeLoc.h:89
SourceRange getSourceRange() const LLVM_READONLY
Get the full source range.
Definition: TypeLoc.h:153
bool isNull() const
Definition: TypeLoc.h:121
Maps string IDs to AST nodes matched by parts of a matcher.
Definition: ASTMatchers.h:109
A class to allow finding matches over the Clang AST.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
CharSourceRange getExtendedRange(const T &Node, tok::TokenKind Next, ASTContext &Context)
Returns the source range spanning the node, extended to include Next, if it immediately follows Node.
Definition: SourceCode.h:34
CharSourceRange maybeExtendRange(CharSourceRange Range, tok::TokenKind Terminator, ASTContext &Context)
Extends Range to include the token Terminator, if it immediately follows the end of the range.
Definition: SourceCode.cpp:37
StringRef getText(CharSourceRange Range, const ASTContext &Context)
Returns the source-code text in the specified range.
Definition: SourceCode.cpp:31
RangeSelector initListElements(std::string ID)
RangeSelector enclose(RangeSelector Begin, RangeSelector End)
Selects from the start of Begin and to the end of End.
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token.
RangeSelector elseBranch(std::string ID)
Given an IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword.
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon, if any (for declarations and non-expression statements)...
MatchConsumer< CharSourceRange > RangeSelector
Definition: RangeSelector.h:27
RangeSelector encloseNodes(std::string BeginID, std::string EndID)
Convenience version of range where end-points are bound nodes.
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
RangeSelector callArgs(std::string ID)
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always).
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion,...
RangeSelector statements(std::string ID)
RangeSelector name(std::string ID)
Given a node with a "name" (like NamedDecl, DeclRefExpr, CXXCtorInitializer, and TypeLoc) selects th...
The JSON file list parser is used to communicate input to InstallAPI.
@ Property
The type of a property.
Definition: Format.h:5304
Contains all information for a given match.