clang 22.0.0git
RangeSelector.cpp
Go to the documentation of this file.
1//===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "clang/AST/Expr.h"
11#include "clang/AST/TypeLoc.h"
14#include "clang/Lex/Lexer.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Support/Errc.h"
18#include "llvm/Support/Error.h"
19#include <string>
20#include <utility>
21
22using namespace clang;
23using namespace transformer;
24
26using llvm::Error;
27using llvm::StringError;
28
29using MatchResult = MatchFinder::MatchResult;
30
31static Error invalidArgumentError(Twine Message) {
32 return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
33}
34
35static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
36 return invalidArgumentError("mismatched type (node id=" + ID +
37 " kind=" + Kind.asStringRef() + ")");
38}
39
40static Error typeError(StringRef ID, const ASTNodeKind &Kind,
41 Twine ExpectedType) {
42 return invalidArgumentError("mismatched type: expected one of " +
43 ExpectedType + " (node id=" + ID +
44 " kind=" + Kind.asStringRef() + ")");
45}
46
47static Error missingPropertyError(StringRef ID, Twine Description,
48 StringRef Property) {
49 return invalidArgumentError(Description + " requires property '" + Property +
50 "' (node id=" + ID + ")");
51}
52
54 StringRef ID) {
55 auto &NodesMap = Nodes.getMap();
56 auto It = NodesMap.find(ID);
57 if (It == NodesMap.end())
58 return invalidArgumentError("ID not bound: " + ID);
59 return It->second;
60}
61
62// FIXME: handling of macros should be configurable.
64 const SourceManager &SM,
65 const LangOptions &LangOpts) {
66 if (Start.isInvalid() || Start.isMacroID())
67 return SourceLocation();
68
69 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
70 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
71 return SourceLocation();
72
73 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
74}
75
76// Finds the start location of the previous token of kind \p TK.
77// FIXME: handling of macros should be configurable.
79 const SourceManager &SM,
80 const LangOptions &LangOpts,
81 tok::TokenKind TK) {
82 while (true) {
83 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
84 if (L.isInvalid() || L.isMacroID())
85 return SourceLocation();
86
87 Token T;
88 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
89 return SourceLocation();
90
91 if (T.is(TK))
92 return T.getLocation();
93
94 Start = L;
95 }
96}
97
99 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
101 if (!SelectedRange)
102 return SelectedRange.takeError();
103 return CharSourceRange::getCharRange(SelectedRange->getBegin());
104 };
105}
106
108 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
110 if (!SelectedRange)
111 return SelectedRange.takeError();
112 SourceLocation End = SelectedRange->getEnd();
113 if (SelectedRange->isTokenRange()) {
114 // We need to find the actual (exclusive) end location from which to
115 // create a new source range. However, that's not guaranteed to be valid,
116 // even if the token location itself is valid. So, we create a token range
117 // consisting only of the last token, then map that range back to the
118 // source file. If that succeeds, we have a valid location for the end of
119 // the generated range.
121 CharSourceRange::getTokenRange(SelectedRange->getEnd()),
122 *Result.SourceManager, Result.Context->getLangOpts());
123 if (Range.isInvalid())
125 "after: can't resolve sub-range to valid source range");
126 End = Range.getEnd();
127 }
128
130 };
131}
132
134 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
135 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
136 if (!Node)
137 return Node.takeError();
138 return (Node->get<Decl>() != nullptr ||
139 (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
140 ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
141 *Result.Context)
142 : CharSourceRange::getTokenRange(Node->getSourceRange());
143 };
144}
145
147 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
148 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
149 if (!Node)
150 return Node.takeError();
151 return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
152 *Result.Context);
153 };
154}
155
157 return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
158 Expected<CharSourceRange> BeginRange = Begin(Result);
159 if (!BeginRange)
160 return BeginRange.takeError();
161 Expected<CharSourceRange> EndRange = End(Result);
162 if (!EndRange)
163 return EndRange.takeError();
164 SourceLocation B = BeginRange->getBegin();
165 SourceLocation E = EndRange->getEnd();
166 // Note: we are precluding the possibility of sub-token ranges in the case
167 // that EndRange is a token range.
168 if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
169 return invalidArgumentError("Bad range: out of order");
170 }
171 return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
172 };
173}
174
176 std::string EndID) {
177 return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
178}
179
181 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
182 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
183 if (!Node)
184 return Node.takeError();
185 if (auto *M = Node->get<clang::MemberExpr>())
187 M->getMemberNameInfo().getSourceRange());
188 return typeError(ID, Node->getNodeKind(), "MemberExpr");
189 };
190}
191
193 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
195 if (!N)
196 return N.takeError();
197 auto &Node = *N;
198 if (const auto *D = Node.get<NamedDecl>()) {
199 if (!D->getDeclName().isIdentifier())
200 return missingPropertyError(ID, "name", "identifier");
201 SourceLocation L = D->getLocation();
202 auto R = CharSourceRange::getTokenRange(L, L);
203 // Verify that the range covers exactly the name.
204 // FIXME: extend this code to support cases like `operator +` or
205 // `foo<int>` for which this range will be too short. Doing so will
206 // require subcasing `NamedDecl`, because it doesn't provide virtual
207 // access to the \c DeclarationNameInfo.
208 StringRef Text = tooling::getText(R, *Result.Context);
209 if (Text != D->getName())
210 return llvm::make_error<StringError>(
211 llvm::errc::not_supported,
212 "range selected by name(node id=" + ID + "): '" + Text +
213 "' is different from decl name '" + D->getName() + "'");
214 return R;
215 }
216 if (const auto *E = Node.get<DeclRefExpr>()) {
217 if (!E->getNameInfo().getName().isIdentifier())
218 return missingPropertyError(ID, "name", "identifier");
219 SourceLocation L = E->getLocation();
221 }
222 if (const auto *I = Node.get<CXXCtorInitializer>()) {
223 if (!I->isMemberInitializer() && I->isWritten())
224 return missingPropertyError(ID, "name", "explicit member initializer");
225 SourceLocation L = I->getMemberLocation();
227 }
228 if (const auto *T = Node.get<TypeLoc>()) {
229 if (auto SpecLoc = T->getAs<TemplateSpecializationTypeLoc>();
230 !SpecLoc.isNull())
231 return CharSourceRange::getTokenRange(SpecLoc.getTemplateNameLoc());
232 return CharSourceRange::getTokenRange(T->getSourceRange());
233 }
234 return typeError(ID, Node.getNodeKind(),
235 "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
236 };
237}
238
239namespace {
240// FIXME: make this available in the public API for users to easily create their
241// own selectors.
242
243// Creates a selector from a range-selection function \p Func, which selects a
244// range that is relative to a bound node id. \c T is the node type expected by
245// \p Func.
246template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
247class RelativeSelector {
248 std::string ID;
249
250public:
251 RelativeSelector(std::string ID) : ID(std::move(ID)) {}
252
253 Expected<CharSourceRange> operator()(const MatchResult &Result) {
254 Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
255 if (!N)
256 return N.takeError();
257 if (const auto *Arg = N->get<T>())
258 return Func(Result, *Arg);
259 return typeError(ID, N->getNodeKind());
260 }
261};
262} // namespace
263
264// FIXME: Change the following functions from being in an anonymous namespace
265// to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
266// (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
267// namespace works around a bug in earlier versions.
268namespace {
269// Returns the range of the statements (all source between the braces).
270CharSourceRange getStatementsRange(const MatchResult &,
271 const CompoundStmt &CS) {
273 CS.getRBracLoc());
274}
275} // namespace
276
278 return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
279}
280
281namespace {
282
283SourceLocation findArgStartDelimiter(const CallExpr &E, SourceLocation RLoc,
284 const SourceManager &SM,
285 const LangOptions &LangOpts) {
286 SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(0)->getBeginLoc();
287 return findPreviousTokenKind(Loc, SM, LangOpts, tok::TokenKind::l_paren);
288}
289
290// Returns the location after the last argument of the construct expr. Returns
291// an invalid location if there are no arguments.
292SourceLocation findLastArgEnd(const CXXConstructExpr &CE,
293 const SourceManager &SM,
294 const LangOptions &LangOpts) {
295 for (int i = CE.getNumArgs() - 1; i >= 0; --i) {
296 const Expr *Arg = CE.getArg(i);
297 if (isa<CXXDefaultArgExpr>(Arg))
298 continue;
299 return Lexer::getLocForEndOfToken(Arg->getEndLoc(), 0, SM, LangOpts);
300 }
301 return {};
302}
303
304// Returns the range of the source between the call's parentheses/braces.
305CharSourceRange getCallArgumentsRange(const MatchResult &Result,
306 const CallExpr &CE) {
307 const SourceLocation RLoc = CE.getRParenLoc();
309 findArgStartDelimiter(CE, RLoc, *Result.SourceManager,
310 Result.Context->getLangOpts())
311 .getLocWithOffset(1),
312 RLoc);
313}
314
315// Returns the range of the source between the construct expr's
316// parentheses/braces.
317CharSourceRange getConstructArgumentsRange(const MatchResult &Result,
318 const CXXConstructExpr &CE) {
319 if (SourceRange R = CE.getParenOrBraceRange(); R.isValid()) {
321 Lexer::getLocForEndOfToken(R.getBegin(), 0, *Result.SourceManager,
322 Result.Context->getLangOpts()),
323 R.getEnd());
324 }
325
326 if (CE.getNumArgs() > 0) {
328 CE.getArg(0)->getBeginLoc(),
329 findLastArgEnd(CE, *Result.SourceManager,
330 Result.Context->getLangOpts()));
331 }
332
333 return {};
334}
335
336} // namespace
337
339 return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
340}
341
343 return RelativeSelector<CXXConstructExpr, getConstructArgumentsRange>(
344 std::move(ID));
345}
346
347namespace {
348// Returns the range of the elements of the initializer list. Includes all
349// source between the braces.
350CharSourceRange getElementsRange(const MatchResult &,
351 const InitListExpr &E) {
353 E.getRBraceLoc());
354}
355} // namespace
356
358 return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
359}
360
361namespace {
362// Returns the range of the else branch, including the `else` keyword.
363CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
366 tok::TokenKind::semi, *Result.Context);
367}
368} // namespace
369
371 return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
372}
373
375 return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
377 if (!SRange)
378 return SRange.takeError();
379 return Result.SourceManager->getExpansionRange(*SRange);
380 };
381}
#define SM(sm)
static Error invalidArgumentError(Twine Message)
static SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
static SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
static Error missingPropertyError(StringRef ID, Twine Description, StringRef Property)
static Error typeError(StringRef ID, const ASTNodeKind &Kind)
static Expected< DynTypedNode > getNode(const ast_matchers::BoundNodes &Nodes, StringRef ID)
Defines a combinator library supporting the definition of selectors, which select source ranges based...
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TypeLoc interface and its subclasses.
Kind identifier.
Represents a call to a C++ constructor.
Definition ExprCXX.h:1549
SourceRange getParenOrBraceRange() const
Definition ExprCXX.h:1730
Expr * getArg(unsigned Arg)
Return the specified argument.
Definition ExprCXX.h:1692
unsigned getNumArgs() const
Return the number of arguments to the constructor call.
Definition ExprCXX.h:1689
Represents a C++ base or member initializer.
Definition DeclCXX.h:2369
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2877
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3081
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3068
SourceLocation getRParenLoc() const
Definition Expr.h:3208
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
static CharSourceRange getTokenRange(SourceRange R)
CompoundStmt - This represents a group of statements like { stmt stmt }.
Definition Stmt.h:1720
SourceLocation getLBracLoc() const
Definition Stmt.h:1837
SourceLocation getRBracLoc() const
Definition Stmt.h:1838
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
This represents one expression.
Definition Expr.h:112
IfStmt - This represents an if/then/else.
Definition Stmt.h:2239
SourceLocation getElseLoc() const
Definition Stmt.h:2408
SourceLocation getEndLoc() const LLVM_READONLY
Definition Stmt.h:2452
Describes a C or C++ initializer list.
Definition Expr.h:5233
SourceLocation getLBraceLoc() const
Definition Expr.h:5394
SourceLocation getRBraceLoc() const
Definition Expr.h:5396
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
Definition Lexer.cpp:951
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
Definition Lexer.cpp:608
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
Definition Lexer.cpp:509
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Definition Lexer.cpp:848
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3298
This represents a decl that may have a name.
Definition Decl.h:274
Smart pointer class that efficiently represents Objective-C method names.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition Stmt.h:85
SourceLocation getEndLoc() const LLVM_READONLY
Definition Stmt.cpp:362
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:350
Token - This structure provides full information about a lexed token.
Definition Token.h:36
Base wrapper for a particular "section" of type source info.
Definition TypeLoc.h:59
bool isNull() const
Definition TypeLoc.h:121
Maps string IDs to AST nodes matched by parts of a matcher.
const IDToNodeMap & getMap() const
Retrieve mapping from binding identifiers to bound nodes.
A class to allow finding matches over the Clang AST.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition TokenKinds.h:25
CharSourceRange getExtendedRange(const T &Node, tok::TokenKind Next, ASTContext &Context)
Returns the source range spanning the node, extended to include Next, if it immediately follows Node.
Definition SourceCode.h:34
CharSourceRange maybeExtendRange(CharSourceRange Range, tok::TokenKind Terminator, ASTContext &Context)
Extends Range to include the token Terminator, if it immediately follows the end of the range.
StringRef getText(CharSourceRange Range, const ASTContext &Context)
Returns the source-code text in the specified range.
RangeSelector initListElements(std::string ID)
RangeSelector enclose(RangeSelector Begin, RangeSelector End)
Selects from the start of Begin and to the end of End.
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token. ID is the node's binding in the match result.
RangeSelector elseBranch(std::string ID)
Given an IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword.
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon, if any (for declarations and non-expression statements)...
RangeSelector encloseNodes(std::string BeginID, std::string EndID)
Convenience version of range where end-points are bound nodes.
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
RangeSelector constructExprArgs(std::string ID)
MatchConsumer< CharSourceRange > RangeSelector
RangeSelector callArgs(std::string ID)
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always). Useful for selecting expression statements....
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion,...
RangeSelector statements(std::string ID)
RangeSelector name(std::string ID)
Given a node with a "name" (like NamedDecl, DeclRefExpr, CXXCtorInitializer, and TypeLoc), selects th...
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
@ Result
The result type of a method or function.
Definition TypeBase.h:905
const FunctionProtoType * T
llvm::Expected< QualType > ExpectedType
Contains all information for a given match.