clang  10.0.0svn
RangeSelector.cpp
Go to the documentation of this file.
1 //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "clang/AST/Expr.h"
13 #include "clang/Lex/Lexer.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Support/Errc.h"
17 #include "llvm/Support/Error.h"
18 #include <string>
19 #include <utility>
20 #include <vector>
21 
22 using namespace clang;
23 using namespace transformer;
24 
28 using llvm::Error;
29 using llvm::StringError;
30 
32 
33 static Error invalidArgumentError(Twine Message) {
34  return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
35 }
36 
37 static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
38  return invalidArgumentError("mismatched type (node id=" + ID +
39  " kind=" + Kind.asStringRef() + ")");
40 }
41 
42 static Error typeError(StringRef ID, const ASTNodeKind &Kind,
43  Twine ExpectedType) {
44  return invalidArgumentError("mismatched type: expected one of " +
45  ExpectedType + " (node id=" + ID +
46  " kind=" + Kind.asStringRef() + ")");
47 }
48 
49 static Error missingPropertyError(StringRef ID, Twine Description,
50  StringRef Property) {
51  return invalidArgumentError(Description + " requires property '" + Property +
52  "' (node id=" + ID + ")");
53 }
54 
56  StringRef ID) {
57  auto &NodesMap = Nodes.getMap();
58  auto It = NodesMap.find(ID);
59  if (It == NodesMap.end())
60  return invalidArgumentError("ID not bound: " + ID);
61  return It->second;
62 }
63 
64 // FIXME: handling of macros should be configurable.
66  const SourceManager &SM,
67  const LangOptions &LangOpts) {
68  if (Start.isInvalid() || Start.isMacroID())
69  return SourceLocation();
70 
71  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
72  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
73  return SourceLocation();
74 
75  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
76 }
77 
78 // Finds the start location of the previous token of kind \p TK.
79 // FIXME: handling of macros should be configurable.
81  const SourceManager &SM,
82  const LangOptions &LangOpts,
83  tok::TokenKind TK) {
84  while (true) {
85  SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
86  if (L.isInvalid() || L.isMacroID())
87  return SourceLocation();
88 
89  Token T;
90  if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
91  return SourceLocation();
92 
93  if (T.is(TK))
94  return T.getLocation();
95 
96  Start = L;
97  }
98 }
99 
101  const LangOptions &LangOpts) {
102  SourceLocation EndLoc =
103  E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
104  return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
105 }
106 
108  return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
109  Expected<CharSourceRange> SelectedRange = Selector(Result);
110  if (!SelectedRange)
111  return SelectedRange.takeError();
112  return CharSourceRange::getCharRange(SelectedRange->getBegin());
113  };
114 }
115 
117  return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
118  Expected<CharSourceRange> SelectedRange = Selector(Result);
119  if (!SelectedRange)
120  return SelectedRange.takeError();
121  if (SelectedRange->isCharRange())
122  return CharSourceRange::getCharRange(SelectedRange->getEnd());
124  SelectedRange->getEnd(), 0, Result.Context->getSourceManager(),
125  Result.Context->getLangOpts()));
126  };
127 }
128 
130  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
131  Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
132  if (!Node)
133  return Node.takeError();
134  return Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr
135  ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
136  *Result.Context)
137  : CharSourceRange::getTokenRange(Node->getSourceRange());
138  };
139 }
140 
142  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
143  Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
144  if (!Node)
145  return Node.takeError();
146  return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
147  *Result.Context);
148  };
149 }
150 
152  return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
153  Expected<CharSourceRange> BeginRange = Begin(Result);
154  if (!BeginRange)
155  return BeginRange.takeError();
156  Expected<CharSourceRange> EndRange = End(Result);
157  if (!EndRange)
158  return EndRange.takeError();
159  SourceLocation B = BeginRange->getBegin();
160  SourceLocation E = EndRange->getEnd();
161  // Note: we are precluding the possibility of sub-token ranges in the case
162  // that EndRange is a token range.
163  if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
164  return invalidArgumentError("Bad range: out of order");
165  }
166  return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
167  };
168 }
169 
170 RangeSelector transformer::range(std::string BeginID, std::string EndID) {
171  return transformer::range(node(std::move(BeginID)), node(std::move(EndID)));
172 }
173 
175  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
176  Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
177  if (!Node)
178  return Node.takeError();
179  if (auto *M = Node->get<clang::MemberExpr>())
181  M->getMemberNameInfo().getSourceRange());
182  return typeError(ID, Node->getNodeKind(), "MemberExpr");
183  };
184 }
185 
187  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
188  Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
189  if (!N)
190  return N.takeError();
191  auto &Node = *N;
192  if (const auto *D = Node.get<NamedDecl>()) {
193  if (!D->getDeclName().isIdentifier())
194  return missingPropertyError(ID, "name", "identifier");
195  SourceLocation L = D->getLocation();
196  auto R = CharSourceRange::getTokenRange(L, L);
197  // Verify that the range covers exactly the name.
198  // FIXME: extend this code to support cases like `operator +` or
199  // `foo<int>` for which this range will be too short. Doing so will
200  // require subcasing `NamedDecl`, because it doesn't provide virtual
201  // access to the \c DeclarationNameInfo.
202  if (tooling::getText(R, *Result.Context) != D->getName())
203  return CharSourceRange();
204  return R;
205  }
206  if (const auto *E = Node.get<DeclRefExpr>()) {
207  if (!E->getNameInfo().getName().isIdentifier())
208  return missingPropertyError(ID, "name", "identifier");
209  SourceLocation L = E->getLocation();
210  return CharSourceRange::getTokenRange(L, L);
211  }
212  if (const auto *I = Node.get<CXXCtorInitializer>()) {
213  if (!I->isMemberInitializer() && I->isWritten())
214  return missingPropertyError(ID, "name", "explicit member initializer");
215  SourceLocation L = I->getMemberLocation();
216  return CharSourceRange::getTokenRange(L, L);
217  }
218  return typeError(ID, Node.getNodeKind(),
219  "DeclRefExpr, NamedDecl, CXXCtorInitializer");
220  };
221 }
222 
223 namespace {
224 // FIXME: make this available in the public API for users to easily create their
225 // own selectors.
226 
227 // Creates a selector from a range-selection function \p Func, which selects a
228 // range that is relative to a bound node id. \c T is the node type expected by
229 // \p Func.
230 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
231 class RelativeSelector {
232  std::string ID;
233 
234 public:
235  RelativeSelector(std::string ID) : ID(std::move(ID)) {}
236 
237  Expected<CharSourceRange> operator()(const MatchResult &Result) {
238  Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
239  if (!N)
240  return N.takeError();
241  if (const auto *Arg = N->get<T>())
242  return Func(Result, *Arg);
243  return typeError(ID, N->getNodeKind());
244  }
245 };
246 } // namespace
247 
248 // FIXME: Change the following functions from being in an anonymous namespace
249 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
250 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
251 // namespace works around a bug in earlier versions.
252 namespace {
253 // Returns the range of the statements (all source between the braces).
254 CharSourceRange getStatementsRange(const MatchResult &,
255  const CompoundStmt &CS) {
257  CS.getRBracLoc());
258 }
259 } // namespace
260 
262  return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
263 }
264 
265 namespace {
266 // Returns the range of the source between the call's parentheses.
267 CharSourceRange getCallArgumentsRange(const MatchResult &Result,
268  const CallExpr &CE) {
270  findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
271  .getLocWithOffset(1),
272  CE.getRParenLoc());
273 }
274 } // namespace
275 
277  return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
278 }
279 
280 namespace {
281 // Returns the range of the elements of the initializer list. Includes all
282 // source between the braces.
283 CharSourceRange getElementsRange(const MatchResult &,
284  const InitListExpr &E) {
286  E.getRBraceLoc());
287 }
288 } // namespace
289 
291  return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
292 }
293 
294 namespace {
295 // Returns the range of the else branch, including the `else` keyword.
296 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
299  tok::TokenKind::semi, *Result.Context);
300 }
301 } // namespace
302 
304  return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
305 }
306 
308  return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
309  Expected<CharSourceRange> SRange = S(Result);
310  if (!SRange)
311  return SRange.takeError();
312  return Result.SourceManager->getExpansionRange(*SRange);
313  };
314 }
A class to allow finding matches over the Clang AST.
SourceLocation getRBracLoc() const
Definition: Stmt.h:1428
SourceLocation getRParenLoc() const
Definition: Expr.h:2764
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
Smart pointer class that efficiently represents Objective-C method names.
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token.
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2677
Stmt - This represents one statement.
Definition: Stmt.h:66
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2664
StringRef asStringRef() const
String representation of the kind.
IfStmt - This represents an if/then/else.
Definition: Stmt.h:1822
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:97
static CharSourceRange getTokenRange(SourceRange R)
const T * get() const
Retrieve the stored node as type T.
StringRef getText(CharSourceRange Range, const ASTContext &Context)
Returns the source-code text in the specified range.
Definition: SourceCode.cpp:17
static Error missingPropertyError(StringRef ID, Twine Description, StringRef Property)
RangeSelector range(RangeSelector Begin, RangeSelector End)
Selects from the start of Begin and to the end of End.
MatchFinder::MatchResult MatchResult
static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM, const LangOptions &LangOpts)
BoundNodesTreeBuilder Nodes
RangeSelector callArgs(std::string ID)
const IDToNodeMap & getMap() const
Retrieve mapping from binding identifiers to bound nodes.
Definition: ASTMatchers.h:120
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:274
SourceLocation getRBraceLoc() const
Definition: Expr.h:4524
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
Describes a C or C++ initializer list.
Definition: Expr.h:4375
SourceLocation getLBracLoc() const
Definition: Stmt.h:1427
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
Definition: Lexer.cpp:455
RangeSelector name(std::string ID)
Given a node with a "name" (like NamedDecl, DeclRefExpr or CXXCtorInitializer), selects the name's to...
llvm::Error Error
CompoundStmt - This represents a group of statements like { stmt stmt }.
Definition: Stmt.h:1320
SourceLocation getEndLoc() const LLVM_READONLY
Definition: Stmt.h:2001
This represents one expression.
Definition: Expr.h:108
SourceLocation getElseLoc() const
Definition: Stmt.h:1984
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Definition: Lexer.cpp:778
SourceLocation End
Represents a character-granular source range.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:126
SourceLocation Begin
static SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
Contains all information for a given match.
static Error invalidArgumentError(Twine Message)
const SourceManager & SM
Definition: Format.cpp:1667
static CharSourceRange getCharRange(SourceRange R)
MatchConsumer< CharSourceRange > RangeSelector
Definition: RangeSelector.h:27
static Error typeError(StringRef ID, const ASTNodeKind &Kind)
Maps string IDs to AST nodes matched by parts of a matcher.
Definition: ASTMatchers.h:103
Kind
Encodes a location in the source.
CharSourceRange maybeExtendRange(CharSourceRange Range, tok::TokenKind Next, ASTContext &Context)
Extends Range to include the token Next, if it immediately follows the end of the range...
Definition: SourceCode.cpp:23
clang::ASTContext *const Context
Utilities for interpreting the matched AST structures.
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location any where in a source buffer, find the location that corresponds to the beginning of...
Definition: Lexer.cpp:554
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:24
CharSourceRange getExtendedRange(const T &Node, tok::TokenKind Next, ASTContext &Context)
Returns the source range spanning the node, extended to include Next, if it immediately follows Node...
Definition: SourceCode.h:32
ast_type_traits::DynTypedNode DynTypedNode
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
const BoundNodes Nodes
Contains the nodes bound on the current match.
ast_type_traits::DynTypedNode Node
RangeSelector initListElements(std::string ID)
Dataflow Directional Tag Classes.
SourceLocation getLBraceLoc() const
Definition: Expr.h:4522
bool isMacroID() const
Represents a C++ base or member initializer.
Definition: DeclCXX.h:2137
static Expected< DynTypedNode > getNode(const ast_matchers::BoundNodes &Nodes, StringRef ID)
Defines a combinator library supporting the definition of selectors, which select source ranges based...
static SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:2811
Defines the clang::SourceLocation class and associated facilities.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2521
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion...
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1146
clang::SourceManager *const SourceManager
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon (for non-expression statements).
A trivial tuple used to represent a source range.
This represents a decl that may have a name.
Definition: Decl.h:248
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always).
RangeSelector statements(std::string ID)
const LangOptions & getLangOpts() const
Definition: ASTContext.h:723
This class handles loading and caching of source files into memory.
RangeSelector elseBranch(std::string ID)
Given an IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword...