clang  11.0.0git
Parsing.cpp
Go to the documentation of this file.
1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "clang/AST/Expr.h"
12 #include "clang/Basic/CharInfo.h"
14 #include "clang/Lex/Lexer.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/StringMap.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/Errc.h"
21 #include "llvm/Support/Error.h"
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 using namespace clang;
27 using namespace transformer;
28 
29 // FIXME: This implementation is entirely separate from that of the AST
30 // matchers. Given the similarity of the languages and uses of the two parsers,
31 // the two should share a common parsing infrastructure, as should other
32 // Transformer types. We intend to unify this implementation soon to share as
33 // much as possible with the AST Matchers parsing.
34 
35 namespace {
36 using llvm::Error;
37 using llvm::Expected;
38 
39 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
40 
41 struct ParseState {
42  // The remaining input to be processed.
43  StringRef Input;
44  // The original input. Not modified during parsing; only for reference in
45  // error reporting.
46  StringRef OriginalInput;
47 };
48 
49 // Represents an intermediate result returned by a parsing function. Functions
50 // that don't generate values should use `llvm::None`
51 template <typename ResultType> struct ParseProgress {
52  ParseState State;
53  // Intermediate result generated by the Parser.
54  ResultType Value;
55 };
56 
57 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
58 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
59 
60 class ParseError : public llvm::ErrorInfo<ParseError> {
61 public:
62  // Required field for all ErrorInfo derivatives.
63  static char ID;
64 
65  ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
66  : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
67  Excerpt(std::move(InputExcerpt)) {}
68 
69  void log(llvm::raw_ostream &OS) const override {
70  OS << "parse error at position (" << Pos << "): " << ErrorMsg
71  << ": " + Excerpt;
72  }
73 
74  std::error_code convertToErrorCode() const override {
75  return llvm::inconvertibleErrorCode();
76  }
77 
78  // Position of the error in the input string.
79  size_t Pos;
80  std::string ErrorMsg;
81  // Excerpt of the input starting at the error position.
82  std::string Excerpt;
83 };
84 
85 char ParseError::ID;
86 } // namespace
87 
88 static const llvm::StringMap<RangeSelectorOp<std::string>> &
90  static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
91  {"name", name},
92  {"node", node},
93  {"statement", statement},
94  {"statements", statements},
95  {"member", member},
96  {"callArgs", callArgs},
97  {"elseBranch", elseBranch},
98  {"initListElements", initListElements}};
99  return M;
100 }
101 
102 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
104  static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
105  {"before", before}, {"after", after}, {"expansion", expansion}};
106  return M;
107 }
108 
109 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
111  static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
112  {"encloseNodes", range}};
113  return M;
114 }
115 
116 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
118  static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
119  M = {{"enclose", range}};
120  return M;
121 }
122 
123 template <typename Element>
124 llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
125  llvm::StringRef Key) {
126  auto it = Map.find(Key);
127  if (it == Map.end())
128  return llvm::None;
129  return it->second;
130 }
131 
132 template <typename ResultType>
133 ParseProgress<ResultType> makeParseProgress(ParseState State,
134  ResultType Result) {
135  return ParseProgress<ResultType>{State, std::move(Result)};
136 }
137 
138 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
139  size_t Pos = S.OriginalInput.size() - S.Input.size();
140  return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
141  S.OriginalInput.substr(Pos, 20).str());
142 }
143 
144 // Returns a new ParseState that advances \c S by \c N characters.
145 static ParseState advance(ParseState S, size_t N) {
146  S.Input = S.Input.drop_front(N);
147  return S;
148 }
149 
150 static StringRef consumeWhitespace(StringRef S) {
151  return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); });
152 }
153 
154 // Parses a single expected character \c c from \c State, skipping preceding
155 // whitespace. Error if the expected character isn't found.
156 static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
157  State.Input = consumeWhitespace(State.Input);
158  if (State.Input.empty() || State.Input.front() != c)
159  return makeParseError(State,
160  ("expected char not found: " + llvm::Twine(c)).str());
161  return makeParseProgress(advance(State, 1), llvm::None);
162 }
163 
164 // Parses an identitifer "token" -- handles preceding whitespace.
165 static ExpectedProgress<std::string> parseId(ParseState State) {
166  State.Input = consumeWhitespace(State.Input);
167  auto Id = State.Input.take_while(
168  [](char c) { return c >= 0 && isIdentifierBody(c); });
169  if (Id.empty())
170  return makeParseError(State, "failed to parse name");
171  return makeParseProgress(advance(State, Id.size()), Id.str());
172 }
173 
174 // For consistency with the AST matcher parser and C++ code, node ids are
175 // written as strings. However, we do not support escaping in the string.
176 static ExpectedProgress<std::string> parseStringId(ParseState State) {
177  State.Input = consumeWhitespace(State.Input);
178  if (State.Input.empty())
179  return makeParseError(State, "unexpected end of input");
180  if (!State.Input.consume_front("\""))
181  return makeParseError(
182  State,
183  "expecting string, but encountered other character or end of input");
184 
185  StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
186  if (State.Input.size() == Id.size())
187  return makeParseError(State, "unterminated string");
188  // Advance past the trailing quote as well.
189  return makeParseProgress(advance(State, Id.size() + 1), Id.str());
190 }
191 
192 // Parses a single element surrounded by parens. `Op` is applied to the parsed
193 // result to create the result of this function call.
194 template <typename T>
195 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
196  RangeSelectorOp<T> Op,
197  ParseState State) {
198  auto P = parseChar('(', State);
199  if (!P)
200  return P.takeError();
201 
202  auto E = ParseElement(P->State);
203  if (!E)
204  return E.takeError();
205 
206  P = parseChar(')', E->State);
207  if (!P)
208  return P.takeError();
209 
210  return makeParseProgress(P->State, Op(std::move(E->Value)));
211 }
212 
213 // Parses a pair of elements surrounded by parens and separated by comma. `Op`
214 // is applied to the parsed results to create the result of this function call.
215 template <typename T>
216 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
217  RangeSelectorOp<T, T> Op,
218  ParseState State) {
219  auto P = parseChar('(', State);
220  if (!P)
221  return P.takeError();
222 
223  auto Left = ParseElement(P->State);
224  if (!Left)
225  return Left.takeError();
226 
227  P = parseChar(',', Left->State);
228  if (!P)
229  return P.takeError();
230 
231  auto Right = ParseElement(P->State);
232  if (!Right)
233  return Right.takeError();
234 
235  P = parseChar(')', Right->State);
236  if (!P)
237  return P.takeError();
238 
239  return makeParseProgress(P->State,
240  Op(std::move(Left->Value), std::move(Right->Value)));
241 }
242 
243 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
244 // Id operator). Returns StencilType representing the operator on success and
245 // error if it fails to parse input for an operator.
246 static ExpectedProgress<RangeSelector>
248  auto Id = parseId(State);
249  if (!Id)
250  return Id.takeError();
251 
252  std::string OpName = std::move(Id->Value);
253  if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
254  return parseSingle(parseStringId, *Op, Id->State);
255 
256  if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
257  return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
258 
259  if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
260  return parsePair(parseStringId, *Op, Id->State);
261 
262  if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
263  return parsePair(parseRangeSelectorImpl, *Op, Id->State);
264 
265  return makeParseError(State, "unknown selector name: " + OpName);
266 }
267 
269  ParseState State = {Input, Input};
270  ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
271  if (!Result)
272  return Result.takeError();
273  State = Result->State;
274  // Discard any potentially trailing whitespace.
275  State.Input = consumeWhitespace(State.Input);
276  if (State.Input.empty())
277  return Result->Value;
278  return makeParseError(State, "unexpected input after selector");
279 }
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token.
ParseProgress< ResultType > makeParseProgress(ParseState State, ResultType Result)
Definition: Parsing.cpp:133
llvm::Expected< T > Expected
StringRef P
RangeSelector range(RangeSelector Begin, RangeSelector End)
DEPRECATED. Use enclose.
Definition: RangeSelector.h:41
static ExpectedProgress< std::string > parseId(ParseState State)
Definition: Parsing.cpp:165
LineState State
RangeSelector callArgs(std::string ID)
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
ExpectedProgress< RangeSelector > parsePair(ParseFunction< T > ParseElement, RangeSelectorOp< T, T > Op, ParseState State)
Definition: Parsing.cpp:216
Definition: Format.h:2679
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
Definition: CharInfo.h:87
static const llvm::StringMap< RangeSelectorOp< std::string > > & getUnaryStringSelectors()
Definition: Parsing.cpp:89
RangeSelector name(std::string ID)
Given a node with a "name" (like NamedDecl, DeclRefExpr or CxxCtorInitializer), selects the name's token.
ExpectedProgress< RangeSelector > parseSingle(ParseFunction< T > ParseElement, RangeSelectorOp< T > Op, ParseState State)
Definition: Parsing.cpp:195
static const llvm::StringMap< RangeSelectorOp< std::string, std::string > > & getBinaryStringSelectors()
Definition: Parsing.cpp:110
int Id
Definition: ASTDiff.cpp:191
llvm::Expected< RangeSelector > parseRangeSelector(llvm::StringRef Input)
Parses a string representation of a RangeSelector.
Definition: Parsing.cpp:268
static ExpectedProgress< llvm::NoneType > parseChar(char c, ParseState State)
Definition: Parsing.cpp:156
static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg)
Definition: Parsing.cpp:138
llvm::Optional< Element > findOptional(const llvm::StringMap< Element > &Map, llvm::StringRef Key)
Definition: Parsing.cpp:124
MatchConsumer< CharSourceRange > RangeSelector
Definition: RangeSelector.h:27
#define log(__x)
Definition: tgmath.h:460
Defines parsing functions for Transformer types.
static ExpectedProgress< std::string > parseStringId(ParseState State)
Definition: Parsing.cpp:176
static ParseState advance(ParseState S, size_t N)
Definition: Parsing.cpp:145
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
RangeSelector initListElements(std::string ID)
Dataflow Directional Tag Classes.
static const llvm::StringMap< RangeSelectorOp< RangeSelector > > & getUnaryRangeSelectors()
Definition: Parsing.cpp:103
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
Definition: CharInfo.h:58
Defines a combinator library supporting the definition of selectors, which select source ranges based...
static StringRef consumeWhitespace(StringRef S)
Definition: Parsing.cpp:150
Defines the clang::SourceLocation class and associated facilities.
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion...
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon (for non-expression statements).
static ExpectedProgress< RangeSelector > parseRangeSelectorImpl(ParseState State)
Definition: Parsing.cpp:247
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always).
RangeSelector statements(std::string ID)
__device__ __2f16 float c
static const llvm::StringMap< RangeSelectorOp< RangeSelector, RangeSelector > > & getBinaryRangeSelectors()
Definition: Parsing.cpp:117
RangeSelector elseBranch(std::string ID)
Given an IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword...