clang 20.0.0git
Parsing.cpp
Go to the documentation of this file.
1//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "clang/AST/Expr.h"
14#include "clang/Lex/Lexer.h"
17#include "llvm/ADT/StringMap.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/Support/Errc.h"
20#include "llvm/Support/Error.h"
21#include <optional>
22#include <string>
23#include <utility>
24#include <vector>
25
26using namespace clang;
27using namespace transformer;
28
29// FIXME: This implementation is entirely separate from that of the AST
30// matchers. Given the similarity of the languages and uses of the two parsers,
31// the two should share a common parsing infrastructure, as should other
32// Transformer types. We intend to unify this implementation soon to share as
33// much as possible with the AST Matchers parsing.
34
35namespace {
36using llvm::Expected;
37
38template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
39
40struct ParseState {
41 // The remaining input to be processed.
42 StringRef Input;
43 // The original input. Not modified during parsing; only for reference in
44 // error reporting.
45 StringRef OriginalInput;
46};
47
48// Represents an intermediate result returned by a parsing function. Functions
49// that don't generate values should use `std::nullopt`
50template <typename ResultType> struct ParseProgress {
51 ParseState State;
52 // Intermediate result generated by the Parser.
53 ResultType Value;
54};
55
56template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
57template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
58
59class ParseError : public llvm::ErrorInfo<ParseError> {
60public:
61 // Required field for all ErrorInfo derivatives.
62 static char ID;
63
64 ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
65 : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
66 Excerpt(std::move(InputExcerpt)) {}
67
68 void log(llvm::raw_ostream &OS) const override {
69 OS << "parse error at position (" << Pos << "): " << ErrorMsg
70 << ": " + Excerpt;
71 }
72
73 std::error_code convertToErrorCode() const override {
74 return llvm::inconvertibleErrorCode();
75 }
76
77 // Position of the error in the input string.
78 size_t Pos;
79 std::string ErrorMsg;
80 // Excerpt of the input starting at the error position.
81 std::string Excerpt;
82};
83
84char ParseError::ID;
85} // namespace
86
87static const llvm::StringMap<RangeSelectorOp<std::string>> &
89 static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
90 {"name", name},
91 {"node", node},
92 {"statement", statement},
93 {"statements", statements},
94 {"member", member},
95 {"callArgs", callArgs},
96 {"elseBranch", elseBranch},
97 {"initListElements", initListElements}};
98 return M;
99}
100
101static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
103 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
104 {"before", before}, {"after", after}, {"expansion", expansion}};
105 return M;
106}
107
108static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
110 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
111 {"encloseNodes", encloseNodes}};
112 return M;
113}
114
115static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
117 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
118 M = {{"enclose", enclose}, {"between", between}};
119 return M;
120}
121
122template <typename Element>
123std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
124 llvm::StringRef Key) {
125 auto it = Map.find(Key);
126 if (it == Map.end())
127 return std::nullopt;
128 return it->second;
129}
130
131template <typename ResultType>
132ParseProgress<ResultType> makeParseProgress(ParseState State,
133 ResultType Result) {
134 return ParseProgress<ResultType>{State, std::move(Result)};
135}
136
137static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
138 size_t Pos = S.OriginalInput.size() - S.Input.size();
139 return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
140 S.OriginalInput.substr(Pos, 20).str());
141}
142
143// Returns a new ParseState that advances \c S by \c N characters.
144static ParseState advance(ParseState S, size_t N) {
145 S.Input = S.Input.drop_front(N);
146 return S;
147}
148
149static StringRef consumeWhitespace(StringRef S) {
150 return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
151}
152
153// Parses a single expected character \c c from \c State, skipping preceding
154// whitespace. Error if the expected character isn't found.
155static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
156 State.Input = consumeWhitespace(State.Input);
157 if (State.Input.empty() || State.Input.front() != c)
158 return makeParseError(State,
159 ("expected char not found: " + llvm::Twine(c)).str());
160 return makeParseProgress(advance(State, 1), std::nullopt);
161}
162
163// Parses an identitifer "token" -- handles preceding whitespace.
164static ExpectedProgress<std::string> parseId(ParseState State) {
165 State.Input = consumeWhitespace(State.Input);
166 auto Id = State.Input.take_while(
167 [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
168 if (Id.empty())
169 return makeParseError(State, "failed to parse name");
170 return makeParseProgress(advance(State, Id.size()), Id.str());
171}
172
173// For consistency with the AST matcher parser and C++ code, node ids are
174// written as strings. However, we do not support escaping in the string.
175static ExpectedProgress<std::string> parseStringId(ParseState State) {
176 State.Input = consumeWhitespace(State.Input);
177 if (State.Input.empty())
178 return makeParseError(State, "unexpected end of input");
179 if (!State.Input.consume_front("\""))
180 return makeParseError(
181 State,
182 "expecting string, but encountered other character or end of input");
183
184 StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
185 if (State.Input.size() == Id.size())
186 return makeParseError(State, "unterminated string");
187 // Advance past the trailing quote as well.
188 return makeParseProgress(advance(State, Id.size() + 1), Id.str());
189}
190
191// Parses a single element surrounded by parens. `Op` is applied to the parsed
192// result to create the result of this function call.
193template <typename T>
194ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
195 RangeSelectorOp<T> Op,
196 ParseState State) {
197 auto P = parseChar('(', State);
198 if (!P)
199 return P.takeError();
200
201 auto E = ParseElement(P->State);
202 if (!E)
203 return E.takeError();
204
205 P = parseChar(')', E->State);
206 if (!P)
207 return P.takeError();
208
209 return makeParseProgress(P->State, Op(std::move(E->Value)));
210}
211
212// Parses a pair of elements surrounded by parens and separated by comma. `Op`
213// is applied to the parsed results to create the result of this function call.
214template <typename T>
215ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
216 RangeSelectorOp<T, T> Op,
217 ParseState State) {
218 auto P = parseChar('(', State);
219 if (!P)
220 return P.takeError();
221
222 auto Left = ParseElement(P->State);
223 if (!Left)
224 return Left.takeError();
225
226 P = parseChar(',', Left->State);
227 if (!P)
228 return P.takeError();
229
230 auto Right = ParseElement(P->State);
231 if (!Right)
232 return Right.takeError();
233
234 P = parseChar(')', Right->State);
235 if (!P)
236 return P.takeError();
237
238 return makeParseProgress(P->State,
239 Op(std::move(Left->Value), std::move(Right->Value)));
240}
241
242// Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
243// Id operator). Returns StencilType representing the operator on success and
244// error if it fails to parse input for an operator.
245static ExpectedProgress<RangeSelector>
246parseRangeSelectorImpl(ParseState State) {
247 auto Id = parseId(State);
248 if (!Id)
249 return Id.takeError();
250
251 std::string OpName = std::move(Id->Value);
252 if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
253 return parseSingle(parseStringId, *Op, Id->State);
254
255 if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
256 return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
257
258 if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
259 return parsePair(parseStringId, *Op, Id->State);
260
261 if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
262 return parsePair(parseRangeSelectorImpl, *Op, Id->State);
263
264 return makeParseError(State, "unknown selector name: " + OpName);
265}
266
268 ParseState State = {Input, Input};
269 ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
270 if (!Result)
271 return Result.takeError();
272 State = Result->State;
273 // Discard any potentially trailing whitespace.
274 State.Input = consumeWhitespace(State.Input);
275 if (State.Input.empty())
276 return Result->Value;
277 return makeParseError(State, "unexpected input after selector");
278}
StringRef P
Expr * E
static ExpectedProgress< std::string > parseStringId(ParseState State)
Definition: Parsing.cpp:175
static ExpectedProgress< std::nullopt_t > parseChar(char c, ParseState State)
Definition: Parsing.cpp:155
ParseProgress< ResultType > makeParseProgress(ParseState State, ResultType Result)
Definition: Parsing.cpp:132
static StringRef consumeWhitespace(StringRef S)
Definition: Parsing.cpp:149
ExpectedProgress< RangeSelector > parseSingle(ParseFunction< T > ParseElement, RangeSelectorOp< T > Op, ParseState State)
Definition: Parsing.cpp:194
ExpectedProgress< RangeSelector > parsePair(ParseFunction< T > ParseElement, RangeSelectorOp< T, T > Op, ParseState State)
Definition: Parsing.cpp:215
std::optional< Element > findOptional(const llvm::StringMap< Element > &Map, llvm::StringRef Key)
Definition: Parsing.cpp:123
static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg)
Definition: Parsing.cpp:137
static const llvm::StringMap< RangeSelectorOp< RangeSelector > > & getUnaryRangeSelectors()
Definition: Parsing.cpp:102
static ExpectedProgress< RangeSelector > parseRangeSelectorImpl(ParseState State)
Definition: Parsing.cpp:246
static ExpectedProgress< std::string > parseId(ParseState State)
Definition: Parsing.cpp:164
static ParseState advance(ParseState S, size_t N)
Definition: Parsing.cpp:144
static const llvm::StringMap< RangeSelectorOp< RangeSelector, RangeSelector > > & getBinaryRangeSelectors()
Definition: Parsing.cpp:116
static const llvm::StringMap< RangeSelectorOp< std::string, std::string > > & getBinaryStringSelectors()
Definition: Parsing.cpp:109
static const llvm::StringMap< RangeSelectorOp< std::string > > & getUnaryStringSelectors()
Definition: Parsing.cpp:88
Defines parsing functions for Transformer types.
Defines a combinator library supporting the definition of selectors, which select source ranges based...
uint32_t Id
Definition: SemaARM.cpp:1144
Defines the clang::SourceLocation class and associated facilities.
__device__ __2f16 float c
RangeSelector initListElements(std::string ID)
RangeSelector enclose(RangeSelector Begin, RangeSelector End)
Selects from the start of Begin and to the end of End.
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token.
RangeSelector between(RangeSelector R1, RangeSelector R2)
Selects the range between R1 and R2.
Definition: RangeSelector.h:60
RangeSelector elseBranch(std::string ID)
Given an IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword.
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon, if any (for declarations and non-expression statements)...
MatchConsumer< CharSourceRange > RangeSelector
Definition: RangeSelector.h:27
RangeSelector encloseNodes(std::string BeginID, std::string EndID)
Convenience version of range where end-points are bound nodes.
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
RangeSelector callArgs(std::string ID)
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
llvm::Expected< RangeSelector > parseRangeSelector(llvm::StringRef Input)
Parses a string representation of a RangeSelector.
Definition: Parsing.cpp:267
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always).
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion,...
RangeSelector statements(std::string ID)
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc) selects th...
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
Definition: CharInfo.h:41
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
Definition: CharInfo.h:61
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
Definition: CharInfo.h:108
#define log(__x)
Definition: tgmath.h:460