clang 22.0.0git
Parsing.cpp
Go to the documentation of this file.
1//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "llvm/ADT/StringMap.h"
13#include "llvm/ADT/StringRef.h"
14#include "llvm/Support/Error.h"
15#include <optional>
16#include <string>
17#include <utility>
18
19using namespace clang;
20using namespace transformer;
21
22// FIXME: This implementation is entirely separate from that of the AST
23// matchers. Given the similarity of the languages and uses of the two parsers,
24// the two should share a common parsing infrastructure, as should other
25// Transformer types. We intend to unify this implementation soon to share as
26// much as possible with the AST Matchers parsing.
27
28namespace {
29using llvm::Expected;
30
31template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
32
33struct ParseState {
34 // The remaining input to be processed.
35 StringRef Input;
36 // The original input. Not modified during parsing; only for reference in
37 // error reporting.
38 StringRef OriginalInput;
39};
40
41// Represents an intermediate result returned by a parsing function. Functions
42// that don't generate values should use `std::nullopt`
43template <typename ResultType> struct ParseProgress {
44 ParseState State;
45 // Intermediate result generated by the Parser.
46 ResultType Value;
47};
48
49template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
50template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
51
52class ParseError : public llvm::ErrorInfo<ParseError> {
53public:
54 // Required field for all ErrorInfo derivatives.
55 static char ID;
56
57 ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
58 : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
59 Excerpt(std::move(InputExcerpt)) {}
60
61 void log(llvm::raw_ostream &OS) const override {
62 OS << "parse error at position (" << Pos << "): " << ErrorMsg
63 << ": " + Excerpt;
64 }
65
66 std::error_code convertToErrorCode() const override {
67 return llvm::inconvertibleErrorCode();
68 }
69
70 // Position of the error in the input string.
71 size_t Pos;
72 std::string ErrorMsg;
73 // Excerpt of the input starting at the error position.
74 std::string Excerpt;
75};
76
77char ParseError::ID;
78} // namespace
79
80static const llvm::StringMap<RangeSelectorOp<std::string>> &
82 static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
83 {"name", name},
84 {"node", node},
85 {"statement", statement},
86 {"statements", statements},
87 {"member", member},
88 {"callArgs", callArgs},
89 {"elseBranch", elseBranch},
90 {"initListElements", initListElements}};
91 return M;
92}
93
94static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
96 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
97 {"before", before}, {"after", after}, {"expansion", expansion}};
98 return M;
99}
100
101static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
103 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
104 {"encloseNodes", encloseNodes}};
105 return M;
106}
107
108static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
110 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
111 M = {{"enclose", enclose}, {"between", between}, {"merge", merge}};
112 return M;
113}
114
115template <typename Element>
116std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
117 llvm::StringRef Key) {
118 auto it = Map.find(Key);
119 if (it == Map.end())
120 return std::nullopt;
121 return it->second;
122}
123
124template <typename ResultType>
125ParseProgress<ResultType> makeParseProgress(ParseState State,
126 ResultType Result) {
127 return ParseProgress<ResultType>{State, std::move(Result)};
128}
129
130static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
131 size_t Pos = S.OriginalInput.size() - S.Input.size();
132 return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
133 S.OriginalInput.substr(Pos, 20).str());
134}
135
136// Returns a new ParseState that advances \c S by \c N characters.
137static ParseState advance(ParseState S, size_t N) {
138 S.Input = S.Input.drop_front(N);
139 return S;
140}
141
142static StringRef consumeWhitespace(StringRef S) {
143 return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
144}
145
146// Parses a single expected character \c c from \c State, skipping preceding
147// whitespace. Error if the expected character isn't found.
148static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
149 State.Input = consumeWhitespace(State.Input);
150 if (State.Input.empty() || State.Input.front() != c)
151 return makeParseError(State,
152 ("expected char not found: " + llvm::Twine(c)).str());
153 return makeParseProgress(advance(State, 1), std::nullopt);
154}
155
156// Parses an identitifer "token" -- handles preceding whitespace.
157static ExpectedProgress<std::string> parseId(ParseState State) {
158 State.Input = consumeWhitespace(State.Input);
159 auto Id = State.Input.take_while(
160 [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
161 if (Id.empty())
162 return makeParseError(State, "failed to parse name");
163 return makeParseProgress(advance(State, Id.size()), Id.str());
164}
165
166// For consistency with the AST matcher parser and C++ code, node ids are
167// written as strings. However, we do not support escaping in the string.
168static ExpectedProgress<std::string> parseStringId(ParseState State) {
169 State.Input = consumeWhitespace(State.Input);
170 if (State.Input.empty())
171 return makeParseError(State, "unexpected end of input");
172 if (!State.Input.consume_front("\""))
173 return makeParseError(
174 State,
175 "expecting string, but encountered other character or end of input");
176
177 StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
178 if (State.Input.size() == Id.size())
179 return makeParseError(State, "unterminated string");
180 // Advance past the trailing quote as well.
181 return makeParseProgress(advance(State, Id.size() + 1), Id.str());
182}
183
184// Parses a single element surrounded by parens. `Op` is applied to the parsed
185// result to create the result of this function call.
186template <typename T>
187ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
188 RangeSelectorOp<T> Op,
189 ParseState State) {
190 auto P = parseChar('(', State);
191 if (!P)
192 return P.takeError();
193
194 auto E = ParseElement(P->State);
195 if (!E)
196 return E.takeError();
197
198 P = parseChar(')', E->State);
199 if (!P)
200 return P.takeError();
201
202 return makeParseProgress(P->State, Op(std::move(E->Value)));
203}
204
205// Parses a pair of elements surrounded by parens and separated by comma. `Op`
206// is applied to the parsed results to create the result of this function call.
207template <typename T>
208ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
209 RangeSelectorOp<T, T> Op,
210 ParseState State) {
211 auto P = parseChar('(', State);
212 if (!P)
213 return P.takeError();
214
215 auto Left = ParseElement(P->State);
216 if (!Left)
217 return Left.takeError();
218
219 P = parseChar(',', Left->State);
220 if (!P)
221 return P.takeError();
222
223 auto Right = ParseElement(P->State);
224 if (!Right)
225 return Right.takeError();
226
227 P = parseChar(')', Right->State);
228 if (!P)
229 return P.takeError();
230
231 return makeParseProgress(P->State,
232 Op(std::move(Left->Value), std::move(Right->Value)));
233}
234
235// Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
236// Id operator). Returns StencilType representing the operator on success and
237// error if it fails to parse input for an operator.
238static ExpectedProgress<RangeSelector>
239parseRangeSelectorImpl(ParseState State) {
240 auto Id = parseId(State);
241 if (!Id)
242 return Id.takeError();
243
244 std::string OpName = std::move(Id->Value);
245 if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
246 return parseSingle(parseStringId, *Op, Id->State);
247
248 if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
249 return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
250
251 if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
252 return parsePair(parseStringId, *Op, Id->State);
253
254 if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
255 return parsePair(parseRangeSelectorImpl, *Op, Id->State);
256
257 return makeParseError(State, "unknown selector name: " + OpName);
258}
259
261 ParseState State = {Input, Input};
262 ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
263 if (!Result)
264 return Result.takeError();
265 State = Result->State;
266 // Discard any potentially trailing whitespace.
267 State.Input = consumeWhitespace(State.Input);
268 if (State.Input.empty())
269 return Result->Value;
270 return makeParseError(State, "unexpected input after selector");
271}
static ExpectedProgress< std::string > parseStringId(ParseState State)
Definition Parsing.cpp:168
static ExpectedProgress< std::nullopt_t > parseChar(char c, ParseState State)
Definition Parsing.cpp:148
ParseProgress< ResultType > makeParseProgress(ParseState State, ResultType Result)
Definition Parsing.cpp:125
static StringRef consumeWhitespace(StringRef S)
Definition Parsing.cpp:142
ExpectedProgress< RangeSelector > parseSingle(ParseFunction< T > ParseElement, RangeSelectorOp< T > Op, ParseState State)
Definition Parsing.cpp:187
ExpectedProgress< RangeSelector > parsePair(ParseFunction< T > ParseElement, RangeSelectorOp< T, T > Op, ParseState State)
Definition Parsing.cpp:208
std::optional< Element > findOptional(const llvm::StringMap< Element > &Map, llvm::StringRef Key)
Definition Parsing.cpp:116
static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg)
Definition Parsing.cpp:130
static const llvm::StringMap< RangeSelectorOp< RangeSelector > > & getUnaryRangeSelectors()
Definition Parsing.cpp:95
static ExpectedProgress< RangeSelector > parseRangeSelectorImpl(ParseState State)
Definition Parsing.cpp:239
static ExpectedProgress< std::string > parseId(ParseState State)
Definition Parsing.cpp:157
static ParseState advance(ParseState S, size_t N)
Definition Parsing.cpp:137
static const llvm::StringMap< RangeSelectorOp< RangeSelector, RangeSelector > > & getBinaryRangeSelectors()
Definition Parsing.cpp:109
static const llvm::StringMap< RangeSelectorOp< std::string, std::string > > & getBinaryStringSelectors()
Definition Parsing.cpp:102
static const llvm::StringMap< RangeSelectorOp< std::string > > & getUnaryStringSelectors()
Definition Parsing.cpp:81
Defines parsing functions for Transformer types.
Defines a combinator library supporting the definition of selectors, which select source ranges based...
__device__ __2f16 float c
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
RangeSelector initListElements(std::string ID)
RangeSelector enclose(RangeSelector Begin, RangeSelector End)
Selects from the start of Begin and to the end of End.
RangeSelector merge(RangeSelector First, RangeSelector Second)
Selects the merge of the two ranges, i.e.
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token. ID is the node's binding in the match result.
RangeSelector between(RangeSelector R1, RangeSelector R2)
Selects the range between R1 and R2.
RangeSelector elseBranch(std::string ID)
Given an \IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword.
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon, if any (for declarations and non-expression statements)...
RangeSelector encloseNodes(std::string BeginID, std::string EndID)
Convenience version of range where end-points are bound nodes.
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
MatchConsumer< CharSourceRange > RangeSelector
RangeSelector callArgs(std::string ID)
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
llvm::Expected< RangeSelector > parseRangeSelector(llvm::StringRef Input)
Parses a string representation of a RangeSelector.
Definition Parsing.cpp:260
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always). Useful for selecting expression statements....
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion,...
RangeSelector statements(std::string ID)
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc) selects th...
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
Definition CharInfo.h:41
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
Definition CharInfo.h:61
@ Result
The result type of a method or function.
Definition TypeBase.h:905
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
Definition CharInfo.h:108
#define log(__x)
Definition tgmath.h:460