clang 20.0.0git
FormatTokenSource.h
Go to the documentation of this file.
1//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the \c FormatTokenSource interface, which provides a token
11/// stream as well as the ability to manipulate the token stream.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17
18#include "UnwrappedLineParser.h"
19
20#define DEBUG_TYPE "format-token-source"
21
22namespace clang {
23namespace format {
24
25// Navigate a token stream.
26//
27// Enables traversal of a token stream, resetting the position in a token
28// stream, as well as inserting new tokens.
30public:
31 virtual ~FormatTokenSource() {}
32
33 // Returns the next token in the token stream.
34 virtual FormatToken *getNextToken() = 0;
35
36 // Returns the token preceding the token returned by the last call to
37 // getNextToken() in the token stream, or nullptr if no such token exists.
38 //
39 // Must not be called when the current position is directly after a call to
40 // insertTokens().
42
43 // Returns the token that would be returned by the next call to
44 // getNextToken().
45 virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
46
47 // Returns whether we are at the end of the file.
48 // This can be different from whether getNextToken() returned an eof token
49 // when the FormatTokenSource is a view on a part of the token stream.
50 virtual bool isEOF() = 0;
51
52 // Gets the current position in the token stream, to be used by setPosition().
53 //
54 // Note that the value of the position is not meaningful, and specifically
55 // should not be used to get relative token positions.
56 virtual unsigned getPosition() = 0;
57
58 // Resets the token stream to the state it was in when getPosition() returned
59 // Position, and returns the token at that position in the stream.
60 virtual FormatToken *setPosition(unsigned Position) = 0;
61
62 // Insert the given tokens before the current position.
63 // Returns the first token in \c Tokens.
64 // The next returned token will be the second token in \c Tokens.
65 // Requires the last token in Tokens to be EOF; once the EOF token is reached,
66 // the next token will be the last token returned by getNextToken().
67 //
68 // For example, given the token sequence 'a1 a2':
69 // getNextToken() -> a1
70 // insertTokens('b1 b2') -> b1
71 // getNextToken() -> b2
72 // getNextToken() -> a1
73 // getNextToken() -> a2
75
76 [[nodiscard]] FormatToken *getNextNonComment() {
77 FormatToken *Tok;
78 do {
79 Tok = getNextToken();
80 assert(Tok);
81 } while (Tok->is(tok::comment));
82 return Tok;
83 }
84};
85
87public:
89 : Tokens(Tokens), Position(-1) {}
90
92 if (Position >= 0 && isEOF()) {
93 LLVM_DEBUG({
94 llvm::dbgs() << "Next ";
95 dbgToken(Position);
96 });
97 return Tokens[Position];
98 }
99 Position = successor(Position);
100 LLVM_DEBUG({
101 llvm::dbgs() << "Next ";
102 dbgToken(Position);
103 });
104 return Tokens[Position];
105 }
106
108 assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
109 return Position > 0 ? Tokens[Position - 1] : nullptr;
110 }
111
112 FormatToken *peekNextToken(bool SkipComment = false) override {
113 if (isEOF())
114 return Tokens[Position];
115 int Next = successor(Position);
116 if (SkipComment)
117 while (Tokens[Next]->is(tok::comment))
118 Next = successor(Next);
119 LLVM_DEBUG({
120 llvm::dbgs() << "Peeking ";
121 dbgToken(Next);
122 });
123 return Tokens[Next];
124 }
125
126 bool isEOF() override {
127 return Position == -1 ? false : Tokens[Position]->is(tok::eof);
128 }
129
130 unsigned getPosition() override {
131 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
132 assert(Position >= 0);
133 return Position;
134 }
135
136 FormatToken *setPosition(unsigned P) override {
137 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
138 Position = P;
139 return Tokens[Position];
140 }
141
143 assert(Position != -1);
144 assert((*New.rbegin())->Tok.is(tok::eof));
145 int Next = Tokens.size();
146 Tokens.append(New.begin(), New.end());
147 LLVM_DEBUG({
148 llvm::dbgs() << "Inserting:\n";
149 for (int I = Next, E = Tokens.size(); I != E; ++I)
150 dbgToken(I, " ");
151 llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> "
152 << Position << "\n";
153 });
154 Jumps[Tokens.size() - 1] = Position;
155 Position = Next;
156 LLVM_DEBUG({
157 llvm::dbgs() << "At inserted token ";
158 dbgToken(Position);
159 });
160 return Tokens[Position];
161 }
162
163 void reset() { Position = -1; }
164
165private:
166 int successor(int Current) const {
167 int Next = Current + 1;
168 auto it = Jumps.find(Next);
169 if (it != Jumps.end()) {
170 Next = it->second;
171 assert(!Jumps.contains(Next));
172 }
173 return Next;
174 }
175
176 void dbgToken(int Position, StringRef Indent = "") {
177 FormatToken *Tok = Tokens[Position];
178 llvm::dbgs() << Indent << "[" << Position
179 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
180 << ", Macro: " << !!Tok->MacroCtx << "\n";
181 }
182
183 SmallVector<FormatToken *> Tokens;
184 int Position;
185
186 // Maps from position a to position b, so that when we reach a, the token
187 // stream continues at position b instead.
188 llvm::DenseMap<int, int> Jumps;
189};
190
192public:
194 FormatToken *&ResetToken)
195 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
196 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
197 Token(nullptr), PreviousToken(nullptr) {
198 FakeEOF.Tok.startToken();
199 FakeEOF.Tok.setKind(tok::eof);
200 TokenSource = this;
201 Line.Level = 0;
202 Line.InPPDirective = true;
203 // InMacroBody gets set after the `#define x` part.
204 }
205
206 ~ScopedMacroState() override {
207 TokenSource = PreviousTokenSource;
208 ResetToken = Token;
209 Line.InPPDirective = false;
210 Line.InMacroBody = false;
211 Line.Level = PreviousLineLevel;
212 }
213
215 // The \c UnwrappedLineParser guards against this by never calling
216 // \c getNextToken() after it has encountered the first eof token.
217 assert(!eof());
218 PreviousToken = Token;
219 Token = PreviousTokenSource->getNextToken();
220 if (eof())
221 return &FakeEOF;
222 return Token;
223 }
224
226 return PreviousTokenSource->getPreviousToken();
227 }
228
229 FormatToken *peekNextToken(bool SkipComment) override {
230 if (eof())
231 return &FakeEOF;
232 return PreviousTokenSource->peekNextToken(SkipComment);
233 }
234
235 bool isEOF() override { return PreviousTokenSource->isEOF(); }
236
237 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
238
239 FormatToken *setPosition(unsigned Position) override {
240 PreviousToken = nullptr;
241 Token = PreviousTokenSource->setPosition(Position);
242 return Token;
243 }
244
246 llvm_unreachable("Cannot insert tokens while parsing a macro.");
247 return nullptr;
248 }
249
250private:
251 bool eof() {
252 return Token && Token->HasUnescapedNewline &&
253 !continuesLineComment(*Token, PreviousToken,
254 /*MinColumnToken=*/PreviousToken);
255 }
256
257 FormatToken FakeEOF;
258 UnwrappedLine &Line;
259 FormatTokenSource *&TokenSource;
260 FormatToken *&ResetToken;
261 unsigned PreviousLineLevel;
262 FormatTokenSource *PreviousTokenSource;
263
264 FormatToken *Token;
265 FormatToken *PreviousToken;
266};
267
268} // namespace format
269} // namespace clang
270
271#undef DEBUG_TYPE
272
273#endif
StringRef P
Expr * E
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
void setKind(tok::TokenKind K)
Definition: Token.h:95
void startToken()
Reset all flags to cleared.
Definition: Token.h:177
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual unsigned getPosition()=0
virtual FormatToken * insertTokens(ArrayRef< FormatToken * > Tokens)=0
virtual FormatToken * getPreviousToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getNextToken()=0
FormatToken * getPreviousToken() override
IndexedTokenSource(ArrayRef< FormatToken * > Tokens)
FormatToken * getNextToken() override
FormatToken * peekNextToken(bool SkipComment=false) override
FormatToken * setPosition(unsigned P) override
FormatToken * insertTokens(ArrayRef< FormatToken * > New) override
FormatToken * peekNextToken(bool SkipComment) override
FormatToken * getNextToken() override
ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, FormatToken *&ResetToken)
FormatToken * setPosition(unsigned Position) override
FormatToken * getPreviousToken() override
FormatToken * insertTokens(ArrayRef< FormatToken * > Tokens) override
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1963
The JSON file list parser is used to communicate input to InstallAPI.
#define false
Definition: stdbool.h:26
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:292
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:604
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
bool InMacroBody
Whether it is part of a macro body.
unsigned Level
The indent level of the UnwrappedLine.
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.