clang 18.0.0git
FormatTokenSource.h
Go to the documentation of this file.
1//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the \c FormatTokenSource interface, which provides a token
11/// stream as well as the ability to manipulate the token stream.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17
18#include "FormatToken.h"
19#include "UnwrappedLineParser.h"
20#include "llvm/ADT/DenseMap.h"
21#include <cstddef>
22
23#define DEBUG_TYPE "format-token-source"
24
25namespace clang {
26namespace format {
27
28// Navigate a token stream.
29//
30// Enables traversal of a token stream, resetting the position in a token
31// stream, as well as inserting new tokens.
33public:
34 virtual ~FormatTokenSource() {}
35
36 // Returns the next token in the token stream.
37 virtual FormatToken *getNextToken() = 0;
38
39 // Returns the token preceding the token returned by the last call to
40 // getNextToken() in the token stream, or nullptr if no such token exists.
41 //
42 // Must not be called at the position directly after insertTokens()
43 // is called.
45
46 // Returns the token that would be returned by the next call to
47 // getNextToken(). If \c SkipComment is true, comment tokens are skipped.
48 virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
49
50 // Returns whether we are at the end of the file.
51 // This can be different from whether getNextToken() returned an eof token
52 // when the FormatTokenSource is a view on a part of the token stream.
53 virtual bool isEOF() = 0;
54
55 // Gets the current position in the token stream, to be used by setPosition().
56 //
57 // Note that the value of the position is not meaningful, and specifically
58 // should not be used to get relative token positions.
59 virtual unsigned getPosition() = 0;
60
61 // Resets the token stream to the state it was in when getPosition() returned
62 // Position, and returns the token at that position in the stream.
63 virtual FormatToken *setPosition(unsigned Position) = 0;
64
65 // Inserts the given tokens before the current position.
66 // Returns the first token in \c Tokens.
67 // The next returned token will be the second token in \c Tokens.
68 // Requires the last token in \c Tokens to be EOF; once that EOF is reached,
69 // the next token is the one getNextToken() last returned before the insert.
70 //
71 // For example, given the token sequence 'a1 a2':
72 // getNextToken() -> a1
73 // insertTokens('b1 b2') -> b1
74 // getNextToken() -> b2
75 // getNextToken() -> a1
76 // getNextToken() -> a2
78};
79
81public:
83 : Tokens(Tokens), Position(-1) {}
84
86 if (Position >= 0 && isEOF()) {
87 LLVM_DEBUG({
88 llvm::dbgs() << "Next ";
89 dbgToken(Position);
90 });
91 return Tokens[Position];
92 }
93 Position = successor(Position);
94 LLVM_DEBUG({
95 llvm::dbgs() << "Next ";
96 dbgToken(Position);
97 });
98 return Tokens[Position];
99 }
100
102 assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
103 return Position > 0 ? Tokens[Position - 1] : nullptr;
104 }
105
106 FormatToken *peekNextToken(bool SkipComment = false) override {
107 if (isEOF())
108 return Tokens[Position];
109 int Next = successor(Position);
110 if (SkipComment)
111 while (Tokens[Next]->is(tok::comment))
112 Next = successor(Next);
113 LLVM_DEBUG({
114 llvm::dbgs() << "Peeking ";
115 dbgToken(Next);
116 });
117 return Tokens[Next];
118 }
119
120 bool isEOF() override {
121 return Position == -1 ? false : Tokens[Position]->is(tok::eof);
122 }
123
124 unsigned getPosition() override {
125 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
126 assert(Position >= 0);
127 return Position;
128 }
129
130 FormatToken *setPosition(unsigned P) override {
131 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
132 Position = P;
133 return Tokens[Position];
134 }
135
137 assert(Position != -1);
138 assert((*New.rbegin())->Tok.is(tok::eof));
139 int Next = Tokens.size();
140 Tokens.append(New.begin(), New.end());
141 LLVM_DEBUG({
142 llvm::dbgs() << "Inserting:\n";
143 for (int I = Next, E = Tokens.size(); I != E; ++I)
144 dbgToken(I, " ");
145 llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> "
146 << Position << "\n";
147 });
148 Jumps[Tokens.size() - 1] = Position;
149 Position = Next;
150 LLVM_DEBUG({
151 llvm::dbgs() << "At inserted token ";
152 dbgToken(Position);
153 });
154 return Tokens[Position];
155 }
156
157 void reset() { Position = -1; }
158
159private:
160 int successor(int Current) const {
161 int Next = Current + 1;
162 auto it = Jumps.find(Next);
163 if (it != Jumps.end()) {
164 Next = it->second;
165 assert(!Jumps.contains(Next));
166 }
167 return Next;
168 }
169
170 void dbgToken(int Position, llvm::StringRef Indent = "") {
171 FormatToken *Tok = Tokens[Position];
172 llvm::dbgs() << Indent << "[" << Position
173 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
174 << ", Macro: " << !!Tok->MacroCtx << "\n";
175 }
176
177 SmallVector<FormatToken *> Tokens;
178 int Position;
179
180 // Maps from position a to position b, so that when we reach a, the token
181 // stream continues at position b instead.
182 llvm::DenseMap<int, int> Jumps;
183};
184
186public:
188 FormatToken *&ResetToken)
189 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
190 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
191 Token(nullptr), PreviousToken(nullptr) {
192 FakeEOF.Tok.startToken();
193 FakeEOF.Tok.setKind(tok::eof);
194 TokenSource = this;
195 Line.Level = 0;
196 Line.InPPDirective = true;
197 // InMacroBody gets set after the `#define x` part.
198 }
199
200 ~ScopedMacroState() override {
201 TokenSource = PreviousTokenSource;
202 ResetToken = Token;
203 Line.InPPDirective = false;
204 Line.InMacroBody = false;
205 Line.Level = PreviousLineLevel;
206 }
207
209 // The \c UnwrappedLineParser guards against this by never calling
210 // \c getNextToken() after it has encountered the first eof token.
211 assert(!eof());
212 PreviousToken = Token;
213 Token = PreviousTokenSource->getNextToken();
214 if (eof())
215 return &FakeEOF;
216 return Token;
217 }
218
220 return PreviousTokenSource->getPreviousToken();
221 }
222
223 FormatToken *peekNextToken(bool SkipComment) override {
224 if (eof())
225 return &FakeEOF;
226 return PreviousTokenSource->peekNextToken(SkipComment);
227 }
228
229 bool isEOF() override { return PreviousTokenSource->isEOF(); }
230
231 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
232
233 FormatToken *setPosition(unsigned Position) override {
234 PreviousToken = nullptr;
235 Token = PreviousTokenSource->setPosition(Position);
236 return Token;
237 }
238
240 llvm_unreachable("Cannot insert tokens while parsing a macro.");
241 return nullptr;
242 }
243
244private:
245 bool eof() {
246 return Token && Token->HasUnescapedNewline &&
247 !continuesLineComment(*Token, PreviousToken,
248 /*MinColumnToken=*/PreviousToken);
249 }
250
251 FormatToken FakeEOF;
252 UnwrappedLine &Line;
253 FormatTokenSource *&TokenSource;
254 FormatToken *&ResetToken;
255 unsigned PreviousLineLevel;
256 FormatTokenSource *PreviousTokenSource;
257
258 FormatToken *Token;
259 FormatToken *PreviousToken;
260};
261
262} // namespace format
263} // namespace clang
264
265#undef DEBUG_TYPE
266
267#endif
StringRef P
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void setKind(tok::TokenKind K)
Definition: Token.h:94
void startToken()
Reset all flags to cleared.
Definition: Token.h:176
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual unsigned getPosition()=0
virtual FormatToken * insertTokens(ArrayRef< FormatToken * > Tokens)=0
virtual FormatToken * getPreviousToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getNextToken()=0
FormatToken * getPreviousToken() override
IndexedTokenSource(ArrayRef< FormatToken * > Tokens)
FormatToken * getNextToken() override
FormatToken * peekNextToken(bool SkipComment=false) override
FormatToken * setPosition(unsigned P) override
FormatToken * insertTokens(ArrayRef< FormatToken * > New) override
FormatToken * peekNextToken(bool SkipComment) override
FormatToken * getNextToken() override
ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, FormatToken *&ResetToken)
FormatToken * setPosition(unsigned Position) override
FormatToken * getPreviousToken() override
FormatToken * insertTokens(ArrayRef< FormatToken * > Tokens) override
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1826
#define false
Definition: stdbool.h:22
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:269
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
bool InMacroBody
Whether it is part of a macro body.
unsigned Level
The indent level of the UnwrappedLine.
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.