clang 19.0.0git
Token.h
Go to the documentation of this file.
1//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the Token interface.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_CLANG_LEX_TOKEN_H
14#define LLVM_CLANG_LEX_TOKEN_H
15
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/StringRef.h"
20#include <cassert>
21
22namespace clang {
23
24class IdentifierInfo;
25class LangOptions;
26
27/// Token - This structure provides full information about a lexed token.
28/// It is not intended to be space efficient, it is intended to return as much
29/// information as possible about each returned token. This is expected to be
30/// compressed into a smaller form if memory footprint is important.
31///
32/// The parser can create a special "annotation token" representing a stream of
33/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
34/// can be represented by a single typename annotation token that carries
35/// information about the SourceRange of the tokens and the type object.
36class Token {
37 /// The location of the token. This is actually a SourceLocation.
39
40 // Conceptually these next two fields could be in a union. However, this
41 // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
42 // routine. Keeping as separate members with casts until a more beautiful fix
43 // presents itself.
44
45 /// UintData - This holds either the length of the token text, when
46 /// a normal token, or the end of the SourceRange when an annotation
47 /// token.
49
50 /// PtrData - This is a union of four different pointer types, which depends
51 /// on what type of token this is:
52 /// Identifiers, keywords, etc:
53 /// This is an IdentifierInfo*, which contains the uniqued identifier
54 /// spelling.
55 /// Literals: isLiteral() returns true.
56 /// This is a pointer to the start of the token in a text buffer, which
57 /// may be dirty (have trigraphs / escaped newlines).
58 /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
59 /// This is a pointer to sema-specific data for the annotation token.
60 /// Eof:
61 /// This is a pointer to a Decl.
62 /// Other:
63 /// This is null.
64 void *PtrData;
65
66 /// Kind - The actual flavor of token this is.
67 tok::TokenKind Kind;
68
69 /// Flags - Bits we track about this token, members of the TokenFlags enum.
70 unsigned short Flags;
71
72public:
73 // Various flags set per token:
75 StartOfLine = 0x01, // At start of line or only after whitespace
76 // (considering the line after macro expansion).
77 LeadingSpace = 0x02, // Whitespace exists before this token (considering
78 // whitespace after macro expansion).
79 DisableExpand = 0x04, // This identifier may never be macro expanded.
80 NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
81 LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
82 HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
83 HasUCN = 0x40, // This identifier contains a UCN.
84 IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
85 StringifiedInMacro = 0x100, // This string or character literal is formed by
86 // macro stringizing or charizing operator.
87 CommaAfterElided = 0x200, // The comma following this token was elided (MS).
88 IsEditorPlaceholder = 0x400, // This identifier is a placeholder.
89 IsReinjected = 0x800, // A phase 4 token that was produced before and
90 // re-added, e.g. via EnterTokenStream. Annotation
91 // tokens are *not* reinjected.
92 };
93
/// Return the kind of this token, i.e. the value currently stored in Kind.
94 tok::TokenKind getKind() const { return Kind; }
/// Set the kind of this token to \p K. Only Kind is written; no other field
/// of the token is modified.
95 void setKind(tok::TokenKind K) { Kind = K; }
96
97 /// is/isNot - Predicates to check if this token is a specific kind, as in
98 /// "if (Tok.is(tok::l_brace)) {...}".
/// is() returns true when this token's kind equals \p K.
99 bool is(tok::TokenKind K) const { return Kind == K; }
/// Return true when this token's kind differs from \p K.
100 bool isNot(tok::TokenKind K) const { return Kind != K; }
102 return is(K1) || is(K2);
103 }
/// Return true if this token's kind equals \p K1 or any of the kinds in
/// \p Ks. K1 is tested first; the remaining kinds are handled by calling
/// isOneOf() again with one fewer argument.
104 template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const {
105 return is(K1) || isOneOf(Ks...);
106 }
107
108 /// Return true if this is a raw identifier (when lexing
109 /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
110 bool isAnyIdentifier() const {
112 }
113
114 /// Return true if this is a "literal", like a numeric
115 /// constant, string, etc.
/// The decision is delegated to tok::isLiteral() on this token's kind.
116 bool isLiteral() const {
117 return tok::isLiteral(getKind());
118 }
119
120 /// Return true if this is any of tok::annot_* kind tokens.
/// The decision is delegated to tok::isAnnotation() on this token's kind.
121 bool isAnnotation() const { return tok::isAnnotation(getKind()); }
122
123 /// Return true if the token is a keyword that is parsed in the same
124 /// position as a standard attribute, but that has semantic meaning
125 /// and so cannot be a true attribute.
128 }
129
130 /// Return a source location identifier for the specified
131 /// offset in the current file.
134 }
/// Return the length of the token text, as stored in UintData.
///
/// Must not be called on an annotation token (asserted): for those tokens
/// UintData holds the end of the annotation's SourceRange, not a length.
135 unsigned getLength() const {
136 assert(!isAnnotation() && "Annotation tokens have no length field");
137 return UintData;
138 }
139
/// Set the length of the token text to \p Len by storing it in UintData.
///
/// Must not be called on an annotation token (asserted): for those tokens
/// UintData holds the end of the annotation's SourceRange, not a length.
141 void setLength(unsigned Len) {
142 assert(!isAnnotation() && "Annotation tokens have no length field");
143 UintData = Len;
144 }
145
147 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
148 return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);
149 }
151 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
152 UintData = L.getRawEncoding();
153 }
154
157 }
158
162 }
163
164 /// SourceRange of the group of tokens that this annotation token
165 /// represents.
168 }
172 }
173
/// Return the name of this token's kind, as reported by tok::getTokenName().
174 const char *getName() const { return tok::getTokenName(Kind); }
175
176 /// Reset the token to a cleared state (unknown kind, no flags, no data).
177 void startToken() {
178 Kind = tok::unknown;
179 Flags = 0;
180 PtrData = nullptr;
181 UintData = 0;
183 }
184
/// Return true if PtrData is non-null, whatever kind of pointer it holds.
185 bool hasPtrData() const { return PtrData != nullptr; }
186
188 assert(isNot(tok::raw_identifier) &&
189 "getIdentifierInfo() on a tok::raw_identifier token!");
190 assert(!isAnnotation() &&
191 "getIdentifierInfo() on an annotation token!");
192 if (isLiteral()) return nullptr;
193 if (is(tok::eof)) return nullptr;
194 return (IdentifierInfo*) PtrData;
195 }
197 PtrData = (void*) II;
198 }
199
/// Return the pointer stored in PtrData for an end-of-file token.
///
/// Asserts that this token is tok::eof. The pointer is handed back as
/// const void *; callers are responsible for knowing its real type.
200 const void *getEofData() const {
201 assert(is(tok::eof));
202 return reinterpret_cast<const void *>(PtrData);
203 }
/// Attach the pointer \p D to an end-of-file token.
///
/// Asserts that this token is tok::eof and that no pointer data has been set
/// on it yet, so existing data cannot be silently overwritten.
204 void setEofData(const void *D) {
205 assert(is(tok::eof));
206 assert(!PtrData);
// PtrData is a plain void *, so constness is dropped for storage only;
// getEofData() returns the pointer as const again.
207 PtrData = const_cast<void *>(D);
208 }
209
210 /// getRawIdentifier - For a raw identifier token (i.e., an identifier
211 /// lexed in raw mode), returns a reference to the text substring in the
212 /// buffer if known.
///
/// Asserts that this token is tok::raw_identifier. The returned StringRef
/// starts at PtrData and spans getLength() characters; it does not copy the
/// text.
213 StringRef getRawIdentifier() const {
214 assert(is(tok::raw_identifier));
215 return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
216 }
/// Record \p Ptr as the start of this raw identifier's text.
///
/// Asserts that this token is tok::raw_identifier. Only the pointer is
/// stored; the length is not touched by this call.
217 void setRawIdentifierData(const char *Ptr) {
218 assert(is(tok::raw_identifier));
// PtrData is a plain void *, so constness is dropped for storage only.
219 PtrData = const_cast<char*>(Ptr);
220 }
221
222 /// getLiteralData - For a literal token (numeric constant, string, etc), this
223 /// returns a pointer to the start of it in the text buffer if known, null
224 /// otherwise.
///
/// Asserts that isLiteral() is true for this token.
225 const char *getLiteralData() const {
226 assert(isLiteral() && "Cannot get literal data of non-literal");
227 return reinterpret_cast<const char*>(PtrData);
228 }
/// Record \p Ptr as the start of this literal token's text.
///
/// Asserts that isLiteral() is true for this token, so the token kind must be
/// set before calling this.
229 void setLiteralData(const char *Ptr) {
230 assert(isLiteral() && "Cannot set literal data of non-literal");
// PtrData is a plain void *, so constness is dropped for storage only.
231 PtrData = const_cast<char*>(Ptr);
232 }
233
/// Return the pointer stored in PtrData for an annotation token.
///
/// Asserts that isAnnotation() is true for this token. The pointer is
/// returned untyped; its real type depends on the annotation kind.
234 void *getAnnotationValue() const {
235 assert(isAnnotation() && "Used AnnotVal on non-annotation token");
236 return PtrData;
237 }
/// Store \p val in PtrData as the value of an annotation token.
///
/// Asserts that isAnnotation() is true for this token, so the token kind must
/// be set before calling this.
238 void setAnnotationValue(void *val) {
239 assert(isAnnotation() && "Used AnnotVal on non-annotation token");
240 PtrData = val;
241 }
242
243 /// Set the specified flag by OR-ing it into Flags; all other flags keep
/// their current value.
244 void setFlag(TokenFlags Flag) {
245 Flags |= Flag;
246 }
247
248 /// Get the specified flag: returns true if any bit of \p Flag is set in
/// Flags.
249 bool getFlag(TokenFlags Flag) const {
250 return (Flags & Flag) != 0;
251 }
252
253 /// Unset the specified flag.
255 Flags &= ~Flag;
256 }
257
258 /// Return the internal representation of the flags.
259 ///
260 /// This is only intended for low-level operations such as writing tokens to
261 /// disk.
///
/// The value is the raw Flags bit set, widened to unsigned.
262 unsigned getFlags() const {
263 return Flags;
264 }
265
266 /// Set a flag to either true or false: \p Flag is set via setFlag() when
/// \p Val is true and cleared via clearFlag() otherwise.
267 void setFlagValue(TokenFlags Flag, bool Val) {
268 if (Val)
269 setFlag(Flag);
270 else
271 clearFlag(Flag);
272 }
273
274 /// isAtStartOfLine - Return true if this token is at the start of a line.
275 /// Tests the StartOfLine flag, which also covers tokens preceded only by
/// whitespace (considering the line after macro expansion).
276 bool isAtStartOfLine() const { return getFlag(StartOfLine); }
277
278 /// Return true if this token has whitespace before it.
279 /// Tests the LeadingSpace flag (whitespace is considered after macro
/// expansion).
280 bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
281
282 /// Return true if this identifier token should never
283 /// be expanded in the future, due to C99 6.10.3.4p2.
/// Tests the DisableExpand flag.
284 bool isExpandDisabled() const { return getFlag(DisableExpand); }
285
286 /// Return true if we have an ObjC keyword identifier.
287 bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
288
289 /// Return the ObjC keyword kind.
291
292 bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
293
294 /// Return true if this token has trigraphs or escaped newlines in it.
/// Tests the NeedsCleaning flag.
295 bool needsCleaning() const { return getFlag(NeedsCleaning); }
296
297 /// Return true if this token has an empty macro before it.
298 ///
300
301 /// Return true if this token is a string or character literal which
302 /// has a ud-suffix. Tests the HasUDSuffix flag.
303 bool hasUDSuffix() const { return getFlag(HasUDSuffix); }
304
305 /// Returns true if this token contains a universal character name (UCN).
/// Tests the HasUCN flag.
306 bool hasUCN() const { return getFlag(HasUCN); }
307
308 /// Returns true if this token was formed by the macro stringizing or
309 /// charizing operator.
311
312 /// Returns true if the comma after this token was elided.
/// Tests the CommaAfterElided flag, which the flag list marks as "(MS)".
313 bool commaAfterElided() const { return getFlag(CommaAfterElided); }
314
315 /// Returns true if this token is an editor placeholder.
316 ///
317 /// Editor placeholders are produced by the code-completion engine and are
318 /// represented as characters between '<#' and '#>' in the source code. The
319 /// lexer uses identifier tokens to represent placeholders.
321};
322
323/// Information about the conditional stack (\#if directives)
324/// currently active.
326 /// Location where the conditional started.
328
329 /// True if this was contained in a skipping directive, e.g.,
330 /// in a "\#if 0" block.
332
333 /// True if we have emitted tokens already, and now we're in
334 /// an \#else block or something. Only useful in Skipping blocks.
336
337 /// True if we've seen a \#else in this block. If so,
338 /// \#elif/\#else directives are not allowed.
340};
341
342// Extra information needed for annotation tokens.
347};
348} // end namespace clang
349
350#endif // LLVM_CLANG_LEX_TOKEN_H
static constexpr bool isOneOf()
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
One of these records is kept for each identifier that is lexed.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:461
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
A trivial tuple used to represent a source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
void setLiteralData(const char *Ptr)
Definition: Token.h:229
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:110
SourceLocation getEndLoc() const
Definition: Token.h:159
unsigned getFlags() const
Return the internal representation of the flags.
Definition: Token.h:262
void setAnnotationEndLoc(SourceLocation L)
Definition: Token.h:150
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:306
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:254
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132
const char * getName() const
Definition: Token.h:174
unsigned getLength() const
Definition: Token.h:135
void setLength(unsigned Len)
Definition: Token.h:141
bool isEditorPlaceholder() const
Returns true if this token is an editor placeholder.
Definition: Token.h:320
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition: Token.h:284
void setKind(tok::TokenKind K)
Definition: Token.h:95
bool commaAfterElided() const
Returns true if the comma after this token was elided.
Definition: Token.h:313
SourceLocation getAnnotationEndLoc() const
Definition: Token.h:146
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:70
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
void * getAnnotationValue() const
Definition: Token.h:234
bool isOneOf(tok::TokenKind K1, Ts... Ks) const
Definition: Token.h:104
tok::TokenKind getKind() const
Definition: Token.h:94
bool isRegularKeywordAttribute() const
Return true if the token is a keyword that is parsed in the same position as a standard attribute,...
Definition: Token.h:126
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:276
void setEofData(const void *D)
Definition: Token.h:204
bool getFlag(TokenFlags Flag) const
Get the specified flag.
Definition: Token.h:249
@ DisableExpand
Definition: Token.h:79
@ HasUCN
Definition: Token.h:83
@ IsEditorPlaceholder
Definition: Token.h:88
@ IgnoredComma
Definition: Token.h:84
@ IsReinjected
Definition: Token.h:89
@ LeadingEmptyMacro
Definition: Token.h:81
@ LeadingSpace
Definition: Token.h:77
@ StartOfLine
Definition: Token.h:75
@ StringifiedInMacro
Definition: Token.h:85
@ HasUDSuffix
Definition: Token.h:82
@ CommaAfterElided
Definition: Token.h:87
@ NeedsCleaning
Definition: Token.h:80
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:280
SourceRange getAnnotationRange() const
SourceRange of the group of tokens that this annotation token represents.
Definition: Token.h:166
void setLocation(SourceLocation L)
Definition: Token.h:140
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition: Token.h:299
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:217
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:101
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
bool hasPtrData() const
Definition: Token.h:185
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:121
void setAnnotationValue(void *val)
Definition: Token.h:238
const void * getEofData() const
Definition: Token.h:200
bool hasUDSuffix() const
Return true if this token is a string or character literal which has a ud-suffix.
Definition: Token.h:303
bool stringifiedInMacro() const
Returns true if this token was formed by the macro stringizing or charizing operator.
Definition: Token.h:310
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:61
void setAnnotationRange(SourceRange R)
Definition: Token.h:169
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
Definition: Lexer.cpp:78
void startToken()
Reset all flags to cleared.
Definition: Token.h:177
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:295
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:196
SourceLocation getLastLoc() const
Definition: Token.h:155
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:267
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
Definition: Token.h:213
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:225
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:244
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:24
bool isAnyIdentifier(TokenKind K)
Return true if this is a raw identifier or an identifier kind.
Definition: TokenKinds.h:83
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
constexpr bool isRegularKeywordAttribute(TokenKind K)
Definition: TokenKinds.h:110
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:97
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
Definition: TokenKinds.cpp:58
The JSON file list parser is used to communicate input to InstallAPI.
Information about the conditional stack (#if directives) currently active.
Definition: Token.h:325
bool FoundNonSkip
True if we have emitted tokens already, and now we're in an #else block or something.
Definition: Token.h:335
SourceLocation IfLoc
Location where the conditional started.
Definition: Token.h:327
bool WasSkipping
True if this was contained in a skipping directive, e.g., in a "\#if 0" block.
Definition: Token.h:331
bool FoundElse
True if we've seen a #else in this block.
Definition: Token.h:339
ArrayRef< Token > Toks
Definition: Token.h:346