clang 23.0.0git
Token.h
Go to the documentation of this file.
1//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the Token interface.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_CLANG_LEX_TOKEN_H
14#define LLVM_CLANG_LEX_TOKEN_H
15
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/StringRef.h"
20#include <cassert>
21
22namespace clang {
23
24class IdentifierInfo;
25class LangOptions;
26
27/// Token - This structure provides full information about a lexed token.
28/// It is not intended to be space efficient, it is intended to return as much
29/// information as possible about each returned token. This is expected to be
30/// compressed into a smaller form if memory footprint is important.
31///
32/// The parser can create a special "annotation token" representing a stream of
33/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
34/// can be represented by a single typename annotation token that carries
35/// information about the SourceRange of the tokens and the type object.
36class Token {
37 /// The location of the token. This is actually a SourceLocation.
39
40 // Conceptually these next two fields could be in a union. However, this
41 // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
42 // routine. Keeping as separate members with casts until a more beautiful fix
43 // presents itself.
44
45 /// UintData - This holds either the length of the token text, when
46 /// a normal token, or the end of the SourceRange when an annotation
47 /// token.
49
50 /// PtrData - This is a union of four different pointer types, which depends
51 /// on what type of token this is:
52 /// Identifiers, keywords, etc:
53 /// This is an IdentifierInfo*, which contains the uniqued identifier
54 /// spelling.
55 /// Literals: isLiteral() returns true.
56 /// This is a pointer to the start of the token in a text buffer, which
57 /// may be dirty (have trigraphs / escaped newlines).
58 /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
59 /// This is a pointer to sema-specific data for the annotation token.
60 /// Eof:
61 /// This is a pointer to a Decl.
62 /// Other:
63 /// This is null.
64 void *PtrData;
65
66 /// Kind - The actual flavor of token this is.
67 tok::TokenKind Kind;
68
69 /// Flags - Bits we track about this token, members of the TokenFlags enum.
70 unsigned short Flags;
71
72public:
73 // Various flags set per token:
75 StartOfLine = 0x01, // At start of line or only after whitespace
76 // (considering the line after macro expansion).
77 LeadingSpace = 0x02, // Whitespace exists before this token (considering
78 // whitespace after macro expansion).
79 DisableExpand = 0x04, // This identifier may never be macro expanded.
80 NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
81 LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
82 HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
83 HasUCN = 0x40, // This identifier contains a UCN.
84 IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
85 StringifiedInMacro = 0x100, // This string or character literal is formed by
86 // macro stringizing or charizing operator.
87 CommaAfterElided = 0x200, // The comma following this token was elided (MS).
88 IsEditorPlaceholder = 0x400, // This identifier is a placeholder.
89 IsReinjected = 0x800, // A phase 4 token that was produced before and
90 // re-added, e.g. via EnterTokenStream. Annotation
91 // tokens are *not* reinjected.
93 0x1000, // Whether we've seen any 'no-trivial' pp-directives before
94 // current position.
96 0x2000, // This token is at the start of a physical line.
97 };
98
99 tok::TokenKind getKind() const { return Kind; }
100 void setKind(tok::TokenKind K) { Kind = K; }
101
102 /// is/isNot - Predicates to check if this token is a specific kind, as in
103 /// "if (Tok.is(tok::l_brace)) {...}".
104 bool is(tok::TokenKind K) const { return Kind == K; }
105 template <typename... Ts> bool isOneOf(Ts... Ks) const {
106 static_assert(sizeof...(Ts) > 0,
107 "requires at least one tok::TokenKind specified");
108 return (is(Ks) || ...);
109 }
110
111 bool isNot(tok::TokenKind K) const { return Kind != K; }
112 template <typename... Ts> bool isNoneOf(Ts... Ks) const {
113 static_assert(sizeof...(Ts) > 0,
114 "requires at least one tok::TokenKind specified");
115 return (isNot(Ks) && ...);
116 }
117
118 /// Return true if this is a raw identifier (when lexing
119 /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
120 bool isAnyIdentifier() const {
122 }
123
124 /// Return true if this is a "literal", like a numeric
125 /// constant, string, etc.
126 bool isLiteral() const {
127 return tok::isLiteral(getKind());
128 }
129
130 /// Return true if this is any of tok::annot_* kind tokens.
131 bool isAnnotation() const { return tok::isAnnotation(getKind()); }
132
133 /// Return true if the token is a keyword that is parsed in the same
134 /// position as a standard attribute, but that has semantic meaning
135 /// and so cannot be a true attribute.
139
140 /// Return a source location identifier for the specified
141 /// offset in the current file.
145 unsigned getLength() const {
146 assert(!isAnnotation() && "Annotation tokens have no length field");
147 return UintData;
148 }
149
151 void setLength(unsigned Len) {
152 assert(!isAnnotation() && "Annotation tokens have no length field");
153 UintData = Len;
154 }
155
157 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
158 return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);
159 }
161 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
162 UintData = L.getRawEncoding();
163 }
164
168
173
174 /// SourceRange of the group of tokens that this annotation token
175 /// represents.
183
184 const char *getName() const { return tok::getTokenName(Kind); }
185
186 /// Reset the token to a cleared state (kind, flags and data fields).
187 void startToken() {
188 Kind = tok::unknown;
189 Flags = 0;
190 PtrData = nullptr;
191 UintData = 0;
193 }
194
195 bool hasPtrData() const { return PtrData != nullptr; }
196
198 assert(isNot(tok::raw_identifier) &&
199 "getIdentifierInfo() on a tok::raw_identifier token!");
200 assert(!isAnnotation() &&
201 "getIdentifierInfo() on an annotation token!");
202 if (isLiteral()) return nullptr;
203 if (is(tok::eof)) return nullptr;
204 return (IdentifierInfo*) PtrData;
205 }
207 PtrData = (void*) II;
208 }
209
210 const void *getEofData() const {
211 assert(is(tok::eof));
212 return reinterpret_cast<const void *>(PtrData);
213 }
214 void setEofData(const void *D) {
215 assert(is(tok::eof));
216 assert(!PtrData);
217 PtrData = const_cast<void *>(D);
218 }
219
220 /// getRawIdentifier - For a raw identifier token (i.e., an identifier
221 /// lexed in raw mode), returns a reference to the text substring in the
222 /// buffer if known.
223 StringRef getRawIdentifier() const {
224 assert(is(tok::raw_identifier));
225 return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
226 }
227 void setRawIdentifierData(const char *Ptr) {
228 assert(is(tok::raw_identifier));
229 PtrData = const_cast<char*>(Ptr);
230 }
231
232 /// getLiteralData - For a literal token (numeric constant, string, etc), this
233 /// returns a pointer to the start of it in the text buffer if known, null
234 /// otherwise.
235 const char *getLiteralData() const {
236 assert(isLiteral() && "Cannot get literal data of non-literal");
237 return reinterpret_cast<const char*>(PtrData);
238 }
239 void setLiteralData(const char *Ptr) {
240 assert(isLiteral() && "Cannot set literal data of non-literal");
241 PtrData = const_cast<char*>(Ptr);
242 }
243
244 void *getAnnotationValue() const {
245 assert(isAnnotation() && "Used AnnotVal on non-annotation token");
246 return PtrData;
247 }
248 void setAnnotationValue(void *val) {
249 assert(isAnnotation() && "Used AnnotVal on non-annotation token");
250 PtrData = val;
251 }
252
253 /// Set the specified flag.
254 void setFlag(TokenFlags Flag) {
255 Flags |= Flag;
256 }
257
258 /// Get the specified flag.
259 bool getFlag(TokenFlags Flag) const {
260 return (Flags & Flag) != 0;
261 }
262
263 /// Unset the specified flag.
265 Flags &= ~Flag;
266 }
267
268 /// Return the internal represtation of the flags.
269 ///
270 /// This is only intended for low-level operations such as writing tokens to
271 /// disk.
272 unsigned getFlags() const {
273 return Flags;
274 }
275
276 /// Set a flag to either true or false.
277 void setFlagValue(TokenFlags Flag, bool Val) {
278 if (Val)
279 setFlag(Flag);
280 else
281 clearFlag(Flag);
282 }
283
284 /// isAtStartOfLine - Return true if this token is at the start of a line.
285 ///
286 bool isAtStartOfLine() const { return getFlag(StartOfLine); }
287
288 /// isAtPhysicalStartOfLine - Return true if this token is at the start of a
289 /// physical line.
291
292 /// Return true if this token has whitespace before it.
293 ///
294 bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
295
296 /// Return true if this identifier token should never
297 /// be expanded in the future, due to C99 6.10.3.4p2.
298 bool isExpandDisabled() const { return getFlag(DisableExpand); }
299
300 /// Return true if we have an ObjC keyword identifier.
301 bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
302
303 /// Return the ObjC keyword kind.
305
306 /// Return true if we have a C++20 modules contextual keyword (export,
307 /// import or module).
308 bool isModuleContextualKeyword(bool AllowExport = true) const;
309
310 bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
311
312 /// Return true if this token has trigraphs or escaped newlines in it.
313 bool needsCleaning() const { return getFlag(NeedsCleaning); }
314
315 /// Return true if this token has an empty macro before it.
316 ///
318
319 /// Return true if this token is a string or character literal which
320 /// has a ud-suffix.
321 bool hasUDSuffix() const { return getFlag(HasUDSuffix); }
322
323 /// Returns true if this token contains a universal character name.
324 bool hasUCN() const { return getFlag(HasUCN); }
325
326 /// Returns true if this token is formed by macro by stringizing or charizing
327 /// operator.
329
330 /// Returns true if the comma after this token was elided.
331 bool commaAfterElided() const { return getFlag(CommaAfterElided); }
332
333 /// Returns true if this token is an editor placeholder.
334 ///
335 /// Editor placeholders are produced by the code-completion engine and are
336 /// represented as characters between '<#' and '#>' in the source code. The
337 /// lexer uses identifier tokens to represent placeholders.
339
343};
344
345/// Information about the conditional stack (\#if directives)
346/// currently active.
348 /// Location where the conditional started.
350
351 /// True if this was contained in a skipping directive, e.g.,
352 /// in a "\#if 0" block.
354
355 /// True if we have emitted tokens already, and now we're in
356 /// an \#else block or something. Only useful in Skipping blocks.
358
359 /// True if we've seen a \#else in this block. If so,
360 /// \#elif/\#else directives are not allowed.
362};
363
364// Extra information needed for annotation tokens.
370} // end namespace clang
371
372#endif // LLVM_CLANG_LEX_TOKEN_H
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
One of these records is kept for each identifier that is lexed.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
A trivial tuple used to represent a source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:197
void setLiteralData(const char *Ptr)
Definition Token.h:239
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition Token.h:120
SourceLocation getEndLoc() const
Definition Token.h:169
unsigned getFlags() const
Return the internal representation of the flags.
Definition Token.h:272
void setAnnotationEndLoc(SourceLocation L)
Definition Token.h:160
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition Token.h:324
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition Token.h:264
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition Token.h:126
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:142
const char * getName() const
Definition Token.h:184
unsigned getLength() const
Definition Token.h:145
void setLength(unsigned Len)
Definition Token.h:151
bool isEditorPlaceholder() const
Returns true if this token is an editor placeholder.
Definition Token.h:338
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition Token.h:298
void setKind(tok::TokenKind K)
Definition Token.h:100
bool commaAfterElided() const
Returns true if the comma after this token was elided.
Definition Token.h:331
SourceLocation getAnnotationEndLoc() const
Definition Token.h:156
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition Lexer.cpp:68
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition Token.h:104
void * getAnnotationValue() const
Definition Token.h:244
tok::TokenKind getKind() const
Definition Token.h:99
bool isRegularKeywordAttribute() const
Return true if the token is a keyword that is parsed in the same position as a standard attribute,...
Definition Token.h:136
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition Token.h:286
bool isOneOf(Ts... Ks) const
Definition Token.h:105
void setEofData(const void *D)
Definition Token.h:214
bool getFlag(TokenFlags Flag) const
Get the specified flag.
Definition Token.h:259
@ DisableExpand
Definition Token.h:79
@ PhysicalStartOfLine
Definition Token.h:95
@ IsEditorPlaceholder
Definition Token.h:88
@ IgnoredComma
Definition Token.h:84
@ HasSeenNoTrivialPPDirective
Definition Token.h:92
@ IsReinjected
Definition Token.h:89
@ LeadingEmptyMacro
Definition Token.h:81
@ LeadingSpace
Definition Token.h:77
@ StartOfLine
Definition Token.h:75
@ StringifiedInMacro
Definition Token.h:85
@ HasUDSuffix
Definition Token.h:82
@ CommaAfterElided
Definition Token.h:87
@ NeedsCleaning
Definition Token.h:80
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import or module).
Definition Lexer.cpp:75
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition Token.h:294
SourceRange getAnnotationRange() const
SourceRange of the group of tokens that this annotation token represents.
Definition Token.h:176
void setLocation(SourceLocation L)
Definition Token.h:150
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition Token.h:317
void setRawIdentifierData(const char *Ptr)
Definition Token.h:227
bool isAtPhysicalStartOfLine() const
isAtPhysicalStartOfLine - Return true if this token is at the start of a physical line.
Definition Token.h:290
bool isNot(tok::TokenKind K) const
Definition Token.h:111
bool hasPtrData() const
Definition Token.h:195
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition Token.h:131
void setAnnotationValue(void *val)
Definition Token.h:248
const void * getEofData() const
Definition Token.h:210
bool hasUDSuffix() const
Return true if this token is a string or character literal which has a ud-suffix.
Definition Token.h:321
bool isNoneOf(Ts... Ks) const
Definition Token.h:112
bool stringifiedInMacro() const
Returns true if this token is formed by macro by stringizing or charizing operator.
Definition Token.h:328
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition Lexer.cpp:59
void setAnnotationRange(SourceRange R)
Definition Token.h:179
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
Definition Lexer.cpp:87
void startToken()
Reset all flags to cleared.
Definition Token.h:187
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition Token.h:313
bool hasSeenNoTrivialPPDirective() const
Definition Token.h:340
void setIdentifierInfo(IdentifierInfo *II)
Definition Token.h:206
SourceLocation getLastLoc() const
Definition Token.h:165
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition Token.h:277
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
Definition Token.h:223
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition Token.h:235
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition Token.h:254
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
bool isAnyIdentifier(TokenKind K)
Return true if this is a raw identifier or an identifier kind.
Definition TokenKinds.h:87
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition TokenKinds.h:41
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition TokenKinds.h:25
constexpr bool isRegularKeywordAttribute(TokenKind K)
Definition TokenKinds.h:124
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition TokenKinds.h:101
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
The JSON file list parser is used to communicate input to InstallAPI.
Information about the conditional stack (#if directives) currently active.
Definition Token.h:347
bool FoundNonSkip
True if we have emitted tokens already, and now we're in an #else block or something.
Definition Token.h:357
SourceLocation IfLoc
Location where the conditional started.
Definition Token.h:349
bool WasSkipping
True if this was contained in a skipping directive, e.g., in a "\#if 0" block.
Definition Token.h:353
bool FoundElse
True if we've seen a #else in this block.
Definition Token.h:361
ArrayRef< Token > Toks
Definition Token.h:368