clang 17.0.0git
FormatTokenLexer.h
Go to the documentation of this file.
1//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains FormatTokenLexer, which tokenizes a source file
11/// into a token stream suitable for ClangFormat.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
16#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
23#include "clang/Format/Format.h"
24#include "llvm/ADT/MapVector.h"
25#include "llvm/ADT/StringSet.h"
26#include "llvm/Support/Regex.h"
27
28#include <stack>
29
30namespace clang {
31namespace format {
32
37};
38
40public:
// Constructor (declaration only; the definition is not part of this listing).
// Each parameter -- SourceMgr, ID, Column, Style, Encoding, Allocator and
// IdentTable -- has the same name and type as a data member declared further
// down in this class. NOTE(review): presumably each one initializes its
// namesake member; confirm against the definition.
41 FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
42 const FormatStyle &Style, encoding::Encoding Encoding,
43 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
44 IdentifierTable &IdentTable);
45
47
// Returns a const reference to this lexer's Keywords member (the
// AdditionalKeywords object declared among the private data members).
48 const AdditionalKeywords &getKeywords() { return Keywords; }
49
50private:
51 void tryMergePreviousTokens();
52
// Individual merge/transform helpers (declarations only; no definitions are
// visible in this listing).
// NOTE(review): judging by the names, each handles one language- or
// construct-specific case, and the bool result presumably reports whether the
// token stream was changed -- confirm against the definitions.
53 bool tryMergeLessLess();
54 bool tryMergeNSStringLiteral();
55 bool tryMergeJSPrivateIdentifier();
56 bool tryMergeCSharpStringLiteral();
57 bool tryMergeCSharpKeywordVariables();
58 bool tryMergeNullishCoalescingEqual();
59 bool tryTransformCSharpForEach();
60 bool tryMergeForEach();
61 bool tryTransformTryUsageForC();
62
63 // Merge the most recently lexed tokens into a single token if their kinds are
64 // correct.
65 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
66 // Merge without checking their kinds.
67 bool tryMergeTokens(size_t Count, TokenType NewType);
68 // Merge if their kinds match any one of Kinds.
69 bool tryMergeTokensAny(ArrayRef<ArrayRef<tok::TokenKind>> Kinds,
70 TokenType NewType);
71
72 // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
73 bool precedesOperand(FormatToken *Tok);
74
// NOTE(review): presumably reports whether \p Prev is a token after which a
// JavaScript regex literal may start (cf. "a location where JavaScript allows
// regex literals" in the tryParseJSRegexLiteral comment) -- confirm against
// the definition, which is not part of this listing.
75 bool canPrecedeRegexLiteral(FormatToken *Prev);
76
77 // Tries to parse a JavaScript Regex literal starting at the current token,
78 // if that begins with a slash and is in a location where JavaScript allows
79 // regex literals. Changes the current token to a regex literal and updates
80 // its text if successful.
81 void tryParseJSRegexLiteral();
82
83 // Handles JavaScript template strings.
84 //
85 // JavaScript template strings use backticks ('`') as delimiters, and allow
86 // embedding expressions nested in ${expr-here}. Template strings can be
87 // nested recursively, i.e. expressions can contain template strings in turn.
88 //
89 // The code below parses starting from a backtick, up to a closing backtick or
90 // an opening ${. It also maintains a stack of lexing contexts to handle
91 // nested template parts by balancing curly braces.
92 void handleTemplateStrings();
93
94 void handleCSharpVerbatimAndInterpolatedStrings();
95
96 void tryParsePythonComment();
97
98 bool tryMerge_TMacro();
99
100 bool tryMergeConflictMarkers();
101
102 void truncateToken(size_t NewLen);
103
104 FormatToken *getStashedToken();
105
106 FormatToken *getNextToken();
107
108 FormatToken *FormatTok;
109 bool IsFirstToken;
// Stack of LexerState values. NOTE(review): presumably the "stack of lexing
// contexts" described in the handleTemplateStrings comment -- confirm. The
// LexerState type itself is not declared in the visible part of this listing
// (the embedded numbering skips from 32 to 37 near the top of the namespace).
110 std::stack<LexerState> StateStack;
111 unsigned Column;
112 unsigned TrailingWhitespace;
113 std::unique_ptr<Lexer> Lex;
114 LangOptions LangOpts;
115 const SourceManager &SourceMgr;
116 FileID ID;
117 const FormatStyle &Style;
118 IdentifierTable &IdentTable;
119 AdditionalKeywords Keywords;
120 encoding::Encoding Encoding;
121 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
122 // Index (in 'Tokens') of the last token that starts a new line.
// NOTE(review): the 'Tokens' member referred to above is not declared anywhere
// in this listing; the embedded numbering skips from 123 to 125 immediately
// after this member, so its declaration was most likely dropped -- confirm
// against the original header.
123 unsigned FirstInLineIndex;
125
126 llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
127
128 bool FormattingDisabled;
129
130 llvm::Regex MacroBlockBeginRegex;
131 llvm::Regex MacroBlockEndRegex;
132
133 // Targets that may appear inside a C# attribute.
134 static const llvm::StringSet<> CSharpAttributeTargets;
135
136 /// Handle Verilog-specific tokens.
137 bool readRawTokenVerilogSpecific(Token &Tok);
138
139 void readRawToken(FormatToken &Tok);
140
141 void resetLexer(unsigned Offset);
142};
143
144} // namespace format
145} // namespace clang
146
147#endif
Contains functions for text encoding manipulation.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
unsigned Offset
Definition: Format.cpp:2774
Various functions to configurably format source code.
Defines the clang::LangOptions interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Implements an efficient mapping from strings to IdentifierInfo nodes.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:82
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
const AdditionalKeywords & getKeywords()
ArrayRef< FormatToken * > lex()
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:158
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:924
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:243