clang 20.0.0git
IntegerLiteralSeparatorFixer.cpp
Go to the documentation of this file.
1//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
11/// literal separators.
12///
13//===----------------------------------------------------------------------===//
14
16
17namespace clang {
18namespace format {
19
/// Radix category of an integer literal, as classified by getBase().
enum class Base {
  Binary,  // "0b" / "0B" prefix.
  Decimal, // First character is a nonzero digit.
  Hex,     // "0x" / "0X" prefix.
  Other,   // Leading '0' followed by anything else.
};
21
22static Base getBase(const StringRef IntegerLiteral) {
23 assert(IntegerLiteral.size() > 1);
24
25 if (IntegerLiteral[0] > '0') {
26 assert(IntegerLiteral[0] <= '9');
27 return Base::Decimal;
28 }
29
30 assert(IntegerLiteral[0] == '0');
31
32 switch (IntegerLiteral[1]) {
33 case 'b':
34 case 'B':
35 return Base::Binary;
36 case 'x':
37 case 'X':
38 return Base::Hex;
39 default:
40 return Base::Other;
41 }
42}
43
44std::pair<tooling::Replacements, unsigned>
46 const FormatStyle &Style) {
47 switch (Style.Language) {
50 Separator = '\'';
51 break;
55 Separator = '_';
56 break;
57 default:
58 return {};
59 }
60
61 const auto &Option = Style.IntegerLiteralSeparator;
62 const auto Binary = Option.Binary;
63 const auto Decimal = Option.Decimal;
64 const auto Hex = Option.Hex;
65 const bool SkipBinary = Binary == 0;
66 const bool SkipDecimal = Decimal == 0;
67 const bool SkipHex = Hex == 0;
68
69 if (SkipBinary && SkipDecimal && SkipHex)
70 return {};
71
72 const auto BinaryMinDigits =
73 std::max((int)Option.BinaryMinDigits, Binary + 1);
74 const auto DecimalMinDigits =
75 std::max((int)Option.DecimalMinDigits, Decimal + 1);
76 const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
77
78 const auto &SourceMgr = Env.getSourceManager();
79 AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
80
81 const auto ID = Env.getFileID();
82 const auto LangOpts = getFormattingLangOpts(Style);
83 Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
85
86 Token Tok;
88
89 for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
90 auto Length = Tok.getLength();
91 if (Length < 2)
92 continue;
93 auto Location = Tok.getLocation();
94 auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
95 if (Tok.is(tok::comment)) {
97 Skip = true;
98 else if (isClangFormatOn(Text))
99 Skip = false;
100 continue;
101 }
102 if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
103 !AffectedRangeMgr.affectsCharSourceRange(
104 CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
105 continue;
106 }
107 const auto B = getBase(Text);
108 const bool IsBase2 = B == Base::Binary;
109 const bool IsBase10 = B == Base::Decimal;
110 const bool IsBase16 = B == Base::Hex;
111 if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
112 (IsBase16 && SkipHex) || B == Base::Other) {
113 continue;
114 }
115 if (Style.isCpp()) {
116 // Hex alpha digits a-f/A-F must be at the end of the string literal.
117 StringRef Suffixes = "_himnsuyd";
118 if (const auto Pos =
119 Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
120 Pos != StringRef::npos) {
121 Text = Text.substr(0, Pos);
122 Length = Pos;
123 }
124 }
125 if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
126 (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
127 continue;
128 }
129 const auto Start = Text[0] == '0' ? 2 : 0;
130 auto End = Text.find_first_of("uUlLzZn", Start);
131 if (End == StringRef::npos)
132 End = Length;
133 if (Start > 0 || End < Length) {
134 Length = End - Start;
135 Text = Text.substr(Start, Length);
136 }
137 auto DigitsPerGroup = Decimal;
138 auto MinDigits = DecimalMinDigits;
139 if (IsBase2) {
140 DigitsPerGroup = Binary;
141 MinDigits = BinaryMinDigits;
142 } else if (IsBase16) {
143 DigitsPerGroup = Hex;
144 MinDigits = HexMinDigits;
145 }
146 const auto SeparatorCount = Text.count(Separator);
147 const int DigitCount = Length - SeparatorCount;
148 const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
149 if (RemoveSeparator && SeparatorCount == 0)
150 continue;
151 if (!RemoveSeparator && SeparatorCount > 0 &&
152 checkSeparator(Text, DigitsPerGroup)) {
153 continue;
154 }
155 const auto &Formatted =
156 format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
157 assert(Formatted != Text);
158 if (Start > 0)
159 Location = Location.getLocWithOffset(Start);
160 cantFail(Result.add(
161 tooling::Replacement(SourceMgr, Location, Length, Formatted)));
162 }
163
164 return {Result, 0};
165}
166
167bool IntegerLiteralSeparatorFixer::checkSeparator(
168 const StringRef IntegerLiteral, int DigitsPerGroup) const {
169 assert(DigitsPerGroup > 0);
170
171 int I = 0;
172 for (auto C : llvm::reverse(IntegerLiteral)) {
173 if (C == Separator) {
174 if (I < DigitsPerGroup)
175 return false;
176 I = 0;
177 } else {
178 if (I == DigitsPerGroup)
179 return false;
180 ++I;
181 }
182 }
183
184 return true;
185}
186
187std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
188 int DigitsPerGroup,
189 int DigitCount,
190 bool RemoveSeparator) const {
191 assert(DigitsPerGroup != 0);
192
193 std::string Formatted;
194
195 if (RemoveSeparator) {
196 for (auto C : IntegerLiteral)
197 if (C != Separator)
198 Formatted.push_back(C);
199 return Formatted;
200 }
201
202 int Remainder = DigitCount % DigitsPerGroup;
203
204 int I = 0;
205 for (auto C : IntegerLiteral) {
206 if (C == Separator)
207 continue;
208 if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
209 Formatted.push_back(Separator);
210 I = 0;
211 Remainder = 0;
212 }
213 Formatted.push_back(C);
214 ++I;
215 }
216
217 return Formatted;
218}
219
220} // namespace format
221} // namespace clang
static char ID
Definition: Arena.cpp:183
static constexpr CPUSuffix Suffixes[]
Definition: Hexagon.cpp:231
StringRef Text
Definition: Format.cpp:3033
const Environment & Env
Definition: HTMLLogger.cpp:147
This file declares IntegerLiteralSeparatorFixer that fixes C++ integer literal separators.
static CharSourceRange getCharRange(SourceRange R)
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition: Lexer.h:78
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
Definition: Lexer.h:236
void SetCommentRetentionState(bool Mode)
SetCommentRetentionMode - Change the comment retention mode of the lexer to the specified mode.
Definition: Lexer.h:269
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
SourceLocation getEndLoc() const
Definition: Token.h:159
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132
unsigned getLength() const
Definition: Token.h:135
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
bool affectsCharSourceRange(const CharSourceRange &Range)
std::pair< tooling::Replacements, unsigned > process(const Environment &Env, const FormatStyle &Style)
A text replacement.
Definition: Replacement.h:83
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
static Base getBase(const StringRef IntegerLiteral)
bool isClangFormatOff(StringRef Comment)
Definition: Format.cpp:4209
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3904
bool isClangFormatOn(StringRef Comment)
Definition: Format.cpp:4205
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
@ LK_CSharp
Should be used for C#.
Definition: Format.h:3262
@ LK_Java
Should be used for Java.
Definition: Format.h:3264
@ LK_Cpp
Should be used for C, C++.
Definition: Format.h:3260
@ LK_JavaScript
Should be used for JavaScript.
Definition: Format.h:3266
@ LK_ObjC
Should be used for Objective-C, Objective-C++.
Definition: Format.h:3270