clang 22.0.0git
IntegerLiteralSeparatorFixer.cpp
Go to the documentation of this file.
1//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
11/// literal separators.
12///
13//===----------------------------------------------------------------------===//
14
16
17namespace clang {
18namespace format {
19
20enum class Base { Binary, Decimal, Hex, Other };
21
22static Base getBase(StringRef IntegerLiteral) {
23 assert(IntegerLiteral.size() > 1);
24
25 if (IntegerLiteral[0] > '0') {
26 assert(IntegerLiteral[0] <= '9');
27 return Base::Decimal;
28 }
29
30 assert(IntegerLiteral[0] == '0');
31
32 switch (IntegerLiteral[1]) {
33 case 'b':
34 case 'B':
35 return Base::Binary;
36 case 'x':
37 case 'X':
38 return Base::Hex;
39 default:
40 return Base::Other;
41 }
42}
43
44std::pair<tooling::Replacements, unsigned>
46 const FormatStyle &Style) {
47 switch (Style.Language) {
48 case FormatStyle::LK_CSharp:
49 case FormatStyle::LK_Java:
50 case FormatStyle::LK_JavaScript:
51 Separator = '_';
52 break;
53 case FormatStyle::LK_Cpp:
54 case FormatStyle::LK_ObjC:
55 if (Style.Standard >= FormatStyle::LS_Cpp14) {
56 Separator = '\'';
57 break;
58 }
59 [[fallthrough]];
60 default:
61 return {};
62 }
63
64 const auto &Option = Style.IntegerLiteralSeparator;
65 const auto Binary = Option.Binary;
66 const auto Decimal = Option.Decimal;
67 const auto Hex = Option.Hex;
68 const bool SkipBinary = Binary == 0;
69 const bool SkipDecimal = Decimal == 0;
70 const bool SkipHex = Hex == 0;
71
72 if (SkipBinary && SkipDecimal && SkipHex)
73 return {};
74
75 auto CalcMinAndMax = [](int DigitsPerGroup, int MinDigitsInsert,
76 int MaxDigitsRemove) {
77 MinDigitsInsert = std::max(MinDigitsInsert, DigitsPerGroup + 1);
78 if (MinDigitsInsert < 1)
79 MaxDigitsRemove = 0;
80 else if (MaxDigitsRemove < 1 || MaxDigitsRemove >= MinDigitsInsert)
81 MaxDigitsRemove = MinDigitsInsert - 1;
82 return std::pair(MinDigitsInsert, MaxDigitsRemove);
83 };
84
85 const auto [BinaryMinDigitsInsert, BinaryMaxDigitsRemove] = CalcMinAndMax(
86 Binary, Option.BinaryMinDigitsInsert, Option.BinaryMaxDigitsRemove);
87 const auto [DecimalMinDigitsInsert, DecimalMaxDigitsRemove] = CalcMinAndMax(
88 Decimal, Option.DecimalMinDigitsInsert, Option.DecimalMaxDigitsRemove);
89 const auto [HexMinDigitsInsert, HexMaxDigitsRemove] =
90 CalcMinAndMax(Hex, Option.HexMinDigitsInsert, Option.HexMaxDigitsRemove);
91
92 const auto &SourceMgr = Env.getSourceManager();
93 AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
94
95 const auto ID = Env.getFileID();
96 const auto LangOpts = getFormattingLangOpts(Style);
97 Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
99
100 Token Tok;
102
103 for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
104 auto Length = Tok.getLength();
105 if (Length < 2)
106 continue;
107 auto Location = Tok.getLocation();
108 auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
109 if (Tok.is(tok::comment)) {
111 Skip = true;
112 else if (isClangFormatOn(Text))
113 Skip = false;
114 continue;
115 }
116 if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
117 !AffectedRangeMgr.affectsCharSourceRange(
118 CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
119 continue;
120 }
121 const auto B = getBase(Text);
122 const bool IsBase2 = B == Base::Binary;
123 const bool IsBase10 = B == Base::Decimal;
124 const bool IsBase16 = B == Base::Hex;
125 if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
126 (IsBase16 && SkipHex) || B == Base::Other) {
127 continue;
128 }
129 if (Style.isCpp()) {
130 // Hex alpha digits a-f/A-F must be at the end of the string literal.
131 static constexpr StringRef Suffixes("_himnsuyd");
132 if (const auto Pos =
133 Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
134 Pos != StringRef::npos) {
135 Text = Text.substr(0, Pos);
136 Length = Pos;
137 }
138 }
139 if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
140 (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
141 continue;
142 }
143 const auto Start = Text[0] == '0' ? 2 : 0;
144 auto End = Text.find_first_of("uUlLzZn", Start);
145 if (End == StringRef::npos)
146 End = Length;
147 if (Start > 0 || End < Length) {
148 Length = End - Start;
149 Text = Text.substr(Start, Length);
150 }
151 auto DigitsPerGroup = Decimal;
152 auto MinDigitsInsert = DecimalMinDigitsInsert;
153 auto MaxDigitsRemove = DecimalMaxDigitsRemove;
154 if (IsBase2) {
155 DigitsPerGroup = Binary;
156 MinDigitsInsert = BinaryMinDigitsInsert;
157 MaxDigitsRemove = BinaryMaxDigitsRemove;
158 } else if (IsBase16) {
159 DigitsPerGroup = Hex;
160 MinDigitsInsert = HexMinDigitsInsert;
161 MaxDigitsRemove = HexMaxDigitsRemove;
162 }
163 const auto SeparatorCount = Text.count(Separator);
164 const int DigitCount = Length - SeparatorCount;
165 if (DigitCount > MaxDigitsRemove && DigitCount < MinDigitsInsert)
166 continue;
167 const bool RemoveSeparator =
168 DigitsPerGroup < 0 || DigitCount <= MaxDigitsRemove;
169 if (RemoveSeparator && SeparatorCount == 0)
170 continue;
171 if (!RemoveSeparator && SeparatorCount > 0 &&
172 checkSeparator(Text, DigitsPerGroup)) {
173 continue;
174 }
175 const auto &Formatted =
176 format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
177 assert(Formatted != Text);
178 if (Start > 0)
179 Location = Location.getLocWithOffset(Start);
180 cantFail(Result.add(
181 tooling::Replacement(SourceMgr, Location, Length, Formatted)));
182 }
183
184 return {Result, 0};
185}
186
187bool IntegerLiteralSeparatorFixer::checkSeparator(StringRef IntegerLiteral,
188 int DigitsPerGroup) const {
189 assert(DigitsPerGroup > 0);
190
191 int I = 0;
192 for (auto C : llvm::reverse(IntegerLiteral)) {
193 if (C == Separator) {
194 if (I < DigitsPerGroup)
195 return false;
196 I = 0;
197 } else {
198 if (I == DigitsPerGroup)
199 return false;
200 ++I;
201 }
202 }
203
204 return true;
205}
206
207std::string IntegerLiteralSeparatorFixer::format(StringRef IntegerLiteral,
208 int DigitsPerGroup,
209 int DigitCount,
210 bool RemoveSeparator) const {
211 assert(DigitsPerGroup != 0);
212
213 std::string Formatted;
214
215 if (RemoveSeparator) {
216 for (auto C : IntegerLiteral)
217 if (C != Separator)
218 Formatted.push_back(C);
219 return Formatted;
220 }
221
222 int Remainder = DigitCount % DigitsPerGroup;
223
224 int I = 0;
225 for (auto C : IntegerLiteral) {
226 if (C == Separator)
227 continue;
228 if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
229 Formatted.push_back(Separator);
230 I = 0;
231 Remainder = 0;
232 }
233 Formatted.push_back(C);
234 ++I;
235 }
236
237 return Formatted;
238}
239
240} // namespace format
241} // namespace clang
static constexpr CPUSuffix Suffixes[]
Definition Hexagon.cpp:255
Token Tok
The Token.
This file declares IntegerLiteralSeparatorFixer that fixes C++ integer literal separators.
static CharSourceRange getCharRange(SourceRange R)
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition Lexer.h:78
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
Definition Lexer.h:236
void SetCommentRetentionState(bool Mode)
SetCommentRetentionMode - Change the comment retention mode of the lexer to the specified mode.
Definition Lexer.h:269
Token - This structure provides full information about a lexed token.
Definition Token.h:36
bool affectsCharSourceRange(const CharSourceRange &Range)
SourceManager & getSourceManager() const
ArrayRef< CharSourceRange > getCharRanges() const
std::pair< tooling::Replacements, unsigned > process(const Environment &Env, const FormatStyle &Style)
A text replacement.
Definition Replacement.h:83
Maintains a set of replacements that are conflict-free.
static Base getBase(StringRef IntegerLiteral)
bool isClangFormatOff(StringRef Comment)
Definition Format.cpp:4591
bool isClangFormatOn(StringRef Comment)
Definition Format.cpp:4587
LangOptions getFormattingLangOpts(const FormatStyle &Style)
Definition Format.cpp:4231
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
Definition TypeBase.h:905