clang 22.0.0git
NumericLiteralCaseFixer.cpp
Go to the documentation of this file.
1//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements NumericLiteralCaseFixer that standardizes character
11/// case within numeric literals.
12///
13//===----------------------------------------------------------------------===//
14
16#include "NumericLiteralInfo.h"
17
18#include "llvm/ADT/StringExtras.h"
19
20#include <algorithm>
21
22namespace clang {
23namespace format {
24
25static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) {
26 // Check if language is supported.
27 switch (Style.Language) {
28 case FormatStyle::LK_C:
29 case FormatStyle::LK_Cpp:
30 case FormatStyle::LK_ObjC:
31 case FormatStyle::LK_CSharp:
32 case FormatStyle::LK_Java:
33 case FormatStyle::LK_JavaScript:
34 break;
35 default:
36 return false;
37 }
38
39 // Check if style options are set.
40 const auto &Option = Style.NumericLiteralCase;
41 const auto Leave = FormatStyle::NLCS_Leave;
42 return Option.Prefix != Leave || Option.HexDigit != Leave ||
43 Option.ExponentLetter != Leave || Option.Suffix != Leave;
44}
45
46static std::string
47transformComponent(StringRef Component,
48 FormatStyle::NumericLiteralComponentStyle ConfigValue) {
49 switch (ConfigValue) {
50 case FormatStyle::NLCS_Upper:
51 return Component.upper();
52 case FormatStyle::NLCS_Lower:
53 return Component.lower();
54 default:
55 // Covers FormatStyle::NLCS_Leave.
56 return Component.str();
57 }
58}
59
60/// Test if Suffix matches a C++ literal reserved by the library.
61/// Matches against all suffixes reserved in the C++23 standard.
62static bool matchesReservedSuffix(StringRef Suffix) {
63 static constexpr std::array<StringRef, 11> SortedReservedSuffixes = {
64 "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y",
65 };
66
67 // This can be static_assert when we have access to constexpr is_sorted in
68 // C++ 20.
69 assert(llvm::is_sorted(SortedReservedSuffixes) &&
70 "Must be sorted as precondition for lower_bound().");
71
72 auto entry = llvm::lower_bound(SortedReservedSuffixes, Suffix);
73 if (entry == SortedReservedSuffixes.cend())
74 return false;
75 return *entry == Suffix;
76}
77
78static std::string format(StringRef NumericLiteral, const FormatStyle &Style) {
79 const char Separator = Style.isCpp() ? '\'' : '_';
80 const NumericLiteralInfo Info(NumericLiteral, Separator);
81 const bool HasBaseLetter = Info.BaseLetterPos != StringRef::npos;
82 const bool HasExponent = Info.ExponentLetterPos != StringRef::npos;
83 const bool HasSuffix = Info.SuffixPos != StringRef::npos;
84
85 std::string Formatted;
86
87 if (HasBaseLetter) {
88 Formatted +=
89 transformComponent(NumericLiteral.take_front(1 + Info.BaseLetterPos),
90 Style.NumericLiteralCase.Prefix);
91 }
92 // Reformat this slice as HexDigit whether or not the digit has hexadecimal
93 // characters because binary/decimal/octal digits are unchanged.
94 Formatted += transformComponent(
95 NumericLiteral.slice(HasBaseLetter ? 1 + Info.BaseLetterPos : 0,
96 HasExponent ? Info.ExponentLetterPos
97 : HasSuffix ? Info.SuffixPos
98 : NumericLiteral.size()),
99 Style.NumericLiteralCase.HexDigit);
100
101 if (HasExponent) {
102 Formatted += transformComponent(
103 NumericLiteral.slice(Info.ExponentLetterPos,
104 HasSuffix ? Info.SuffixPos
105 : NumericLiteral.size()),
106 Style.NumericLiteralCase.ExponentLetter);
107 }
108
109 if (HasSuffix) {
110 StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos);
111 if (matchesReservedSuffix(Suffix) || Suffix.front() == '_') {
112 // In C++, it is idiomatic, but NOT standardized to define user-defined
113 // literals with a leading '_'. Omit user defined literals and standard
114 // reserved suffixes from transformation.
115 Formatted += Suffix.str();
116 } else {
117 Formatted += transformComponent(Suffix, Style.NumericLiteralCase.Suffix);
118 }
119 }
120
121 return Formatted;
122}
123
124std::pair<tooling::Replacements, unsigned>
126 const FormatStyle &Style) {
128 return {};
129
130 const auto &SourceMgr = Env.getSourceManager();
131 AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
132
133 const auto ID = Env.getFileID();
134 const auto LangOpts = getFormattingLangOpts(Style);
135 Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
136 Lex.SetCommentRetentionState(true);
137
138 Token Tok;
140
141 for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
142 // Skip tokens that are too small to contain a formattable literal.
143 // Size=2 is the smallest possible literal that could contain formattable
144 // components, for example "1u".
145 auto Length = Tok.getLength();
146 if (Length < 2)
147 continue;
148
149 // Service clang-format off/on comments.
150 auto Location = Tok.getLocation();
151 auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
152 if (Tok.is(tok::comment)) {
154 Skip = true;
155 else if (isClangFormatOn(Text))
156 Skip = false;
157 continue;
158 }
159
160 if (Skip || Tok.isNot(tok::numeric_constant) ||
161 !AffectedRangeMgr.affectsCharSourceRange(
162 CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
163 continue;
164 }
165
166 const auto Formatted = format(Text, Style);
167 if (Formatted != Text) {
168 cantFail(Result.add(
169 tooling::Replacement(SourceMgr, Location, Length, Formatted)));
170 }
171 }
172
173 return {Result, 0};
174}
175
176} // namespace format
177} // namespace clang
Token Tok
The Token.
This file declares NumericLiteralCaseFixer that standardizes character case within numeric literals.
static CharSourceRange getCharRange(SourceRange R)
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition Lexer.h:78
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
Definition Lexer.h:236
void SetCommentRetentionState(bool Mode)
SetCommentRetentionMode - Change the comment retention mode of the lexer to the specified mode.
Definition Lexer.h:269
Token - This structure provides full information about a lexed token.
Definition Token.h:36
bool affectsCharSourceRange(const CharSourceRange &Range)
SourceManager & getSourceManager() const
ArrayRef< CharSourceRange > getCharRanges() const
std::pair< tooling::Replacements, unsigned > process(const Environment &Env, const FormatStyle &Style)
A text replacement.
Definition Replacement.h:83
Maintains a set of replacements that are conflict-free.
static std::string format(StringRef NumericLiteral, const FormatStyle &Style)
static std::string transformComponent(StringRef Component, FormatStyle::NumericLiteralComponentStyle ConfigValue)
static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style)
static bool matchesReservedSuffix(StringRef Suffix)
Test if Suffix matches a C++ literal reserved by the library.
bool isClangFormatOff(StringRef Comment)
Definition Format.cpp:4465
bool isClangFormatOn(StringRef Comment)
Definition Format.cpp:4461
LangOptions getFormattingLangOpts(const FormatStyle &Style)
Definition Format.cpp:4106
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
Definition TypeBase.h:905