clang-tools 19.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "LexerUtils.h"
10#include "clang/AST/AST.h"
11#include "clang/Basic/SourceManager.h"
12#include <optional>
13#include <utility>
14
16
17std::pair<Token, SourceLocation>
18getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
19 const LangOptions &LangOpts, bool SkipComments) {
20 Token Token;
21 Token.setKind(tok::unknown);
22
23 Location = Location.getLocWithOffset(-1);
24 if (Location.isInvalid())
25 return {Token, Location};
26
27 auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
28 while (Location != StartOfFile) {
29 Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
30 if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
31 (!SkipComments || !Token.is(tok::comment))) {
32 break;
33 }
34 Location = Location.getLocWithOffset(-1);
35 }
36 return {Token, Location};
37}
38
39Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
40 const LangOptions &LangOpts, bool SkipComments) {
41 auto [Token, Start] =
42 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
43 return Token;
44}
45
46SourceLocation findPreviousTokenStart(SourceLocation Start,
47 const SourceManager &SM,
48 const LangOptions &LangOpts) {
49 if (Start.isInvalid() || Start.isMacroID())
50 return {};
51
52 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
53 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
54 return {};
55
56 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
57}
58
59SourceLocation findPreviousTokenKind(SourceLocation Start,
60 const SourceManager &SM,
61 const LangOptions &LangOpts,
62 tok::TokenKind TK) {
63 if (Start.isInvalid() || Start.isMacroID())
64 return {};
65
66 while (true) {
67 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
68 if (L.isInvalid() || L.isMacroID())
69 return {};
70
71 Token T;
72 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
73 return {};
74
75 if (T.is(TK))
76 return T.getLocation();
77
78 Start = L;
79 }
80}
81
82SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
83 const LangOptions &LangOpts) {
84 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
85}
86
87std::optional<Token>
88findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM,
89 const LangOptions &LangOpts) {
90 // `Lexer::findNextToken` will ignore comment
91 if (Start.isMacroID())
92 return std::nullopt;
93 Start = Lexer::getLocForEndOfToken(Start, 0, SM, LangOpts);
94 // Break down the source location.
95 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Start);
96 bool InvalidTemp = false;
97 StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
98 if (InvalidTemp)
99 return std::nullopt;
100 // Lex from the start of the given location.
101 Lexer L(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
102 File.data() + LocInfo.second, File.end());
103 L.SetCommentRetentionState(true);
104 // Find the token.
105 Token Tok;
106 L.LexFromRawLexer(Tok);
107 return Tok;
108}
109
110std::optional<Token>
111findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
112 const LangOptions &LangOpts) {
113 while (Start.isValid()) {
114 std::optional<Token> CurrentToken =
115 Lexer::findNextToken(Start, SM, LangOpts);
116 if (!CurrentToken || !CurrentToken->is(tok::comment))
117 return CurrentToken;
118
119 Start = CurrentToken->getLocation();
120 }
121
122 return std::nullopt;
123}
124
126 const SourceManager &SM,
127 const LangOptions &LangOpts) {
128 assert(Range.isValid() && "Invalid Range for relexing provided");
129 SourceLocation Loc = Range.getBegin();
130
131 while (Loc <= Range.getEnd()) {
132 if (Loc.isMacroID())
133 return true;
134
135 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
136
137 if (!Tok)
138 return true;
139
140 if (Tok->is(tok::hash))
141 return true;
142
143 Loc = Tok->getLocation();
144 }
145
146 return false;
147}
148
149std::optional<Token> getQualifyingToken(tok::TokenKind TK,
150 CharSourceRange Range,
151 const ASTContext &Context,
152 const SourceManager &SM) {
153 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
154 TK == tok::kw_restrict) &&
155 "TK is not a qualifier keyword");
156 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
157 StringRef File = SM.getBufferData(LocInfo.first);
158 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
159 File.begin(), File.data() + LocInfo.second, File.end());
160 std::optional<Token> LastMatchBeforeTemplate;
161 std::optional<Token> LastMatchAfterTemplate;
162 bool SawTemplate = false;
163 Token Tok;
164 while (!RawLexer.LexFromRawLexer(Tok) &&
165 Range.getEnd() != Tok.getLocation() &&
166 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
167 if (Tok.is(tok::raw_identifier)) {
168 IdentifierInfo &Info = Context.Idents.get(
169 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
170 Tok.setIdentifierInfo(&Info);
171 Tok.setKind(Info.getTokenID());
172 }
173 if (Tok.is(tok::less))
174 SawTemplate = true;
175 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
176 LastMatchAfterTemplate = std::nullopt;
177 else if (Tok.is(TK)) {
178 if (SawTemplate)
179 LastMatchAfterTemplate = Tok;
180 else
181 LastMatchBeforeTemplate = Tok;
182 }
183 }
184 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
185 : LastMatchBeforeTemplate;
186}
187
188static bool breakAndReturnEnd(const Stmt &S) {
189 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
190}
191
192static bool breakAndReturnEndPlus1Token(const Stmt &S) {
193 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
194 SEHLeaveStmt>(S);
195}
196
197// Given a Stmt which does not include it's semicolon this method returns the
198// SourceLocation of the semicolon.
199static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
200 const SourceManager &SM,
201 const LangOptions &LangOpts) {
202
203 if (EndLoc.isMacroID()) {
204 // Assuming EndLoc points to a function call foo within macro F.
205 // This method is supposed to return location of the semicolon within
206 // those macro arguments:
207 // F ( foo() ; )
208 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
209 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
210 std::optional<Token> NextTok =
211 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
212
213 // Was the next token found successfully?
214 // All macro issues are simply resolved by ensuring it's a semicolon.
215 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
216 // Ideally this would return `F` with spelling location `;` (NextTok)
217 // following the example above. For now simply return NextTok location.
218 return NextTok->getLocation();
219 }
220
221 // Fallthrough to 'normal handling'.
222 // F ( foo() ) ;
223 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
224 }
225
226 std::optional<Token> NextTok =
227 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
228
229 // Testing for semicolon again avoids some issues with macros.
230 if (NextTok && NextTok->is(tok::TokenKind::semi))
231 return NextTok->getLocation();
232
233 return {};
234}
235
236SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
237 const LangOptions &LangOpts) {
238
239 const Stmt *LastChild = &S;
240 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
241 !breakAndReturnEndPlus1Token(*LastChild)) {
242 for (const Stmt *Child : LastChild->children())
243 LastChild = Child;
244 }
245
246 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
247 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
248
249 return S.getEndLoc();
250}
251
252SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
253 const SourceManager &SM) {
254 if (!FuncDecl)
255 return {};
256
257 const LangOptions &LangOpts = FuncDecl->getLangOpts();
258
259 if (FuncDecl->getNumParams() == 0) {
260 // Start at the beginning of the function declaration, and find the closing
261 // parenthesis after which we would place the noexcept specifier.
262 Token CurrentToken;
263 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
264 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
265 true)) {
266 if (CurrentToken.is(tok::r_paren))
267 return CurrentLocation.getLocWithOffset(1);
268
269 CurrentLocation = CurrentToken.getEndLoc();
270 }
271
272 // Failed to find the closing parenthesis, so just return an invalid
273 // SourceLocation.
274 return {};
275 }
276
277 // FunctionDecl with parameters
278 const SourceLocation NoexceptLoc =
279 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
280 if (NoexceptLoc.isValid())
281 return Lexer::findLocationAfterToken(
282 NoexceptLoc, tok::r_paren, SM, LangOpts,
283 /*SkipTrailingWhitespaceAndNewLine=*/true);
284
285 return {};
286}
287
288} // namespace clang::tidy::utils::lexer
FunctionInfo Info
CharSourceRange Range
SourceRange for the file name.
SourceLocation Loc
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM)
For a given FunctionDecl returns the location where you would need to place the noexcept specifier.
Definition: LexerUtils.cpp:252
std::pair< Token, SourceLocation > getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Definition: LexerUtils.cpp:18
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
Definition: LexerUtils.cpp:236
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return true if a macro expansion, a preprocessor directive, or a failure to retrieve the next token is found; otherwise return false.
Definition: LexerUtils.cpp:125
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:82
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition: LexerUtils.h:68
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
Definition: LexerUtils.cpp:39
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:111
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:46
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:199
static bool breakAndReturnEndPlus1Token(const Stmt &S)
Definition: LexerUtils.cpp:192
static bool breakAndReturnEnd(const Stmt &S)
Definition: LexerUtils.cpp:188
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
Definition: LexerUtils.cpp:59
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
Definition: LexerUtils.cpp:149
std::optional< Token > findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:88