clang-tools 20.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "LexerUtils.h"
10#include "clang/AST/AST.h"
11#include "clang/Basic/SourceManager.h"
12#include <optional>
13#include <utility>
14
16
17std::pair<Token, SourceLocation>
18getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
19 const LangOptions &LangOpts, bool SkipComments) {
20 Token Token;
21 Token.setKind(tok::unknown);
22
23 Location = Location.getLocWithOffset(-1);
24 if (Location.isInvalid())
25 return {Token, Location};
26
27 const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
28 while (Location != StartOfFile) {
29 Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
30 if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
31 (!SkipComments || !Token.is(tok::comment))) {
32 break;
33 }
34 if (Location == StartOfFile)
35 return {Token, Location};
36 Location = Location.getLocWithOffset(-1);
37 }
38 return {Token, Location};
39}
40
41Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
42 const LangOptions &LangOpts, bool SkipComments) {
43 auto [Token, Start] =
44 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
45 return Token;
46}
47
48SourceLocation findPreviousTokenStart(SourceLocation Start,
49 const SourceManager &SM,
50 const LangOptions &LangOpts) {
51 if (Start.isInvalid() || Start.isMacroID())
52 return {};
53
54 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
55 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
56 return {};
57
58 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
59}
60
61SourceLocation findPreviousTokenKind(SourceLocation Start,
62 const SourceManager &SM,
63 const LangOptions &LangOpts,
64 tok::TokenKind TK) {
65 if (Start.isInvalid() || Start.isMacroID())
66 return {};
67
68 while (true) {
69 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
70 if (L.isInvalid() || L.isMacroID())
71 return {};
72
73 Token T;
74 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
75 return {};
76
77 if (T.is(TK))
78 return T.getLocation();
79
80 Start = L;
81 }
82}
83
84SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
85 const LangOptions &LangOpts) {
86 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
87}
88
89std::optional<Token>
90findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM,
91 const LangOptions &LangOpts) {
92 // `Lexer::findNextToken` will ignore comment
93 if (Start.isMacroID())
94 return std::nullopt;
95 Start = Lexer::getLocForEndOfToken(Start, 0, SM, LangOpts);
96 // Break down the source location.
97 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Start);
98 bool InvalidTemp = false;
99 StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
100 if (InvalidTemp)
101 return std::nullopt;
102 // Lex from the start of the given location.
103 Lexer L(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
104 File.data() + LocInfo.second, File.end());
105 L.SetCommentRetentionState(true);
106 // Find the token.
107 Token Tok;
108 L.LexFromRawLexer(Tok);
109 return Tok;
110}
111
112std::optional<Token>
113findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
114 const LangOptions &LangOpts) {
115 while (Start.isValid()) {
116 std::optional<Token> CurrentToken =
117 Lexer::findNextToken(Start, SM, LangOpts);
118 if (!CurrentToken || !CurrentToken->is(tok::comment))
119 return CurrentToken;
120
121 Start = CurrentToken->getLocation();
122 }
123
124 return std::nullopt;
125}
126
128 const SourceManager &SM,
129 const LangOptions &LangOpts) {
130 assert(Range.isValid() && "Invalid Range for relexing provided");
131 SourceLocation Loc = Range.getBegin();
132
133 while (Loc <= Range.getEnd()) {
134 if (Loc.isMacroID())
135 return true;
136
137 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
138
139 if (!Tok)
140 return true;
141
142 if (Tok->is(tok::hash))
143 return true;
144
145 Loc = Tok->getLocation();
146 }
147
148 return false;
149}
150
151std::optional<Token> getQualifyingToken(tok::TokenKind TK,
152 CharSourceRange Range,
153 const ASTContext &Context,
154 const SourceManager &SM) {
155 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
156 TK == tok::kw_restrict) &&
157 "TK is not a qualifier keyword");
158 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
159 StringRef File = SM.getBufferData(LocInfo.first);
160 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
161 File.begin(), File.data() + LocInfo.second, File.end());
162 std::optional<Token> LastMatchBeforeTemplate;
163 std::optional<Token> LastMatchAfterTemplate;
164 bool SawTemplate = false;
165 Token Tok;
166 while (!RawLexer.LexFromRawLexer(Tok) &&
167 Range.getEnd() != Tok.getLocation() &&
168 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
169 if (Tok.is(tok::raw_identifier)) {
170 IdentifierInfo &Info = Context.Idents.get(
171 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
172 Tok.setIdentifierInfo(&Info);
173 Tok.setKind(Info.getTokenID());
174 }
175 if (Tok.is(tok::less))
176 SawTemplate = true;
177 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
178 LastMatchAfterTemplate = std::nullopt;
179 else if (Tok.is(TK)) {
180 if (SawTemplate)
181 LastMatchAfterTemplate = Tok;
182 else
183 LastMatchBeforeTemplate = Tok;
184 }
185 }
186 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
187 : LastMatchBeforeTemplate;
188}
189
190static bool breakAndReturnEnd(const Stmt &S) {
191 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
192}
193
194static bool breakAndReturnEndPlus1Token(const Stmt &S) {
195 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
196 SEHLeaveStmt>(S);
197}
198
199// Given a Stmt which does not include it's semicolon this method returns the
200// SourceLocation of the semicolon.
201static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
202 const SourceManager &SM,
203 const LangOptions &LangOpts) {
204
205 if (EndLoc.isMacroID()) {
206 // Assuming EndLoc points to a function call foo within macro F.
207 // This method is supposed to return location of the semicolon within
208 // those macro arguments:
209 // F ( foo() ; )
210 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
211 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
212 std::optional<Token> NextTok =
213 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
214
215 // Was the next token found successfully?
216 // All macro issues are simply resolved by ensuring it's a semicolon.
217 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
218 // Ideally this would return `F` with spelling location `;` (NextTok)
219 // following the example above. For now simply return NextTok location.
220 return NextTok->getLocation();
221 }
222
223 // Fallthrough to 'normal handling'.
224 // F ( foo() ) ;
225 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
226 }
227
228 std::optional<Token> NextTok =
229 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
230
231 // Testing for semicolon again avoids some issues with macros.
232 if (NextTok && NextTok->is(tok::TokenKind::semi))
233 return NextTok->getLocation();
234
235 return {};
236}
237
238SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
239 const LangOptions &LangOpts) {
240
241 const Stmt *LastChild = &S;
242 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
243 !breakAndReturnEndPlus1Token(*LastChild)) {
244 for (const Stmt *Child : LastChild->children())
245 LastChild = Child;
246 }
247
248 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
249 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
250
251 return S.getEndLoc();
252}
253
254SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
255 const SourceManager &SM) {
256 if (!FuncDecl)
257 return {};
258
259 const LangOptions &LangOpts = FuncDecl->getLangOpts();
260
261 if (FuncDecl->getNumParams() == 0) {
262 // Start at the beginning of the function declaration, and find the closing
263 // parenthesis after which we would place the noexcept specifier.
264 Token CurrentToken;
265 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
266 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
267 true)) {
268 if (CurrentToken.is(tok::r_paren))
269 return CurrentLocation.getLocWithOffset(1);
270
271 CurrentLocation = CurrentToken.getEndLoc();
272 }
273
274 // Failed to find the closing parenthesis, so just return an invalid
275 // SourceLocation.
276 return {};
277 }
278
279 // FunctionDecl with parameters
280 const SourceLocation NoexceptLoc =
281 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
282 if (NoexceptLoc.isValid())
283 return Lexer::findLocationAfterToken(
284 NoexceptLoc, tok::r_paren, SM, LangOpts,
285 /*SkipTrailingWhitespaceAndNewLine=*/true);
286
287 return {};
288}
289
290} // namespace clang::tidy::utils::lexer
FunctionInfo Info
CharSourceRange Range
SourceRange for the file name.
SourceLocation Loc
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM)
For a given FunctionDecl returns the location where you would need to place the noexcept specifier.
Definition: LexerUtils.cpp:254
std::pair< Token, SourceLocation > getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Definition: LexerUtils.cpp:18
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
Definition: LexerUtils.cpp:238
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return true if a macro expansion, a preprocessor directive or a failure to retrieve the next token is found,...
Definition: LexerUtils.cpp:127
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:84
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition: LexerUtils.h:68
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
Definition: LexerUtils.cpp:41
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:113
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:48
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:201
static bool breakAndReturnEndPlus1Token(const Stmt &S)
Definition: LexerUtils.cpp:194
static bool breakAndReturnEnd(const Stmt &S)
Definition: LexerUtils.cpp:190
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
Definition: LexerUtils.cpp:61
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
Definition: LexerUtils.cpp:151
std::optional< Token > findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:90
static constexpr const char FuncDecl[]