clang-tools 20.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "LexerUtils.h"
10#include "clang/AST/AST.h"
11#include "clang/Basic/SourceManager.h"
12#include <optional>
13#include <utility>
14
16
17std::pair<Token, SourceLocation>
18getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
19 const LangOptions &LangOpts, bool SkipComments) {
20 const std::optional<Token> Tok =
21 Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
22
23 if (Tok.has_value()) {
24 return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
25 }
26
27 Token Token;
28 Token.setKind(tok::unknown);
29 return {Token, SourceLocation()};
30}
31
32Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
33 const LangOptions &LangOpts, bool SkipComments) {
34 auto [Token, Start] =
35 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
36 return Token;
37}
38
39SourceLocation findPreviousTokenStart(SourceLocation Start,
40 const SourceManager &SM,
41 const LangOptions &LangOpts) {
42 if (Start.isInvalid() || Start.isMacroID())
43 return {};
44
45 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
46 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
47 return {};
48
49 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
50}
51
52SourceLocation findPreviousTokenKind(SourceLocation Start,
53 const SourceManager &SM,
54 const LangOptions &LangOpts,
55 tok::TokenKind TK) {
56 if (Start.isInvalid() || Start.isMacroID())
57 return {};
58
59 while (true) {
60 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
61 if (L.isInvalid() || L.isMacroID())
62 return {};
63
64 Token T;
65 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
66 return {};
67
68 if (T.is(TK))
69 return T.getLocation();
70
71 Start = L;
72 }
73}
74
75SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
76 const LangOptions &LangOpts) {
77 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
78}
79
80std::optional<Token>
81findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
82 const LangOptions &LangOpts) {
83 while (Start.isValid()) {
84 std::optional<Token> CurrentToken =
85 Lexer::findNextToken(Start, SM, LangOpts);
86 if (!CurrentToken || !CurrentToken->is(tok::comment))
87 return CurrentToken;
88
89 Start = CurrentToken->getLocation();
90 }
91
92 return std::nullopt;
93}
94
96 const SourceManager &SM,
97 const LangOptions &LangOpts) {
98 assert(Range.isValid() && "Invalid Range for relexing provided");
99 SourceLocation Loc = Range.getBegin();
100
101 while (Loc <= Range.getEnd()) {
102 if (Loc.isMacroID())
103 return true;
104
105 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
106
107 if (!Tok)
108 return true;
109
110 if (Tok->is(tok::hash))
111 return true;
112
113 Loc = Tok->getLocation();
114 }
115
116 return false;
117}
118
119std::optional<Token> getQualifyingToken(tok::TokenKind TK,
120 CharSourceRange Range,
121 const ASTContext &Context,
122 const SourceManager &SM) {
123 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
124 TK == tok::kw_restrict) &&
125 "TK is not a qualifier keyword");
126 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
127 StringRef File = SM.getBufferData(LocInfo.first);
128 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
129 File.begin(), File.data() + LocInfo.second, File.end());
130 std::optional<Token> LastMatchBeforeTemplate;
131 std::optional<Token> LastMatchAfterTemplate;
132 bool SawTemplate = false;
133 Token Tok;
134 while (!RawLexer.LexFromRawLexer(Tok) &&
135 Range.getEnd() != Tok.getLocation() &&
136 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
137 if (Tok.is(tok::raw_identifier)) {
138 IdentifierInfo &Info = Context.Idents.get(
139 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
140 Tok.setIdentifierInfo(&Info);
141 Tok.setKind(Info.getTokenID());
142 }
143 if (Tok.is(tok::less))
144 SawTemplate = true;
145 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
146 LastMatchAfterTemplate = std::nullopt;
147 else if (Tok.is(TK)) {
148 if (SawTemplate)
149 LastMatchAfterTemplate = Tok;
150 else
151 LastMatchBeforeTemplate = Tok;
152 }
153 }
154 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
155 : LastMatchBeforeTemplate;
156}
157
158static bool breakAndReturnEnd(const Stmt &S) {
159 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
160}
161
162static bool breakAndReturnEndPlus1Token(const Stmt &S) {
163 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
164 SEHLeaveStmt>(S);
165}
166
167// Given a Stmt which does not include it's semicolon this method returns the
168// SourceLocation of the semicolon.
169static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
170 const SourceManager &SM,
171 const LangOptions &LangOpts) {
172
173 if (EndLoc.isMacroID()) {
174 // Assuming EndLoc points to a function call foo within macro F.
175 // This method is supposed to return location of the semicolon within
176 // those macro arguments:
177 // F ( foo() ; )
178 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
179 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
180 std::optional<Token> NextTok =
181 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
182
183 // Was the next token found successfully?
184 // All macro issues are simply resolved by ensuring it's a semicolon.
185 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
186 // Ideally this would return `F` with spelling location `;` (NextTok)
187 // following the example above. For now simply return NextTok location.
188 return NextTok->getLocation();
189 }
190
191 // Fallthrough to 'normal handling'.
192 // F ( foo() ) ;
193 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
194 }
195
196 std::optional<Token> NextTok =
197 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
198
199 // Testing for semicolon again avoids some issues with macros.
200 if (NextTok && NextTok->is(tok::TokenKind::semi))
201 return NextTok->getLocation();
202
203 return {};
204}
205
206SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
207 const LangOptions &LangOpts) {
208
209 const Stmt *LastChild = &S;
210 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
211 !breakAndReturnEndPlus1Token(*LastChild)) {
212 for (const Stmt *Child : LastChild->children())
213 LastChild = Child;
214 }
215
216 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
217 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
218
219 return S.getEndLoc();
220}
221
222SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
223 const SourceManager &SM) {
224 if (!FuncDecl)
225 return {};
226
227 const LangOptions &LangOpts = FuncDecl->getLangOpts();
228
229 if (FuncDecl->getNumParams() == 0) {
230 // Start at the beginning of the function declaration, and find the closing
231 // parenthesis after which we would place the noexcept specifier.
232 Token CurrentToken;
233 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
234 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
235 true)) {
236 if (CurrentToken.is(tok::r_paren))
237 return CurrentLocation.getLocWithOffset(1);
238
239 CurrentLocation = CurrentToken.getEndLoc();
240 }
241
242 // Failed to find the closing parenthesis, so just return an invalid
243 // SourceLocation.
244 return {};
245 }
246
247 // FunctionDecl with parameters
248 const SourceLocation NoexceptLoc =
249 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
250 if (NoexceptLoc.isValid())
251 return Lexer::findLocationAfterToken(
252 NoexceptLoc, tok::r_paren, SM, LangOpts,
253 /*SkipTrailingWhitespaceAndNewLine=*/true);
254
255 return {};
256}
257
258} // namespace clang::tidy::utils::lexer
FunctionInfo Info
CharSourceRange Range
SourceRange for the file name.
SourceLocation Loc
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM)
For a given FunctionDecl returns the location where you would need to place the noexcept specifier.
Definition: LexerUtils.cpp:222
std::pair< Token, SourceLocation > getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Definition: LexerUtils.cpp:18
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
Definition: LexerUtils.cpp:206
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return false if either a macro spans multiple tokens,...
Definition: LexerUtils.cpp:95
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:75
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition: LexerUtils.h:68
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
Definition: LexerUtils.cpp:32
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:81
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:39
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:169
static bool breakAndReturnEndPlus1Token(const Stmt &S)
Definition: LexerUtils.cpp:162
static bool breakAndReturnEnd(const Stmt &S)
Definition: LexerUtils.cpp:158
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
Definition: LexerUtils.cpp:52
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
Definition: LexerUtils.cpp:119
static constexpr const char FuncDecl[]