clang-tools 22.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "LexerUtils.h"
10#include "clang/AST/AST.h"
11#include "clang/Basic/SourceManager.h"
12#include <optional>
13#include <utility>
14
16
17std::pair<Token, SourceLocation>
18getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
19 const LangOptions &LangOpts, bool SkipComments) {
20 const std::optional<Token> Tok =
21 Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
22
23 if (Tok.has_value()) {
24 return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
25 }
26
27 Token Token;
28 Token.setKind(tok::unknown);
29 return {Token, SourceLocation()};
30}
31
32Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
33 const LangOptions &LangOpts, bool SkipComments) {
34 auto [Token, Start] =
35 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
36 return Token;
37}
38
39SourceLocation findPreviousTokenStart(SourceLocation Start,
40 const SourceManager &SM,
41 const LangOptions &LangOpts) {
42 if (Start.isInvalid() || Start.isMacroID())
43 return {};
44
45 const SourceLocation BeforeStart = Start.getLocWithOffset(-1);
46 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
47 return {};
48
49 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
50}
51
52SourceLocation findPreviousTokenKind(SourceLocation Start,
53 const SourceManager &SM,
54 const LangOptions &LangOpts,
55 tok::TokenKind TK) {
56 if (Start.isInvalid() || Start.isMacroID())
57 return {};
58
59 while (true) {
60 const SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
61 if (L.isInvalid() || L.isMacroID())
62 return {};
63
64 Token T;
65 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
66 return {};
67
68 if (T.is(TK))
69 return T.getLocation();
70
71 Start = L;
72 }
73}
74
75SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
76 const LangOptions &LangOpts) {
77 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
78}
79
80std::optional<Token>
81findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
82 const LangOptions &LangOpts) {
83 while (Start.isValid()) {
84 std::optional<Token> CurrentToken =
85 Lexer::findNextToken(Start, SM, LangOpts);
86 if (!CurrentToken || !CurrentToken->is(tok::comment))
87 return CurrentToken;
88
89 Start = CurrentToken->getLocation();
90 }
91
92 return std::nullopt;
93}
94
96 const SourceManager &SM,
97 const LangOptions &LangOpts) {
98 assert(Range.isValid() && "Invalid Range for relexing provided");
99 SourceLocation Loc = Range.getBegin();
100
101 while (Loc <= Range.getEnd()) {
102 if (Loc.isMacroID())
103 return true;
104
105 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
106
107 if (!Tok)
108 return true;
109
110 if (Tok->is(tok::hash))
111 return true;
112
113 Loc = Tok->getLocation();
114 }
115
116 return false;
117}
118
119std::optional<Token> getQualifyingToken(tok::TokenKind TK,
120 CharSourceRange Range,
121 const ASTContext &Context,
122 const SourceManager &SM) {
123 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
124 TK == tok::kw_restrict) &&
125 "TK is not a qualifier keyword");
126 const std::pair<FileID, unsigned> LocInfo =
127 SM.getDecomposedLoc(Range.getBegin());
128 const StringRef File = SM.getBufferData(LocInfo.first);
129 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
130 File.begin(), File.data() + LocInfo.second, File.end());
131 std::optional<Token> LastMatchBeforeTemplate;
132 std::optional<Token> LastMatchAfterTemplate;
133 bool SawTemplate = false;
134 Token Tok;
135 while (!RawLexer.LexFromRawLexer(Tok) &&
136 Range.getEnd() != Tok.getLocation() &&
137 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
138 if (Tok.is(tok::raw_identifier)) {
139 IdentifierInfo &Info = Context.Idents.get(
140 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
141 Tok.setIdentifierInfo(&Info);
142 Tok.setKind(Info.getTokenID());
143 }
144 if (Tok.is(tok::less))
145 SawTemplate = true;
146 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
147 LastMatchAfterTemplate = std::nullopt;
148 else if (Tok.is(TK)) {
149 if (SawTemplate)
150 LastMatchAfterTemplate = Tok;
151 else
152 LastMatchBeforeTemplate = Tok;
153 }
154 }
155 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
156 : LastMatchBeforeTemplate;
157}
158
159static bool breakAndReturnEnd(const Stmt &S) {
160 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
161}
162
163static bool breakAndReturnEndPlus1Token(const Stmt &S) {
164 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
165 SEHLeaveStmt>(S);
166}
167
168// Given a Stmt which does not include it's semicolon this method returns the
169// SourceLocation of the semicolon.
170static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
171 const SourceManager &SM,
172 const LangOptions &LangOpts) {
173 if (EndLoc.isMacroID()) {
174 // Assuming EndLoc points to a function call foo within macro F.
175 // This method is supposed to return location of the semicolon within
176 // those macro arguments:
177 // F ( foo() ; )
178 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
179 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
180 std::optional<Token> NextTok =
181 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
182
183 // Was the next token found successfully?
184 // All macro issues are simply resolved by ensuring it's a semicolon.
185 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
186 // Ideally this would return `F` with spelling location `;` (NextTok)
187 // following the example above. For now simply return NextTok location.
188 return NextTok->getLocation();
189 }
190
191 // Fallthrough to 'normal handling'.
192 // F ( foo() ) ;
193 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
194 }
195
196 std::optional<Token> NextTok =
197 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
198
199 // Testing for semicolon again avoids some issues with macros.
200 if (NextTok && NextTok->is(tok::TokenKind::semi))
201 return NextTok->getLocation();
202
203 return {};
204}
205
206SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
207 const LangOptions &LangOpts) {
208 const Stmt *LastChild = &S;
209 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
210 !breakAndReturnEndPlus1Token(*LastChild)) {
211 for (const Stmt *Child : LastChild->children())
212 LastChild = Child;
213 }
214
215 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
216 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
217
218 return S.getEndLoc();
219}
220
221SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
222 const SourceManager &SM) {
223 if (!FuncDecl)
224 return {};
225
226 const LangOptions &LangOpts = FuncDecl->getLangOpts();
227
228 if (FuncDecl->getNumParams() == 0) {
229 // Start at the beginning of the function declaration, and find the closing
230 // parenthesis after which we would place the noexcept specifier.
231 Token CurrentToken;
232 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
233 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
234 true)) {
235 if (CurrentToken.is(tok::r_paren))
236 return CurrentLocation.getLocWithOffset(1);
237
238 CurrentLocation = CurrentToken.getEndLoc();
239 }
240
241 // Failed to find the closing parenthesis, so just return an invalid
242 // SourceLocation.
243 return {};
244 }
245
246 // FunctionDecl with parameters
247 const SourceLocation NoexceptLoc =
248 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
249 if (NoexceptLoc.isValid())
250 return Lexer::findLocationAfterToken(
251 NoexceptLoc, tok::r_paren, SM, LangOpts,
252 /*SkipTrailingWhitespaceAndNewLine=*/true);
253
254 return {};
255}
256
257} // namespace clang::tidy::utils::lexer
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM)
For a given FunctionDecl returns the location where you would need to place the noexcept specifier.
std::pair< Token, SourceLocation > getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return true if either a macro spans multiple tokens,...
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition LexerUtils.h:68
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
static bool breakAndReturnEndPlus1Token(const Stmt &S)
static bool breakAndReturnEnd(const Stmt &S)
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
static constexpr const char FuncDecl[]