clang-tools 22.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "LexerUtils.h"
10#include "clang/AST/AST.h"
11#include "clang/Basic/SourceManager.h"
12#include <optional>
13#include <utility>
14
16
17std::pair<Token, SourceLocation>
18getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
19 const LangOptions &LangOpts, bool SkipComments) {
20 const std::optional<Token> Tok =
21 Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
22
23 if (Tok.has_value()) {
24 return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
25 }
26
27 Token Token;
28 Token.setKind(tok::unknown);
29 return {Token, SourceLocation()};
30}
31
32Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
33 const LangOptions &LangOpts, bool SkipComments) {
34 auto [Token, Start] =
35 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
36 return Token;
37}
38
39SourceLocation findPreviousTokenStart(SourceLocation Start,
40 const SourceManager &SM,
41 const LangOptions &LangOpts) {
42 if (Start.isInvalid() || Start.isMacroID())
43 return {};
44
45 const SourceLocation BeforeStart = Start.getLocWithOffset(-1);
46 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
47 return {};
48
49 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
50}
51
52SourceLocation findPreviousTokenKind(SourceLocation Start,
53 const SourceManager &SM,
54 const LangOptions &LangOpts,
55 tok::TokenKind TK) {
56 if (Start.isInvalid() || Start.isMacroID())
57 return {};
58
59 while (true) {
60 const SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
61 if (L.isInvalid() || L.isMacroID())
62 return {};
63
64 Token T;
65 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
66 return {};
67
68 if (T.is(TK))
69 return T.getLocation();
70
71 Start = L;
72 }
73}
74
75SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
76 const LangOptions &LangOpts) {
77 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
78}
79
80std::optional<Token>
81findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
82 const LangOptions &LangOpts) {
83 while (Start.isValid()) {
84 std::optional<Token> CurrentToken =
85 Lexer::findNextToken(Start, SM, LangOpts);
86 if (!CurrentToken || !CurrentToken->is(tok::comment))
87 return CurrentToken;
88
89 Start = CurrentToken->getLocation();
90 }
91
92 return std::nullopt;
93}
94
96 const SourceManager &SM,
97 const LangOptions &LangOpts) {
98 assert(Range.isValid() && "Invalid Range for relexing provided");
99 SourceLocation Loc = Range.getBegin();
100
101 while (Loc <= Range.getEnd()) {
102 if (Loc.isMacroID())
103 return true;
104
105 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
106
107 if (!Tok)
108 return true;
109
110 if (Tok->is(tok::hash))
111 return true;
112
113 Loc = Tok->getLocation();
114 }
115
116 return false;
117}
118
119std::optional<Token> getQualifyingToken(tok::TokenKind TK,
120 CharSourceRange Range,
121 const ASTContext &Context,
122 const SourceManager &SM) {
123 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
124 TK == tok::kw_restrict) &&
125 "TK is not a qualifier keyword");
126 const std::pair<FileID, unsigned> LocInfo =
127 SM.getDecomposedLoc(Range.getBegin());
128 const StringRef File = SM.getBufferData(LocInfo.first);
129 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
130 File.begin(), File.data() + LocInfo.second, File.end());
131 std::optional<Token> LastMatchBeforeTemplate;
132 std::optional<Token> LastMatchAfterTemplate;
133 bool SawTemplate = false;
134 Token Tok;
135 while (!RawLexer.LexFromRawLexer(Tok) &&
136 Range.getEnd() != Tok.getLocation() &&
137 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
138 if (Tok.is(tok::raw_identifier)) {
139 IdentifierInfo &Info = Context.Idents.get(
140 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
141 Tok.setIdentifierInfo(&Info);
142 Tok.setKind(Info.getTokenID());
143 }
144 if (Tok.is(tok::less))
145 SawTemplate = true;
146 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
147 LastMatchAfterTemplate = std::nullopt;
148 else if (Tok.is(TK)) {
149 if (SawTemplate)
150 LastMatchAfterTemplate = Tok;
151 else
152 LastMatchBeforeTemplate = Tok;
153 }
154 }
155 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
156 : LastMatchBeforeTemplate;
157}
158
159static bool breakAndReturnEnd(const Stmt &S) {
160 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
161}
162
163static bool breakAndReturnEndPlus1Token(const Stmt &S) {
164 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
165 SEHLeaveStmt>(S);
166}
167
168// Given a Stmt which does not include it's semicolon this method returns the
169// SourceLocation of the semicolon.
170static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
171 const SourceManager &SM,
172 const LangOptions &LangOpts) {
173
174 if (EndLoc.isMacroID()) {
175 // Assuming EndLoc points to a function call foo within macro F.
176 // This method is supposed to return location of the semicolon within
177 // those macro arguments:
178 // F ( foo() ; )
179 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
180 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
181 std::optional<Token> NextTok =
182 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
183
184 // Was the next token found successfully?
185 // All macro issues are simply resolved by ensuring it's a semicolon.
186 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
187 // Ideally this would return `F` with spelling location `;` (NextTok)
188 // following the example above. For now simply return NextTok location.
189 return NextTok->getLocation();
190 }
191
192 // Fallthrough to 'normal handling'.
193 // F ( foo() ) ;
194 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
195 }
196
197 std::optional<Token> NextTok =
198 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
199
200 // Testing for semicolon again avoids some issues with macros.
201 if (NextTok && NextTok->is(tok::TokenKind::semi))
202 return NextTok->getLocation();
203
204 return {};
205}
206
207SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
208 const LangOptions &LangOpts) {
209
210 const Stmt *LastChild = &S;
211 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
212 !breakAndReturnEndPlus1Token(*LastChild)) {
213 for (const Stmt *Child : LastChild->children())
214 LastChild = Child;
215 }
216
217 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
218 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
219
220 return S.getEndLoc();
221}
222
223SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
224 const SourceManager &SM) {
225 if (!FuncDecl)
226 return {};
227
228 const LangOptions &LangOpts = FuncDecl->getLangOpts();
229
230 if (FuncDecl->getNumParams() == 0) {
231 // Start at the beginning of the function declaration, and find the closing
232 // parenthesis after which we would place the noexcept specifier.
233 Token CurrentToken;
234 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
235 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
236 true)) {
237 if (CurrentToken.is(tok::r_paren))
238 return CurrentLocation.getLocWithOffset(1);
239
240 CurrentLocation = CurrentToken.getEndLoc();
241 }
242
243 // Failed to find the closing parenthesis, so just return an invalid
244 // SourceLocation.
245 return {};
246 }
247
248 // FunctionDecl with parameters
249 const SourceLocation NoexceptLoc =
250 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
251 if (NoexceptLoc.isValid())
252 return Lexer::findLocationAfterToken(
253 NoexceptLoc, tok::r_paren, SM, LangOpts,
254 /*SkipTrailingWhitespaceAndNewLine=*/true);
255
256 return {};
257}
258
259} // namespace clang::tidy::utils::lexer
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM)
For a given FunctionDecl returns the location where you would need to place the noexcept specifier.
std::pair< Token, SourceLocation > getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return false if either a macro spans multiple tokens,...
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition LexerUtils.h:68
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
static bool breakAndReturnEndPlus1Token(const Stmt &S)
static bool breakAndReturnEnd(const Stmt &S)
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
static constexpr const char FuncDecl[]