clang-tools 22.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "LexerUtils.h"
10#include "clang/Basic/SourceManager.h"
11#include <optional>
12#include <utility>
13
15
16std::pair<Token, SourceLocation>
17getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
18 const LangOptions &LangOpts, bool SkipComments) {
19 const std::optional<Token> Tok =
20 Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
21
22 if (Tok.has_value())
23 return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
24
25 Token Token;
26 Token.setKind(tok::unknown);
27 return {Token, SourceLocation()};
28}
29
30Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
31 const LangOptions &LangOpts, bool SkipComments) {
32 auto [Token, Start] =
33 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
34 return Token;
35}
36
37SourceLocation findPreviousTokenStart(SourceLocation Start,
38 const SourceManager &SM,
39 const LangOptions &LangOpts) {
40 if (Start.isInvalid() || Start.isMacroID())
41 return {};
42
43 const SourceLocation BeforeStart = Start.getLocWithOffset(-1);
44 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
45 return {};
46
47 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
48}
49
50SourceLocation findPreviousTokenKind(SourceLocation Start,
51 const SourceManager &SM,
52 const LangOptions &LangOpts,
53 tok::TokenKind TK) {
54 if (Start.isInvalid() || Start.isMacroID())
55 return {};
56
57 while (true) {
58 const SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
59 if (L.isInvalid() || L.isMacroID())
60 return {};
61
62 Token T;
63 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
64 return {};
65
66 if (T.is(TK))
67 return T.getLocation();
68
69 Start = L;
70 }
71}
72
73SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
74 const LangOptions &LangOpts) {
75 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
76}
77
78std::optional<Token>
79findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
80 const LangOptions &LangOpts) {
81 while (Start.isValid()) {
82 std::optional<Token> CurrentToken =
83 Lexer::findNextToken(Start, SM, LangOpts);
84 if (!CurrentToken || !CurrentToken->is(tok::comment))
85 return CurrentToken;
86
87 Start = CurrentToken->getLocation();
88 }
89
90 return std::nullopt;
91}
92
94 const SourceManager &SM,
95 const LangOptions &LangOpts) {
96 assert(Range.isValid() && "Invalid Range for relexing provided");
97 SourceLocation Loc = Range.getBegin();
98
99 while (Loc <= Range.getEnd()) {
100 if (Loc.isMacroID())
101 return true;
102
103 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
104
105 if (!Tok)
106 return true;
107
108 if (Tok->is(tok::hash))
109 return true;
110
111 Loc = Tok->getLocation();
112 }
113
114 return false;
115}
116
117std::optional<Token> getQualifyingToken(tok::TokenKind TK,
118 CharSourceRange Range,
119 const ASTContext &Context,
120 const SourceManager &SM) {
121 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
122 TK == tok::kw_restrict) &&
123 "TK is not a qualifier keyword");
124 const std::pair<FileID, unsigned> LocInfo =
125 SM.getDecomposedLoc(Range.getBegin());
126 const StringRef File = SM.getBufferData(LocInfo.first);
127 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
128 File.begin(), File.data() + LocInfo.second, File.end());
129 std::optional<Token> LastMatchBeforeTemplate;
130 std::optional<Token> LastMatchAfterTemplate;
131 bool SawTemplate = false;
132 Token Tok;
133 while (!RawLexer.LexFromRawLexer(Tok) &&
134 Range.getEnd() != Tok.getLocation() &&
135 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
136 if (Tok.is(tok::raw_identifier)) {
137 IdentifierInfo &Info = Context.Idents.get(
138 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
139 Tok.setIdentifierInfo(&Info);
140 Tok.setKind(Info.getTokenID());
141 }
142 if (Tok.is(tok::less))
143 SawTemplate = true;
144 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
145 LastMatchAfterTemplate = std::nullopt;
146 else if (Tok.is(TK)) {
147 if (SawTemplate)
148 LastMatchAfterTemplate = Tok;
149 else
150 LastMatchBeforeTemplate = Tok;
151 }
152 }
153 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
154 : LastMatchBeforeTemplate;
155}
156
157static bool breakAndReturnEnd(const Stmt &S) {
158 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
159}
160
161static bool breakAndReturnEndPlus1Token(const Stmt &S) {
162 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
163 SEHLeaveStmt>(S);
164}
165
166// Given a Stmt which does not include it's semicolon this method returns the
167// SourceLocation of the semicolon.
168static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
169 const SourceManager &SM,
170 const LangOptions &LangOpts) {
171 if (EndLoc.isMacroID()) {
172 // Assuming EndLoc points to a function call foo within macro F.
173 // This method is supposed to return location of the semicolon within
174 // those macro arguments:
175 // F ( foo() ; )
176 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
177 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
178 std::optional<Token> NextTok =
179 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
180
181 // Was the next token found successfully?
182 // All macro issues are simply resolved by ensuring it's a semicolon.
183 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
184 // Ideally this would return `F` with spelling location `;` (NextTok)
185 // following the example above. For now simply return NextTok location.
186 return NextTok->getLocation();
187 }
188
189 // Fallthrough to 'normal handling'.
190 // F ( foo() ) ;
191 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
192 }
193
194 std::optional<Token> NextTok =
195 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
196
197 // Testing for semicolon again avoids some issues with macros.
198 if (NextTok && NextTok->is(tok::TokenKind::semi))
199 return NextTok->getLocation();
200
201 return {};
202}
203
204SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
205 const LangOptions &LangOpts) {
206 const Stmt *LastChild = &S;
207 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
208 !breakAndReturnEndPlus1Token(*LastChild)) {
209 for (const Stmt *Child : LastChild->children())
210 LastChild = Child;
211 }
212
213 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
214 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
215
216 return S.getEndLoc();
217}
218
219SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
220 const SourceManager &SM) {
221 if (!FuncDecl)
222 return {};
223
224 const LangOptions &LangOpts = FuncDecl->getLangOpts();
225
226 if (FuncDecl->getNumParams() == 0) {
227 // Start at the beginning of the function declaration, and find the closing
228 // parenthesis after which we would place the noexcept specifier.
229 Token CurrentToken;
230 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
231 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
232 true)) {
233 if (CurrentToken.is(tok::r_paren))
234 return CurrentLocation.getLocWithOffset(1);
235
236 CurrentLocation = CurrentToken.getEndLoc();
237 }
238
239 // Failed to find the closing parenthesis, so just return an invalid
240 // SourceLocation.
241 return {};
242 }
243
244 // FunctionDecl with parameters
245 const SourceLocation NoexceptLoc =
246 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
247 if (NoexceptLoc.isValid())
248 return Lexer::findLocationAfterToken(
249 NoexceptLoc, tok::r_paren, SM, LangOpts,
250 /*SkipTrailingWhitespaceAndNewLine=*/true);
251
252 return {};
253}
254
255} // namespace clang::tidy::utils::lexer
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM)
For a given FunctionDecl returns the location where you would need to place the noexcept specifier.
std::pair< Token, SourceLocation > getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return true if either a macro spans multiple tokens,...
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition LexerUtils.h:68
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
static bool breakAndReturnEndPlus1Token(const Stmt &S)
static bool breakAndReturnEnd(const Stmt &S)
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
static constexpr const char FuncDecl[]