clang-tools 22.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "LexerUtils.h"
10#include "clang/Basic/SourceManager.h"
11#include <optional>
12#include <utility>
13
15
16std::pair<Token, SourceLocation>
17getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
18 const LangOptions &LangOpts, bool SkipComments) {
19 const std::optional<Token> Tok =
20 Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
21
22 if (Tok.has_value()) {
23 return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
24 }
25
26 Token Token;
27 Token.setKind(tok::unknown);
28 return {Token, SourceLocation()};
29}
30
31Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
32 const LangOptions &LangOpts, bool SkipComments) {
33 auto [Token, Start] =
34 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
35 return Token;
36}
37
38SourceLocation findPreviousTokenStart(SourceLocation Start,
39 const SourceManager &SM,
40 const LangOptions &LangOpts) {
41 if (Start.isInvalid() || Start.isMacroID())
42 return {};
43
44 const SourceLocation BeforeStart = Start.getLocWithOffset(-1);
45 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
46 return {};
47
48 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
49}
50
51SourceLocation findPreviousTokenKind(SourceLocation Start,
52 const SourceManager &SM,
53 const LangOptions &LangOpts,
54 tok::TokenKind TK) {
55 if (Start.isInvalid() || Start.isMacroID())
56 return {};
57
58 while (true) {
59 const SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
60 if (L.isInvalid() || L.isMacroID())
61 return {};
62
63 Token T;
64 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
65 return {};
66
67 if (T.is(TK))
68 return T.getLocation();
69
70 Start = L;
71 }
72}
73
74SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
75 const LangOptions &LangOpts) {
76 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
77}
78
79std::optional<Token>
80findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
81 const LangOptions &LangOpts) {
82 while (Start.isValid()) {
83 std::optional<Token> CurrentToken =
84 Lexer::findNextToken(Start, SM, LangOpts);
85 if (!CurrentToken || !CurrentToken->is(tok::comment))
86 return CurrentToken;
87
88 Start = CurrentToken->getLocation();
89 }
90
91 return std::nullopt;
92}
93
95 const SourceManager &SM,
96 const LangOptions &LangOpts) {
97 assert(Range.isValid() && "Invalid Range for relexing provided");
98 SourceLocation Loc = Range.getBegin();
99
100 while (Loc <= Range.getEnd()) {
101 if (Loc.isMacroID())
102 return true;
103
104 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
105
106 if (!Tok)
107 return true;
108
109 if (Tok->is(tok::hash))
110 return true;
111
112 Loc = Tok->getLocation();
113 }
114
115 return false;
116}
117
118std::optional<Token> getQualifyingToken(tok::TokenKind TK,
119 CharSourceRange Range,
120 const ASTContext &Context,
121 const SourceManager &SM) {
122 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
123 TK == tok::kw_restrict) &&
124 "TK is not a qualifier keyword");
125 const std::pair<FileID, unsigned> LocInfo =
126 SM.getDecomposedLoc(Range.getBegin());
127 const StringRef File = SM.getBufferData(LocInfo.first);
128 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
129 File.begin(), File.data() + LocInfo.second, File.end());
130 std::optional<Token> LastMatchBeforeTemplate;
131 std::optional<Token> LastMatchAfterTemplate;
132 bool SawTemplate = false;
133 Token Tok;
134 while (!RawLexer.LexFromRawLexer(Tok) &&
135 Range.getEnd() != Tok.getLocation() &&
136 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
137 if (Tok.is(tok::raw_identifier)) {
138 IdentifierInfo &Info = Context.Idents.get(
139 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
140 Tok.setIdentifierInfo(&Info);
141 Tok.setKind(Info.getTokenID());
142 }
143 if (Tok.is(tok::less))
144 SawTemplate = true;
145 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
146 LastMatchAfterTemplate = std::nullopt;
147 else if (Tok.is(TK)) {
148 if (SawTemplate)
149 LastMatchAfterTemplate = Tok;
150 else
151 LastMatchBeforeTemplate = Tok;
152 }
153 }
154 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
155 : LastMatchBeforeTemplate;
156}
157
158static bool breakAndReturnEnd(const Stmt &S) {
159 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
160}
161
162static bool breakAndReturnEndPlus1Token(const Stmt &S) {
163 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
164 SEHLeaveStmt>(S);
165}
166
167// Given a Stmt which does not include it's semicolon this method returns the
168// SourceLocation of the semicolon.
169static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
170 const SourceManager &SM,
171 const LangOptions &LangOpts) {
172 if (EndLoc.isMacroID()) {
173 // Assuming EndLoc points to a function call foo within macro F.
174 // This method is supposed to return location of the semicolon within
175 // those macro arguments:
176 // F ( foo() ; )
177 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
178 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
179 std::optional<Token> NextTok =
180 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
181
182 // Was the next token found successfully?
183 // All macro issues are simply resolved by ensuring it's a semicolon.
184 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
185 // Ideally this would return `F` with spelling location `;` (NextTok)
186 // following the example above. For now simply return NextTok location.
187 return NextTok->getLocation();
188 }
189
190 // Fallthrough to 'normal handling'.
191 // F ( foo() ) ;
192 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
193 }
194
195 std::optional<Token> NextTok =
196 findNextTokenSkippingComments(EndLoc, SM, LangOpts);
197
198 // Testing for semicolon again avoids some issues with macros.
199 if (NextTok && NextTok->is(tok::TokenKind::semi))
200 return NextTok->getLocation();
201
202 return {};
203}
204
205SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
206 const LangOptions &LangOpts) {
207 const Stmt *LastChild = &S;
208 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
209 !breakAndReturnEndPlus1Token(*LastChild)) {
210 for (const Stmt *Child : LastChild->children())
211 LastChild = Child;
212 }
213
214 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
215 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
216
217 return S.getEndLoc();
218}
219
220SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
221 const SourceManager &SM) {
222 if (!FuncDecl)
223 return {};
224
225 const LangOptions &LangOpts = FuncDecl->getLangOpts();
226
227 if (FuncDecl->getNumParams() == 0) {
228 // Start at the beginning of the function declaration, and find the closing
229 // parenthesis after which we would place the noexcept specifier.
230 Token CurrentToken;
231 SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
232 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
233 true)) {
234 if (CurrentToken.is(tok::r_paren))
235 return CurrentLocation.getLocWithOffset(1);
236
237 CurrentLocation = CurrentToken.getEndLoc();
238 }
239
240 // Failed to find the closing parenthesis, so just return an invalid
241 // SourceLocation.
242 return {};
243 }
244
245 // FunctionDecl with parameters
246 const SourceLocation NoexceptLoc =
247 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
248 if (NoexceptLoc.isValid())
249 return Lexer::findLocationAfterToken(
250 NoexceptLoc, tok::r_paren, SM, LangOpts,
251 /*SkipTrailingWhitespaceAndNewLine=*/true);
252
253 return {};
254}
255
256} // namespace clang::tidy::utils::lexer
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM)
For a given FunctionDecl returns the location where you would need to place the noexcept specifier.
std::pair< Token, SourceLocation > getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return false if either a macro spans multiple tokens,...
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition LexerUtils.h:68
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
static bool breakAndReturnEndPlus1Token(const Stmt &S)
static bool breakAndReturnEnd(const Stmt &S)
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
static constexpr const char FuncDecl[]