clang-tools  17.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
12 #include <optional>
13 
15 
16 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
17  const LangOptions &LangOpts, bool SkipComments) {
18  Token Token;
19  Token.setKind(tok::unknown);
20 
21  Location = Location.getLocWithOffset(-1);
22  if (Location.isInvalid())
23  return Token;
24 
25  auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
26  while (Location != StartOfFile) {
27  Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
28  if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
29  (!SkipComments || !Token.is(tok::comment))) {
30  break;
31  }
32  Location = Location.getLocWithOffset(-1);
33  }
34  return Token;
35 }
36 
37 SourceLocation findPreviousTokenStart(SourceLocation Start,
38  const SourceManager &SM,
39  const LangOptions &LangOpts) {
40  if (Start.isInvalid() || Start.isMacroID())
41  return SourceLocation();
42 
43  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
44  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
45  return SourceLocation();
46 
47  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
48 }
49 
50 SourceLocation findPreviousTokenKind(SourceLocation Start,
51  const SourceManager &SM,
52  const LangOptions &LangOpts,
53  tok::TokenKind TK) {
54  if (Start.isInvalid() || Start.isMacroID())
55  return SourceLocation();
56 
57  while (true) {
58  SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
59  if (L.isInvalid() || L.isMacroID())
60  return SourceLocation();
61 
62  Token T;
63  if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
64  return SourceLocation();
65 
66  if (T.is(TK))
67  return T.getLocation();
68 
69  Start = L;
70  }
71 }
72 
73 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
74  const LangOptions &LangOpts) {
75  return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
76 }
77 
78 std::optional<Token>
79 findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
80  const LangOptions &LangOpts) {
81  std::optional<Token> CurrentToken;
82  do {
83  CurrentToken = Lexer::findNextToken(Start, SM, LangOpts);
84  } while (CurrentToken && CurrentToken->is(tok::comment));
85  return CurrentToken;
86 }
87 
89  const SourceManager &SM,
90  const LangOptions &LangOpts) {
91  assert(Range.isValid() && "Invalid Range for relexing provided");
92  SourceLocation Loc = Range.getBegin();
93 
94  while (Loc < Range.getEnd()) {
95  if (Loc.isMacroID())
96  return true;
97 
98  std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
99 
100  if (!Tok)
101  return true;
102 
103  if (Tok->is(tok::hash))
104  return true;
105 
106  Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts).getLocWithOffset(1);
107  }
108 
109  return false;
110 }
111 
112 std::optional<Token> getQualifyingToken(tok::TokenKind TK,
113  CharSourceRange Range,
114  const ASTContext &Context,
115  const SourceManager &SM) {
116  assert((TK == tok::kw_const || TK == tok::kw_volatile ||
117  TK == tok::kw_restrict) &&
118  "TK is not a qualifier keyword");
119  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
120  StringRef File = SM.getBufferData(LocInfo.first);
121  Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
122  File.begin(), File.data() + LocInfo.second, File.end());
123  std::optional<Token> LastMatchBeforeTemplate;
124  std::optional<Token> LastMatchAfterTemplate;
125  bool SawTemplate = false;
126  Token Tok;
127  while (!RawLexer.LexFromRawLexer(Tok) &&
128  Range.getEnd() != Tok.getLocation() &&
129  !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
130  if (Tok.is(tok::raw_identifier)) {
131  IdentifierInfo &Info = Context.Idents.get(
132  StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
133  Tok.setIdentifierInfo(&Info);
134  Tok.setKind(Info.getTokenID());
135  }
136  if (Tok.is(tok::less))
137  SawTemplate = true;
138  else if (Tok.isOneOf(tok::greater, tok::greatergreater))
139  LastMatchAfterTemplate = std::nullopt;
140  else if (Tok.is(TK)) {
141  if (SawTemplate)
142  LastMatchAfterTemplate = Tok;
143  else
144  LastMatchBeforeTemplate = Tok;
145  }
146  }
147  return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
148  : LastMatchBeforeTemplate;
149 }
150 
151 static bool breakAndReturnEnd(const Stmt &S) {
152  return isa<CompoundStmt, DeclStmt, NullStmt>(S);
153 }
154 
155 static bool breakAndReturnEndPlus1Token(const Stmt &S) {
156  return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, SEHLeaveStmt>(S);
157 }
158 
159 // Given a Stmt which does not include it's semicolon this method returns the
160 // SourceLocation of the semicolon.
161 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
162  const SourceManager &SM,
163  const LangOptions &LangOpts) {
164 
165  if (EndLoc.isMacroID()) {
166  // Assuming EndLoc points to a function call foo within macro F.
167  // This method is supposed to return location of the semicolon within
168  // those macro arguments:
169  // F ( foo() ; )
170  // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
171  const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
172  std::optional<Token> NextTok =
173  findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
174 
175  // Was the next token found successfully?
176  // All macro issues are simply resolved by ensuring it's a semicolon.
177  if (NextTok && NextTok->is(tok::TokenKind::semi)) {
178  // Ideally this would return `F` with spelling location `;` (NextTok)
179  // following the example above. For now simply return NextTok location.
180  return NextTok->getLocation();
181  }
182 
183  // Fallthrough to 'normal handling'.
184  // F ( foo() ) ;
185  // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
186  }
187 
188  std::optional<Token> NextTok =
190 
191  // Testing for semicolon again avoids some issues with macros.
192  if (NextTok && NextTok->is(tok::TokenKind::semi))
193  return NextTok->getLocation();
194 
195  return SourceLocation();
196 }
197 
198 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
199  const LangOptions &LangOpts) {
200 
201  const Stmt *LastChild = &S;
202  while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
203  !breakAndReturnEndPlus1Token(*LastChild)) {
204  for (const Stmt *Child : LastChild->children())
205  LastChild = Child;
206  }
207 
208  if (!breakAndReturnEnd(*LastChild) &&
209  breakAndReturnEndPlus1Token(*LastChild))
210  return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
211 
212  return S.getEndLoc();
213 }
214 
215 } // namespace clang::tidy::utils::lexer
Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:37
Loc
SourceLocation Loc
Definition: KernelNameRestrictionCheck.cpp:43
clang::tidy::utils::lexer::breakAndReturnEndPlus1Token
static bool breakAndReturnEndPlus1Token(const Stmt &S)
Definition: LexerUtils.cpp:155
clang::tidy::utils::lexer::getSemicolonAfterStmtEndLoc
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:161
Location
Definition: Modularize.cpp:381
clang::tidy::utils::lexer::rangeContainsExpansionsOrDirectives
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return true if either a macro spans multiple tokens,...
Definition: LexerUtils.cpp:88
clang::tidy::utils::lexer::findNextTokenSkippingComments
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:79
clang::tidy::utils::lexer::findPreviousTokenStart
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:37
clang::tidy::utils::lexer::breakAndReturnEnd
static bool breakAndReturnEnd(const Stmt &S)
Definition: LexerUtils.cpp:151
clang::tidy::utils::lexer::getPreviousToken
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
Definition: LexerUtils.cpp:16
clang::tidy::utils::lexer::getUnifiedEndLoc
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
Definition: LexerUtils.cpp:198
LexerUtils.h
Info
FunctionInfo Info
Definition: FunctionSizeCheck.cpp:119
clang::tidy::utils::lexer::getQualifyingToken
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
Definition: LexerUtils.cpp:112
clang::tidy::utils::lexer::findNextAnyTokenKind
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition: LexerUtils.h:64
LangOpts
const LangOptions * LangOpts
Definition: ExtractFunction.cpp:374
clang::tidy::utils::lexer::findNextTerminator
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:73
clang::tidy::utils::lexer
Definition: LexerUtils.cpp:14
clang::tidy::utils::lexer::findPreviousTokenKind
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
Definition: LexerUtils.cpp:50