clang-tools  14.0.0git
LexerUtils.cpp
Go to the documentation of this file.
1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
12 
13 namespace clang {
14 namespace tidy {
15 namespace utils {
16 namespace lexer {
17 
18 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
19  const LangOptions &LangOpts, bool SkipComments) {
20  Token Token;
21  Token.setKind(tok::unknown);
22 
23  Location = Location.getLocWithOffset(-1);
24  if (Location.isInvalid())
25  return Token;
26 
27  auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
28  while (Location != StartOfFile) {
29  Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
30  if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
31  (!SkipComments || !Token.is(tok::comment))) {
32  break;
33  }
34  Location = Location.getLocWithOffset(-1);
35  }
36  return Token;
37 }
38 
39 SourceLocation findPreviousTokenStart(SourceLocation Start,
40  const SourceManager &SM,
41  const LangOptions &LangOpts) {
42  if (Start.isInvalid() || Start.isMacroID())
43  return SourceLocation();
44 
45  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
46  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
47  return SourceLocation();
48 
49  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
50 }
51 
52 SourceLocation findPreviousTokenKind(SourceLocation Start,
53  const SourceManager &SM,
54  const LangOptions &LangOpts,
55  tok::TokenKind TK) {
56  if (Start.isInvalid() || Start.isMacroID())
57  return SourceLocation();
58 
59  while (true) {
60  SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
61  if (L.isInvalid() || L.isMacroID())
62  return SourceLocation();
63 
64  Token T;
65  if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
66  return SourceLocation();
67 
68  if (T.is(TK))
69  return T.getLocation();
70 
71  Start = L;
72  }
73 }
74 
75 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
76  const LangOptions &LangOpts) {
77  return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
78 }
79 
80 Optional<Token> findNextTokenSkippingComments(SourceLocation Start,
81  const SourceManager &SM,
82  const LangOptions &LangOpts) {
83  Optional<Token> CurrentToken;
84  do {
85  CurrentToken = Lexer::findNextToken(Start, SM, LangOpts);
86  } while (CurrentToken && CurrentToken->is(tok::comment));
87  return CurrentToken;
88 }
89 
91  const SourceManager &SM,
92  const LangOptions &LangOpts) {
93  assert(Range.isValid() && "Invalid Range for relexing provided");
94  SourceLocation Loc = Range.getBegin();
95 
96  while (Loc < Range.getEnd()) {
97  if (Loc.isMacroID())
98  return true;
99 
100  llvm::Optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
101 
102  if (!Tok)
103  return true;
104 
105  if (Tok->is(tok::hash))
106  return true;
107 
108  Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts).getLocWithOffset(1);
109  }
110 
111  return false;
112 }
113 
114 llvm::Optional<Token> getQualifyingToken(tok::TokenKind TK,
115  CharSourceRange Range,
116  const ASTContext &Context,
117  const SourceManager &SM) {
118  assert((TK == tok::kw_const || TK == tok::kw_volatile ||
119  TK == tok::kw_restrict) &&
120  "TK is not a qualifier keyword");
121  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
122  StringRef File = SM.getBufferData(LocInfo.first);
123  Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
124  File.begin(), File.data() + LocInfo.second, File.end());
125  llvm::Optional<Token> LastMatchBeforeTemplate;
126  llvm::Optional<Token> LastMatchAfterTemplate;
127  bool SawTemplate = false;
128  Token Tok;
129  while (!RawLexer.LexFromRawLexer(Tok) &&
130  Range.getEnd() != Tok.getLocation() &&
131  !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
132  if (Tok.is(tok::raw_identifier)) {
133  IdentifierInfo &Info = Context.Idents.get(
134  StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
135  Tok.setIdentifierInfo(&Info);
136  Tok.setKind(Info.getTokenID());
137  }
138  if (Tok.is(tok::less))
139  SawTemplate = true;
140  else if (Tok.isOneOf(tok::greater, tok::greatergreater))
141  LastMatchAfterTemplate = None;
142  else if (Tok.is(TK)) {
143  if (SawTemplate)
144  LastMatchAfterTemplate = Tok;
145  else
146  LastMatchBeforeTemplate = Tok;
147  }
148  }
149  return LastMatchAfterTemplate != None ? LastMatchAfterTemplate
150  : LastMatchBeforeTemplate;
151 }
152 
153 static bool breakAndReturnEnd(const Stmt &S) {
154  return isa<CompoundStmt, DeclStmt, NullStmt>(S);
155 }
156 
157 static bool breakAndReturnEndPlus1Token(const Stmt &S) {
158  return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, SEHLeaveStmt>(S);
159 }
160 
161 // Given a Stmt which does not include it's semicolon this method returns the
162 // SourceLocation of the semicolon.
163 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
164  const SourceManager &SM,
165  const LangOptions &LangOpts) {
166 
167  if (EndLoc.isMacroID()) {
168  // Assuming EndLoc points to a function call foo within macro F.
169  // This method is supposed to return location of the semicolon within
170  // those macro arguments:
171  // F ( foo() ; )
172  // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
173  const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
174  Optional<Token> NextTok =
175  findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
176 
177  // Was the next token found successfully?
178  // All macro issues are simply resolved by ensuring it's a semicolon.
179  if (NextTok && NextTok->is(tok::TokenKind::semi)) {
180  // Ideally this would return `F` with spelling location `;` (NextTok)
181  // following the examle above. For now simply return NextTok location.
182  return NextTok->getLocation();
183  }
184 
185  // Fallthrough to 'normal handling'.
186  // F ( foo() ) ;
187  // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
188  }
189 
190  Optional<Token> NextTok = findNextTokenSkippingComments(EndLoc, SM, LangOpts);
191 
192  // Testing for semicolon again avoids some issues with macros.
193  if (NextTok && NextTok->is(tok::TokenKind::semi))
194  return NextTok->getLocation();
195 
196  return SourceLocation();
197 }
198 
199 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
200  const LangOptions &LangOpts) {
201 
202  const Stmt *LastChild = &S;
203  while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
204  !breakAndReturnEndPlus1Token(*LastChild)) {
205  for (const Stmt *Child : LastChild->children())
206  LastChild = Child;
207  }
208 
209  if (!breakAndReturnEnd(*LastChild) &&
210  breakAndReturnEndPlus1Token(*LastChild))
211  return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
212 
213  return S.getEndLoc();
214 }
215 
216 } // namespace lexer
217 } // namespace utils
218 } // namespace tidy
219 } // namespace clang
Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:38
Loc
SourceLocation Loc
Definition: KernelNameRestrictionCheck.cpp:45
clang::tidy::utils::lexer::breakAndReturnEndPlus1Token
static bool breakAndReturnEndPlus1Token(const Stmt &S)
Definition: LexerUtils.cpp:157
clang::tidy::utils::lexer::getSemicolonAfterStmtEndLoc
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:163
Location
Definition: Modularize.cpp:382
clang::tidy::utils::lexer::rangeContainsExpansionsOrDirectives
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return true if either a macro spans multiple tokens,...
Definition: LexerUtils.cpp:90
clang::tidy::utils::lexer::findPreviousTokenStart
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:39
clang::tidy::utils::lexer::breakAndReturnEnd
static bool breakAndReturnEnd(const Stmt &S)
Definition: LexerUtils.cpp:153
clang::tidy::utils::lexer::getPreviousToken
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
Definition: LexerUtils.cpp:18
clang::tidy::utils::lexer::getUnifiedEndLoc
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
Definition: LexerUtils.cpp:199
LexerUtils.h
Info
FunctionInfo Info
Definition: FunctionSizeCheck.cpp:120
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::tidy::utils::lexer::findNextAnyTokenKind
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition: LexerUtils.h:65
clang::tidy::utils::lexer::findNextTerminator
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:75
SM
const SourceManager & SM
Definition: IncludeCleaner.cpp:108
clang::tidy::utils::lexer::getQualifyingToken
llvm::Optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
Definition: LexerUtils.cpp:114
clang::tidy::utils::lexer::findNextTokenSkippingComments
Optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:80
clang::tidy::utils::lexer::findPreviousTokenKind
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
Definition: LexerUtils.cpp:52