extra/doxygen/LexerUtils_8cpp_source.html

//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#include "LexerUtils.h"

#include "clang/AST/AST.h"

#include "clang/Basic/SourceManager.h"

#include <optional>

#include <utility>


namespace clang::tidy::utils::lexer {


// Finds the token that precedes `Location` within its file and returns it
// together with the location at which that token starts.
//
// When `SkipComments` is true, comment tokens are stepped over. If no
// acceptable token lies between `Location` and the start of the file, the
// returned token has kind tok::unknown.
std::pair<Token, SourceLocation>
getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
                         const LangOptions &LangOpts, bool SkipComments) {
  Token NotFound;
  NotFound.setKind(tok::unknown);

  // Nothing precedes an invalid location; reject it before offsetting it so
  // we never compute a location derived from an invalid one.
  if (Location.isInvalid())
    return {NotFound, Location};

  const SourceLocation StartOfFile =
      SM.getLocForStartOfFile(SM.getFileID(Location));

  // Compare against the start of the file *before* stepping back. This keeps
  // the scan inside the file even when the token that begins at the file
  // start is rejected (e.g. a skipped leading comment), and it lets a
  // one-character token at the very start of the file be inspected.
  while (Location != StartOfFile) {
    Location = Location.getLocWithOffset(-1);
    if (Location.isInvalid())
      break;

    Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);

    Token Candidate;
    // getRawToken() returns true on failure; keep walking backwards.
    if (Lexer::getRawToken(Location, Candidate, SM, LangOpts))
      continue;
    if (SkipComments && Candidate.is(tok::comment))
      continue;

    return {Candidate, Location};
  }

  // No acceptable token was found: report tok::unknown rather than whatever
  // token (possibly a rejected comment) was lexed last.
  return {NotFound, Location};
}


// Returns the token preceding `Location`, as computed by
// getPreviousTokenAndStart(); the start location reported alongside the token
// is discarded.
Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
                       const LangOptions &LangOpts, bool SkipComments) {
  return getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments).first;
}


// Returns the beginning of the token that covers the character immediately
// before `Start`. An invalid location is returned when `Start`, or the
// location one character before it, is invalid or a macro location.
SourceLocation findPreviousTokenStart(SourceLocation Start,
                                      const SourceManager &SM,
                                      const LangOptions &LangOpts) {
  // Only valid, non-macro locations can be processed.
  const auto IsUnusable = [](SourceLocation Loc) {
    return Loc.isInvalid() || Loc.isMacroID();
  };

  if (IsUnusable(Start))
    return {};

  const SourceLocation OneBefore = Start.getLocWithOffset(-1);
  if (IsUnusable(OneBefore))
    return {};

  return Lexer::GetBeginningOfToken(OneBefore, SM, LangOpts);
}


// Walks backwards from `Start`, one token at a time, until a token of kind
// `TK` is found, and returns that token's location. An invalid location is
// returned if an invalid or macro location is encountered, or if a token
// cannot be lexed.
SourceLocation findPreviousTokenKind(SourceLocation Start,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts,
                                     tok::TokenKind TK) {
  if (Start.isInvalid() || Start.isMacroID())
    return {};

  for (SourceLocation Cursor = Start;;) {
    const SourceLocation PrevStart =
        findPreviousTokenStart(Cursor, SM, LangOpts);
    if (PrevStart.isInvalid() || PrevStart.isMacroID())
      return {};

    Token Prev;
    // getRawToken() reports failure by returning true.
    const bool LexFailed = Lexer::getRawToken(PrevStart, Prev, SM, LangOpts,
                                              /*IgnoreWhiteSpace=*/true);
    if (LexFailed)
      return {};

    if (Prev.is(TK))
      return Prev.getLocation();

    // Not the kind we are after; continue from the start of this token.
    Cursor = PrevStart;
  }
}


// Returns whatever findNextAnyTokenKind() reports when asked to search from
// `Start` for a comma or a semicolon token.
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
                                  const LangOptions &LangOpts) {
  const SourceLocation Terminator =
      findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
  return Terminator;
}


// Raw-lexes the token that follows the token at `Start`, with comment
// retention enabled so that a comment is returned as a token. Returns
// std::nullopt when `Start` is a macro location or the file buffer cannot be
// obtained.
std::optional<Token>
findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM,
                               const LangOptions &LangOpts) {
  // `Lexer::findNextToken` would ignore comments, hence the manual lexing.
  if (Start.isMacroID())
    return std::nullopt;

  // Step to the end of the current token and split that location into its
  // file and the offset within the file.
  const SourceLocation AfterToken =
      Lexer::getLocForEndOfToken(Start, 0, SM, LangOpts);
  const std::pair<FileID, unsigned> Decomposed =
      SM.getDecomposedLoc(AfterToken);
  const FileID FID = Decomposed.first;
  const unsigned Offset = Decomposed.second;

  bool BufferInvalid = false;
  const StringRef Buffer = SM.getBufferData(FID, &BufferInvalid);
  if (BufferInvalid)
    return std::nullopt;

  // Start lexing at the computed offset inside the file buffer.
  Lexer RawLexer(SM.getLocForStartOfFile(FID), LangOpts, Buffer.begin(),
                 Buffer.data() + Offset, Buffer.end());
  RawLexer.SetCommentRetentionState(true);

  Token Next;
  RawLexer.LexFromRawLexer(Next);
  return Next;
}


// Repeatedly asks Lexer::findNextToken() for the token after the current
// position, advancing past every comment token, and returns the first result
// that is not a comment (which may be std::nullopt). Returns std::nullopt if
// the position becomes invalid.
std::optional<Token>
findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
                              const LangOptions &LangOpts) {
  for (SourceLocation Cursor = Start; Cursor.isValid();) {
    std::optional<Token> Next = Lexer::findNextToken(Cursor, SM, LangOpts);

    const bool IsComment = Next && Next->is(tok::comment);
    if (!IsComment)
      return Next;

    // Continue the search from the comment that was just seen.
    Cursor = Next->getLocation();
  }

  return std::nullopt;
}


// Steps through `Range` token by token and returns true as soon as the current
// position is a macro location, the next token cannot be found, or the next
// token is a '#'. Returns false once the position moves past the end of
// `Range` without any of those being seen.
bool rangeContainsExpansionsOrDirectives(SourceRange Range,
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts) {
  assert(Range.isValid() && "Invalid Range for relexing provided");

  for (SourceLocation Cursor = Range.getBegin(); Cursor <= Range.getEnd();) {
    if (Cursor.isMacroID())
      return true;

    const std::optional<Token> Next =
        Lexer::findNextToken(Cursor, SM, LangOpts);

    // A missing token is treated the same way as a '#' token.
    if (!Next || Next->is(tok::hash))
      return true;

    Cursor = Next->getLocation();
  }

  return false;
}


// Raw-lexes the tokens from the beginning of `Range` up to (but excluding) the
// first token located at or after the end of `Range`, looking for tokens of
// kind `TK` (which must be const, volatile or restrict).
//
// Matches are tracked separately depending on whether a '<' has been seen yet;
// a '>' or '>>' discards the match recorded after a '<'. The match recorded
// after a '<' is returned if there is one, otherwise the last match seen
// before any '<' (possibly std::nullopt).
std::optional<Token> getQualifyingToken(tok::TokenKind TK,
                                        CharSourceRange Range,
                                        const ASTContext &Context,
                                        const SourceManager &SM) {
  assert((TK == tok::kw_const || TK == tok::kw_volatile ||
          TK == tok::kw_restrict) &&
         "TK is not a qualifier keyword");

  const std::pair<FileID, unsigned> Decomposed =
      SM.getDecomposedLoc(Range.getBegin());
  const StringRef Buffer = SM.getBufferData(Decomposed.first);
  Lexer RawLexer(SM.getLocForStartOfFile(Decomposed.first),
                 Context.getLangOpts(), Buffer.begin(),
                 Buffer.data() + Decomposed.second, Buffer.end());

  std::optional<Token> MatchOutsideTemplate;
  std::optional<Token> MatchInsideTemplate;
  bool SeenLess = false;

  Token Tok;
  while (!RawLexer.LexFromRawLexer(Tok)) {
    // Stop at the first token that sits at or beyond the end of the range.
    if (Range.getEnd() == Tok.getLocation() ||
        SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation()))
      break;

    // Raw identifiers carry no keyword information; look the spelling up in
    // the identifier table so the token gets its real kind.
    if (Tok.is(tok::raw_identifier)) {
      IdentifierInfo &Info = Context.Idents.get(
          StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
      Tok.setIdentifierInfo(&Info);
      Tok.setKind(Info.getTokenID());
    }

    if (Tok.is(tok::less)) {
      SeenLess = true;
    } else if (Tok.isOneOf(tok::greater, tok::greatergreater)) {
      MatchInsideTemplate = std::nullopt;
    } else if (Tok.is(TK)) {
      if (SeenLess)
        MatchInsideTemplate = Tok;
      else
        MatchOutsideTemplate = Tok;
    }
  }

  if (MatchInsideTemplate.has_value())
    return MatchInsideTemplate;
  return MatchOutsideTemplate;
}


// Returns true if `S` is a CompoundStmt, a DeclStmt or a NullStmt.
// getUnifiedEndLoc() stops descending at such a statement and uses the
// statement's end location unchanged.
static bool breakAndReturnEnd(const Stmt &S) {
  return isa<CompoundStmt>(S) || isa<DeclStmt>(S) || isa<NullStmt>(S);
}


// Returns true if `S` is an Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt,
// GotoStmt or SEHLeaveStmt. getUnifiedEndLoc() stops descending at such a
// statement and looks for the semicolon that follows the end location.
static bool breakAndReturnEndPlus1Token(const Stmt &S) {
  return isa<Expr>(S) || isa<DoStmt>(S) || isa<ReturnStmt>(S) ||
         isa<BreakStmt>(S) || isa<ContinueStmt>(S) || isa<GotoStmt>(S) ||
         isa<SEHLeaveStmt>(S);
}


// Given the end location of a Stmt which does not include its semicolon, this
// function returns the SourceLocation of that semicolon, or an invalid
// location if the next non-comment token is not a semicolon.
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
                                                  const SourceManager &SM,
                                                  const LangOptions &LangOpts) {
  // Yields the location of the first non-comment token after `Loc` if that
  // token is a semicolon, and nothing otherwise.
  const auto SemicolonAfter =
      [&](SourceLocation Loc) -> std::optional<SourceLocation> {
    const std::optional<Token> Following =
        findNextTokenSkippingComments(Loc, SM, LangOpts);
    if (Following && Following->is(tok::TokenKind::semi))
      return Following->getLocation();
    return std::nullopt;
  };

  if (EndLoc.isMacroID()) {
    // Suppose EndLoc refers to a call `foo()` written as an argument of a
    // macro F. The semicolon may be part of the macro arguments as well:
    //  F     (      foo()               ;   )
    //  ^ EndLoc         ^ SpellingLoc   ^ next token of SpellingLoc
    //
    // Requiring the next token to be a semicolon sidesteps the remaining macro
    // issues. Ideally the result would be `F` with the spelling location of
    // `;`; for now the location of that token is returned as is.
    if (const std::optional<SourceLocation> InsideMacro =
            SemicolonAfter(SM.getSpellingLoc(EndLoc)))
      return *InsideMacro;

    // Otherwise fall through to the 'normal handling':
    //  F     (      foo()              ) ;
    //  ^ EndLoc         ^ SpellingLoc  ) ^ next token of EndLoc
  }

  // Checking for a semicolon here as well avoids some issues with macros.
  if (const std::optional<SourceLocation> AfterEnd = SemicolonAfter(EndLoc))
    return *AfterEnd;

  return {};
}


// Returns an end location for `S`. The chain of last children is followed
// from `S` until a statement is reached that has no children or that is
// classified by breakAndReturnEnd() / breakAndReturnEndPlus1Token(). If that
// statement is of the "plus one token" kind, the semicolon following
// S.getEndLoc() is looked up and returned (invalid if there is none);
// otherwise S.getEndLoc() is returned as is.
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
                                const LangOptions &LangOpts) {
  const Stmt *Deepest = &S;
  for (;;) {
    if (Deepest->children().empty() || breakAndReturnEnd(*Deepest) ||
        breakAndReturnEndPlus1Token(*Deepest))
      break;

    // Descend into the last child of the current statement.
    const Stmt *Last = Deepest;
    for (const Stmt *Child : Deepest->children())
      Last = Child;
    Deepest = Last;
  }

  const bool NeedsSemicolon = !breakAndReturnEnd(*Deepest) &&
                              breakAndReturnEndPlus1Token(*Deepest);
  if (NeedsSemicolon)
    return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);

  return S.getEndLoc();
}


// Computes the location at which a noexcept specifier would be placed for
// `FuncDecl`, i.e. the location right after the ')' that closes the parameter
// list. Returns an invalid location if `FuncDecl` is null or the closing
// parenthesis cannot be located.
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
                                               const SourceManager &SM) {
  if (!FuncDecl)
    return {};

  const LangOptions &LangOpts = FuncDecl->getLangOpts();

  if (FuncDecl->getNumParams() == 0) {
    // Start at the beginning of the function declaration, and find the closing
    // parenthesis after which we would place the noexcept specifier.
    Token CurrentToken;
    SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
    while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
                               /*IgnoreWhiteSpace=*/true)) {
      // Whitespace is skipped while lexing, so the token may begin after
      // CurrentLocation (e.g. `f( )`). Use the token's own end location
      // instead of CurrentLocation + 1 to land right after the ')'.
      if (CurrentToken.is(tok::r_paren))
        return CurrentToken.getEndLoc();

      // An end-of-file token would never advance CurrentLocation; give up
      // instead of lexing the same position forever.
      if (CurrentToken.is(tok::eof))
        break;

      CurrentLocation = CurrentToken.getEndLoc();
    }

    // Failed to find the closing parenthesis, so just return an invalid
    // SourceLocation.
    return {};
  }

  // FunctionDecl with parameters: search for the ')' that follows the end of
  // the last parameter declaration.
  const SourceLocation NoexceptLoc =
      FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
  if (NoexceptLoc.isValid())
    return Lexer::findLocationAfterToken(
        NoexceptLoc, tok::r_paren, SM, LangOpts,
        /*SkipTrailingWhitespaceAndNewLine=*/true);

  return {};
}


} // namespace clang::tidy::utils::lexer

Info
FunctionInfo Info
Definition: FunctionSizeCheck.cpp:119

Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:38

Loc
SourceLocation Loc
Definition: KernelNameRestrictionCheck.cpp:44

LexerUtils.h

clang::tidy::utils::lexer
Definition: LexerUtils.cpp:15

clang::tidy::utils::lexer::getLocationForNoexceptSpecifier
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM)
For a given FunctionDecl returns the location where you would need to place the noexcept specifier.
Definition: LexerUtils.cpp:252

clang::tidy::utils::lexer::getPreviousTokenAndStart
std::pair< Token, SourceLocation > getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Definition: LexerUtils.cpp:18

clang::tidy::utils::lexer::getUnifiedEndLoc
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts)
Stmt->getEndLoc does not always behave the same way depending on Token type.
Definition: LexerUtils.cpp:236

clang::tidy::utils::lexer::rangeContainsExpansionsOrDirectives
bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Re-lex the provided Range and return false if either a macro spans multiple tokens,...
Definition: LexerUtils.cpp:125

clang::tidy::utils::lexer::findNextTerminator
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:82

clang::tidy::utils::lexer::findNextAnyTokenKind
SourceLocation findNextAnyTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, TokenKind TK, TokenKinds... TKs)
Definition: LexerUtils.h:68

clang::tidy::utils::lexer::getPreviousToken
Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments)
Returns previous token or tok::unknown if not found.
Definition: LexerUtils.cpp:39

clang::tidy::utils::lexer::findNextTokenSkippingComments
std::optional< Token > findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:111

clang::tidy::utils::lexer::findPreviousTokenStart
SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:46

clang::tidy::utils::lexer::getSemicolonAfterStmtEndLoc
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:199

clang::tidy::utils::lexer::breakAndReturnEndPlus1Token
static bool breakAndReturnEndPlus1Token(const Stmt &S)
Definition: LexerUtils.cpp:192

clang::tidy::utils::lexer::breakAndReturnEnd
static bool breakAndReturnEnd(const Stmt &S)
Definition: LexerUtils.cpp:188

clang::tidy::utils::lexer::findPreviousTokenKind
SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
Definition: LexerUtils.cpp:59

clang::tidy::utils::lexer::getQualifyingToken
std::optional< Token > getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM)
Assuming that Range spans a CVR-qualified type, returns the token in Range that is responsible for th...
Definition: LexerUtils.cpp:149

clang::tidy::utils::lexer::findNextTokenIncludingComments
std::optional< Token > findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
Definition: LexerUtils.cpp:88

Location
Definition: Modularize.cpp:382