doxygen/CommentParser_8cpp_source.html

//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#include "clang/AST/CommentParser.h"

#include "clang/AST/CommentCommandTraits.h"

#include "clang/AST/CommentSema.h"

#include "clang/Basic/CharInfo.h"

#include "clang/Basic/DiagnosticComment.h"

#include "clang/Basic/SourceManager.h"

#include "llvm/Support/ErrorHandling.h"


namespace clang {


/// Returns true when every character of \p S is a whitespace character.
/// An empty string therefore yields true.
static inline bool isWhitespace(llvm::StringRef S) {
  for (const char Ch : S)
    if (!isWhitespace(Ch))
      return false;
  return true;
}


namespace comments {


/// Re-lexes a sequence of tok::text tokens.

class TextTokenRetokenizer {

  llvm::BumpPtrAllocator &Allocator;

  Parser &P;


  /// This flag is set when there are no more tokens we can fetch from lexer.

  bool NoMoreInterestingTokens;


  /// Token buffer: tokens we have processed and lookahead.

  SmallVector<Token, 16> Toks;


  /// A position in \c Toks.

  struct Position {

    const char *BufferStart;

    const char *BufferEnd;

    const char *BufferPtr;

    SourceLocation BufferStartLoc;

    unsigned CurToken;

  };


  /// Current position in Toks.

  Position Pos;


  bool isEnd() const {

    return Pos.CurToken >= Toks.size();

  }


  /// Sets up the buffer pointers to point to current token.

  void setupBuffer() {

    assert(!isEnd());

    const Token &Tok = Toks[Pos.CurToken];


    Pos.BufferStart = Tok.getText().begin();

    Pos.BufferEnd = Tok.getText().end();

    Pos.BufferPtr = Pos.BufferStart;

    Pos.BufferStartLoc = Tok.getLocation();

  }


  SourceLocation getSourceLocation() const {

    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;

    return Pos.BufferStartLoc.getLocWithOffset(CharNo);

  }


  char peek() const {

    assert(!isEnd());

    assert(Pos.BufferPtr != Pos.BufferEnd);

    return *Pos.BufferPtr;

  }


  void consumeChar() {

    assert(!isEnd());

    assert(Pos.BufferPtr != Pos.BufferEnd);

    Pos.BufferPtr++;

    if (Pos.BufferPtr == Pos.BufferEnd) {

      Pos.CurToken++;

      if (isEnd() && !addToken())

        return;


      assert(!isEnd());

      setupBuffer();

    }

  }


  /// Extract a template type

  bool lexTemplate(SmallString<32> &WordText) {

    unsigned BracketCount = 0;

    while (!isEnd()) {

      const char C = peek();

      WordText.push_back(C);

      consumeChar();

      switch (C) {

      case '<': {

        BracketCount++;

        break;

      }

      case '>': {

        BracketCount--;

        if (!BracketCount)

          return true;

        break;

      }

      default:

        break;

      }

    }

    return false;

  }


  /// Add a token.

  /// Returns true on success, false if there are no interesting tokens to

  /// fetch from lexer.

  bool addToken() {

    if (NoMoreInterestingTokens)

      return false;


    if (P.Tok.is(tok::newline)) {

      // If we see a single newline token between text tokens, skip it.

      Token Newline = P.Tok;

      P.consumeToken();

      if (P.Tok.isNot(tok::text)) {

        P.putBack(Newline);

        NoMoreInterestingTokens = true;

        return false;

      }

    }

    if (P.Tok.isNot(tok::text)) {

      NoMoreInterestingTokens = true;

      return false;

    }


    Toks.push_back(P.Tok);

    P.consumeToken();

    if (Toks.size() == 1)

      setupBuffer();

    return true;

  }


  void consumeWhitespace() {

    while (!isEnd()) {

      if (isWhitespace(peek()))

        consumeChar();

      else

        break;

    }

  }


  void formTokenWithChars(Token &Result,

                          SourceLocation Loc,

                          const char *TokBegin,

                          unsigned TokLength,

                          StringRef Text) {

    Result.setLocation(Loc);

    Result.setKind(tok::text);

    Result.setLength(TokLength);

#ifndef NDEBUG

    Result.TextPtr = "<UNSET>";

    Result.IntVal = 7;

#endif

    Result.setText(Text);

  }


public:

  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):

      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {

    Pos.CurToken = 0;

    addToken();

  }


  /// Extract a type argument

  bool lexType(Token &Tok) {

    if (isEnd())

      return false;


    // Save current position in case we need to rollback because the type is

    // empty.

    Position SavedPos = Pos;


    // Consume any leading whitespace.

    consumeWhitespace();

    SmallString<32> WordText;

    const char *WordBegin = Pos.BufferPtr;

    SourceLocation Loc = getSourceLocation();


    while (!isEnd()) {

      const char C = peek();

      // For non-whitespace characters we check if it's a template or otherwise

      // continue reading the text into a word.

      if (!isWhitespace(C)) {

        if (C == '<') {

          if (!lexTemplate(WordText))

            return false;

        } else {

          WordText.push_back(C);

          consumeChar();

        }

      } else {

        consumeChar();

        break;

      }

    }


    const unsigned Length = WordText.size();

    if (Length == 0) {

      Pos = SavedPos;

      return false;

    }


    char *TextPtr = Allocator.Allocate<char>(Length + 1);


    memcpy(TextPtr, WordText.c_str(), Length + 1);

    StringRef Text = StringRef(TextPtr, Length);


    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);

    return true;

  }


  // Check if this line starts with @par or \par

  bool startsWithParCommand() {

    unsigned Offset = 1;


    // Skip all whitespace characters at the beginning.

    // This needs to backtrack because Pos has already advanced past the

    // actual \par or @par command by the time this function is called.

    while (isWhitespace(*(Pos.BufferPtr - Offset)))

      Offset++;


    // Once we've reached the whitespace, backtrack and check if the previous

    // four characters are \par or @par.

    llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);

    return LineStart.starts_with("\\par") || LineStart.starts_with("@par");

  }


  /// Extract a par command argument-header.

  bool lexParHeading(Token &Tok) {

    if (isEnd())

      return false;


    Position SavedPos = Pos;


    consumeWhitespace();

    SmallString<32> WordText;

    const char *WordBegin = Pos.BufferPtr;

    SourceLocation Loc = getSourceLocation();


    if (!startsWithParCommand())

      return false;


    // Read until the end of this token, which is effectively the end of the

    // line. This gets us the content of the par header, if there is one.

    while (!isEnd()) {

      WordText.push_back(peek());

      if (Pos.BufferPtr + 1 == Pos.BufferEnd) {

        consumeChar();

        break;

      }

      consumeChar();

    }


    unsigned Length = WordText.size();

    if (Length == 0) {

      Pos = SavedPos;

      return false;

    }


    char *TextPtr = Allocator.Allocate<char>(Length + 1);


    memcpy(TextPtr, WordText.c_str(), Length + 1);

    StringRef Text = StringRef(TextPtr, Length);


    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);

    return true;

  }


  /// Extract a word -- sequence of non-whitespace characters.

  bool lexWord(Token &Tok) {

    if (isEnd())

      return false;


    Position SavedPos = Pos;


    consumeWhitespace();

    SmallString<32> WordText;

    const char *WordBegin = Pos.BufferPtr;

    SourceLocation Loc = getSourceLocation();

    while (!isEnd()) {

      const char C = peek();

      if (!isWhitespace(C)) {

        WordText.push_back(C);

        consumeChar();

      } else

        break;

    }

    const unsigned Length = WordText.size();

    if (Length == 0) {

      Pos = SavedPos;

      return false;

    }


    char *TextPtr = Allocator.Allocate<char>(Length + 1);


    memcpy(TextPtr, WordText.c_str(), Length + 1);

    StringRef Text = StringRef(TextPtr, Length);


    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);

    return true;

  }


  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {

    if (isEnd())

      return false;


    Position SavedPos = Pos;


    consumeWhitespace();

    SmallString<32> WordText;

    const char *WordBegin = Pos.BufferPtr;

    SourceLocation Loc = getSourceLocation();

    bool Error = false;

    if (!isEnd()) {

      const char C = peek();

      if (C == OpenDelim) {

        WordText.push_back(C);

        consumeChar();

      } else

        Error = true;

    }

    char C = '\0';

    while (!Error && !isEnd()) {

      C = peek();

      WordText.push_back(C);

      consumeChar();

      if (C == CloseDelim)

        break;

    }

    if (!Error && C != CloseDelim)

      Error = true;


    if (Error) {

      Pos = SavedPos;

      return false;

    }


    const unsigned Length = WordText.size();

    char *TextPtr = Allocator.Allocate<char>(Length + 1);


    memcpy(TextPtr, WordText.c_str(), Length + 1);

    StringRef Text = StringRef(TextPtr, Length);


    formTokenWithChars(Tok, Loc, WordBegin,

                       Pos.BufferPtr - WordBegin, Text);

    return true;

  }


  /// Put back tokens that we didn't consume.

  void putBackLeftoverTokens() {

    if (isEnd())

      return;


    bool HavePartialTok = false;

    Token PartialTok;

    if (Pos.BufferPtr != Pos.BufferStart) {

      formTokenWithChars(PartialTok, getSourceLocation(),

                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,

                         StringRef(Pos.BufferPtr,

                                   Pos.BufferEnd - Pos.BufferPtr));

      HavePartialTok = true;

      Pos.CurToken++;

    }


    P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));

    Pos.CurToken = Toks.size();


    if (HavePartialTok)

      P.putBack(PartialTok);

  }

};


/// Constructs a parser that stores references to the lexer, semantic
/// actions, allocator, source manager, diagnostics engine and command traits
/// it is given.
///
/// consumeToken() is called once during construction; presumably this loads
/// the first token into \c Tok -- confirm against CommentParser.h.
Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,

               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,

               const CommandTraits &Traits):

    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),

    Traits(Traits) {

  consumeToken();

}


/// Parses the arguments of a parameter command: an optional direction
/// specification in square brackets followed by an optional parameter name.
/// Each argument that is found is reported to Sema.
void Parser::parseParamCommandArgs(ParamCommandComment *PC,
                                   TextTokenRetokenizer &Retokenizer) {
  Token ArgTok;

  // A leading "[...]" is the direction specification,
  // e.g., [in], [out], [in,out].
  const bool HasDirection = Retokenizer.lexDelimitedSeq(ArgTok, '[', ']');
  if (HasDirection) {
    S.actOnParamCommandDirectionArg(PC, ArgTok.getLocation(),
                                    ArgTok.getEndLocation(), ArgTok.getText());
  }

  // The next word, if there is one, is the parameter name.
  if (!Retokenizer.lexWord(ArgTok))
    return;
  S.actOnParamCommandParamNameArg(PC, ArgTok.getLocation(),
                                  ArgTok.getEndLocation(), ArgTok.getText());
}


/// Parses the single argument of a template-parameter command: the next
/// word, if there is one, is reported to Sema as the parameter name.
void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
                                    TextTokenRetokenizer &Retokenizer) {
  Token NameTok;
  if (!Retokenizer.lexWord(NameTok))
    return;

  S.actOnTParamCommandParamNameArg(TPC, NameTok.getLocation(),
                                   NameTok.getEndLocation(),
                                   NameTok.getText());
}


/// Lexes up to \p NumArgs whitespace-delimited words and returns them as
/// command arguments.  Storage for \p NumArgs arguments comes from the
/// parser's allocator; the returned array covers only the arguments that
/// were actually found.
ArrayRef<Comment::Argument>
Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token ArgTok; Count < NumArgs && Retokenizer.lexWord(ArgTok); ++Count) {
    const SourceRange Range(ArgTok.getLocation(), ArgTok.getEndLocation());
    Args[Count] = Comment::Argument{Range, ArgTok.getText()};
  }

  return llvm::ArrayRef(Args, Count);
}


/// Lexes up to \p NumArgs type arguments (see
/// TextTokenRetokenizer::lexType) and returns them as command arguments.
/// Storage for \p NumArgs arguments comes from the parser's allocator; the
/// returned array covers only the arguments that were actually found.
ArrayRef<Comment::Argument>
Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
                              unsigned NumArgs) {
  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token ArgTok; Count < NumArgs && Retokenizer.lexType(ArgTok); ++Count) {
    const SourceRange Range(ArgTok.getLocation(), ArgTok.getEndLocation());
    Args[Count] = Comment::Argument{Range, ArgTok.getText()};
  }

  return llvm::ArrayRef(Args, Count);
}


/// Lexes up to \p NumArgs paragraph headings (see
/// TextTokenRetokenizer::lexParHeading) and returns them as command
/// arguments.  \p NumArgs must be positive.  Storage for \p NumArgs
/// arguments comes from the parser's allocator; the returned array covers
/// only the arguments that were actually found.
ArrayRef<Comment::Argument>
Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
                            unsigned NumArgs) {
  assert(NumArgs > 0);
  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token ArgTok; Count < NumArgs && Retokenizer.lexParHeading(ArgTok);
       ++Count) {
    const SourceRange Range(ArgTok.getLocation(), ArgTok.getEndLocation());
    Args[Count] = Comment::Argument{Range, ArgTok.getText()};
  }

  return llvm::ArrayRef(Args, Count);
}


/// Parses a block command: the command token itself, its arguments (if the
/// command takes any) and the paragraph attached to it.
///
/// Exactly one of the three command nodes (param, tparam, plain block
/// command) is created, depending on the command's traits, and that node is
/// returned.
BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
  const CommandMarkerKind Marker =
      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;

  // Only one of these ends up non-null.
  ParamCommandComment *PC = nullptr;
  TParamCommandComment *TPC = nullptr;
  BlockCommandComment *BC = nullptr;
  if (Info->IsParamCommand)
    PC = S.actOnParamCommandStart(Tok.getLocation(), Tok.getEndLocation(),
                                  Tok.getCommandID(), Marker);
  else if (Info->IsTParamCommand)
    TPC = S.actOnTParamCommandStart(Tok.getLocation(), Tok.getEndLocation(),
                                    Tok.getCommandID(), Marker);
  else
    BC = S.actOnBlockCommandStart(Tok.getLocation(), Tok.getEndLocation(),
                                  Tok.getCommandID(), Marker);
  consumeToken();

  // Attaches the paragraph to whichever command node was created and
  // returns that node.
  auto Finish = [&](ParagraphComment *Paragraph) -> BlockCommandComment * {
    if (PC) {
      S.actOnParamCommandFinish(PC, Paragraph);
      return PC;
    }
    if (TPC) {
      S.actOnTParamCommandFinish(TPC, Paragraph);
      return TPC;
    }
    S.actOnBlockCommandFinish(BC, Paragraph);
    return BC;
  };

  // Block command ahead.  We can't nest block commands, so pretend that this
  // command has an empty argument.
  if (isTokBlockCommand())
    return Finish(S.actOnParagraphComment({}));

  if (PC || TPC || Info->NumArgs > 0) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.
    TextTokenRetokenizer Retokenizer(Allocator, *this);

    if (PC) {
      parseParamCommandArgs(PC, Retokenizer);
    } else if (TPC) {
      parseTParamCommandArgs(TPC, Retokenizer);
    } else if (Info->IsThrowsCommand) {
      S.actOnBlockCommandArgs(
          BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
    } else if (Info->IsParCommand) {
      S.actOnBlockCommandArgs(BC,
                              parseParCommandArgs(Retokenizer, Info->NumArgs));
    } else {
      S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
    }

    Retokenizer.putBackLeftoverTokens();
  }

  // If there's a block command ahead, either immediately or right after a
  // single newline, we will attach an empty paragraph to this command.
  bool EmptyParagraph = isTokBlockCommand();
  if (!EmptyParagraph && Tok.is(tok::newline)) {
    // Peek past the newline, then restore it.
    Token NewlineTok = Tok;
    consumeToken();
    EmptyParagraph = isTokBlockCommand();
    putBack(NewlineTok);
  }

  if (EmptyParagraph)
    return Finish(S.actOnParagraphComment({}));

  // Since we have checked for a block command, we should have parsed a
  // paragraph.
  BlockContentComment *Block = parseParagraphOrBlockCommand();
  return Finish(cast<ParagraphComment>(Block));
}


/// Parses an inline command together with its word arguments and returns the
/// node created by Sema.  A warning is emitted when fewer arguments are
/// present than the command expects.
InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  const Token CommandTok = Tok;
  const CommandInfo *Info = Traits.getCommandInfo(CommandTok.getCommandID());
  consumeToken();

  // Arguments are carved out of the text tokens that follow the command.
  TextTokenRetokenizer Retokenizer(Allocator, *this);
  const ArrayRef<Comment::Argument> Args =
      parseCommandArgs(Retokenizer, Info->NumArgs);

  InlineCommandComment *IC =
      S.actOnInlineCommand(CommandTok.getLocation(),
                           CommandTok.getEndLocation(),
                           CommandTok.getCommandID(), Args);

  const bool MissingArgs = Args.size() < Info->NumArgs;
  if (MissingArgs) {
    Diag(CommandTok.getEndLocation().getLocWithOffset(1),
         diag::warn_doc_inline_command_not_enough_arguments)
        << CommandTok.is(tok::at_command) << Info->Name << Args.size()
        << Info->NumArgs
        << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
  }

  // Whatever the arguments did not use goes back to the parser.
  Retokenizer.putBackLeftoverTokens();
  return IC;
}


/// Parses an HTML start tag: the tag name, a list of attributes (each with
/// an optional ="quoted string" value) and the closing '>' or '/>'.
///
/// Malformed input is diagnosed; the tag is then finished with a
/// default-constructed location for the closing bracket unless a closing
/// bracket is found after skipping the stray tokens.
HTMLStartTagComment *Parser::parseHTMLStartTag() {
  assert(Tok.is(tok::html_start_tag));
  HTMLStartTagComment *HST =
      S.actOnHTMLStartTagStart(Tok.getLocation(), Tok.getHTMLTagStartName());
  consumeToken();

  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;

  // Hands the attributes collected so far to Sema and finishes the tag.
  auto FinishTag = [&](SourceLocation GreaterLoc, bool IsSelfClosing) {
    S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                              GreaterLoc, IsSelfClosing);
  };

  // Drops a run of '=' and quoted-string tokens.
  auto SkipEqualsAndQuotedStrings = [&] {
    while (Tok.is(tok::html_equals) || Tok.is(tok::html_quoted_string))
      consumeToken();
  };

  for (;;) {
    switch (Tok.getKind()) {
    case tok::html_ident: {
      Token NameTok = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_equals)) {
        // Attribute without a value.
        Attrs.push_back(HTMLStartTagComment::Attribute(
            NameTok.getLocation(), NameTok.getHTMLIdent()));
        continue;
      }

      Token EqualsTok = Tok;
      consumeToken();
      if (Tok.is(tok::html_quoted_string)) {
        // Complete name="value" attribute.
        Attrs.push_back(HTMLStartTagComment::Attribute(
            NameTok.getLocation(), NameTok.getHTMLIdent(),
            EqualsTok.getLocation(),
            SourceRange(Tok.getLocation(), Tok.getEndLocation()),
            Tok.getHTMLQuotedString()));
        consumeToken();
        continue;
      }

      // '=' that is not followed by a quoted string: diagnose, keep the
      // attribute without a value and skip any further stray tokens.
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_quoted_string)
          << SourceRange(EqualsTok.getLocation());
      Attrs.push_back(HTMLStartTagComment::Attribute(NameTok.getLocation(),
                                                     NameTok.getHTMLIdent()));
      SkipEqualsAndQuotedStrings();
      continue;
    }

    case tok::html_greater:
      FinishTag(Tok.getLocation(), /* IsSelfClosing = */ false);
      consumeToken();
      return HST;

    case tok::html_slash_greater:
      FinishTag(Tok.getLocation(), /* IsSelfClosing = */ true);
      consumeToken();
      return HST;

    case tok::html_equals:
    case tok::html_quoted_string: {
      // '=' or a quoted string where an attribute name was expected.
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_ident_or_greater);
      SkipEqualsAndQuotedStrings();

      // Resume normal parsing if the tag continues after the stray tokens.
      const bool TagContinues = Tok.is(tok::html_ident) ||
                                Tok.is(tok::html_greater) ||
                                Tok.is(tok::html_slash_greater);
      if (TagContinues)
        continue;

      FinishTag(SourceLocation(), /* IsSelfClosing = */ false);
      return HST;
    }

    default: {
      // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
      FinishTag(SourceLocation(), /* IsSelfClosing = */ false);

      bool StartLineInvalid;
      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
          HST->getLocation(), &StartLineInvalid);
      bool EndLineInvalid;
      const unsigned EndLine =
          SourceMgr.getPresumedLineNumber(Tok.getLocation(), &EndLineInvalid);

      // When the tag start and the offending token are on the same line (or
      // a line number is unavailable) the tag range is attached to the
      // warning itself; otherwise a separate note points at the tag.
      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater)
            << HST->getSourceRange();
      } else {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater);
        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
            << HST->getSourceRange();
      }
      return HST;
    }
    }
  }
}


/// Parses an HTML end tag and its closing '>', if present, and returns the
/// node created by Sema.
HTMLEndTagComment *Parser::parseHTMLEndTag() {
  assert(Tok.is(tok::html_end_tag));
  Token EndTagTok = Tok;
  consumeToken();

  // Remains default-constructed when the closing '>' is missing.
  SourceLocation GreaterLoc;
  const bool HasGreater = Tok.is(tok::html_greater);
  if (HasGreater) {
    GreaterLoc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLEndTag(EndTagTok.getLocation(), GreaterLoc,
                           EndTagTok.getHTMLTagEndName());
}


/// Parses a paragraph of inline content, or a block command if one is the
/// first thing encountered.
///
/// Inline content (text, inline and unknown commands, HTML tags) is
/// collected until block content, EOF, or an empty line is reached, and is
/// then returned as a paragraph.  If a block command is seen before any
/// content has been collected, the result of parseBlockCommand() is returned
/// instead.
///
/// Control flow inside the loop: \c continue keeps collecting content, while
/// \c break leaves the switch and falls into the \c break that ends the loop.
BlockContentComment *Parser::parseParagraphOrBlockCommand() {

  SmallVector<InlineContentComment *, 8> Content;


  while (true) {

    switch (Tok.getKind()) {

    case tok::verbatim_block_begin:

    case tok::verbatim_line_name:

    case tok::eof:

      break; // Block content or EOF ahead, finish this paragraph.


    case tok::unknown_command:

      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),

                                              Tok.getEndLocation(),

                                              Tok.getUnknownCommandName()));

      consumeToken();

      continue;


    case tok::backslash_command:

    case tok::at_command: {

      const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());

      if (Info->IsBlockCommand) {

        // A block command only starts a new block if nothing has been
        // collected yet; otherwise it terminates the current paragraph and
        // is left for the caller.
        if (Content.size() == 0)

          return parseBlockCommand();

        break; // Block command ahead, finish this paragraph.

      }

      if (Info->IsVerbatimBlockEndCommand) {

        // An end command without a matching begin: warn and drop the token.
        Diag(Tok.getLocation(),

             diag::warn_verbatim_block_end_without_start)

          << Tok.is(tok::at_command)

          << Info->Name

          << SourceRange(Tok.getLocation(), Tok.getEndLocation());

        consumeToken();

        continue;

      }

      if (Info->IsUnknownCommand) {

        Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),

                                                Tok.getEndLocation(),

                                                Info->getID()));

        consumeToken();

        continue;

      }

      assert(Info->IsInlineCommand);

      Content.push_back(parseInlineCommand());

      continue;

    }


    case tok::newline: {

      consumeToken();

      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {

        consumeToken();

        break; // Two newlines -- end of paragraph.

      }

      // Also allow [tok::newline, tok::text, tok::newline] if the middle

      // tok::text is just whitespace.

      if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {

        Token WhitespaceTok = Tok;

        consumeToken();

        if (Tok.is(tok::newline) || Tok.is(tok::eof)) {

          consumeToken();

          break;

        }

        // We have [tok::newline, tok::text, non-newline].  Put back tok::text.

        putBack(WhitespaceTok);

      }

      // A single newline inside the paragraph is recorded on the preceding
      // piece of content, if there is one.
      if (Content.size() > 0)

        Content.back()->addTrailingNewline();

      continue;

    }


    // Don't deal with HTML tag soup now.

    case tok::html_start_tag:

      Content.push_back(parseHTMLStartTag());

      continue;


    case tok::html_end_tag:

      Content.push_back(parseHTMLEndTag());

      continue;


    case tok::text:

      Content.push_back(S.actOnText(Tok.getLocation(),

                                    Tok.getEndLocation(),

                                    Tok.getText()));

      consumeToken();

      continue;


    // The remaining token kinds are not expected here.
    case tok::verbatim_block_line:

    case tok::verbatim_block_end:

    case tok::verbatim_line_text:

    case tok::html_ident:

    case tok::html_equals:

    case tok::html_quoted_string:

    case tok::html_greater:

    case tok::html_slash_greater:

      llvm_unreachable("should not see this token");

    }

    // Reached only via a 'break' out of the switch: the paragraph is done.
    break;

  }


  return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));

}


/// Parses a verbatim block: the opening command, the lines of the block and
/// the closing command, if present.  Returns the node created by Sema.
VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB =
      S.actOnVerbatimBlockStart(Tok.getLocation(), Tok.getVerbatimBlockID());
  consumeToken();

  // Don't create an empty line if verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  for (;;) {
    const bool IsTextLine = Tok.is(tok::verbatim_block_line);
    if (!IsTextLine && Tok.isNot(tok::newline))
      break;

    // A bare tok::newline stands for an empty line.
    const StringRef LineText =
        IsTextLine ? Tok.getVerbatimBlockText() : StringRef("");
    Lines.push_back(S.actOnVerbatimBlockLine(Tok.getLocation(), LineText));
    consumeToken();

    // The newline that terminates a text line belongs to that line.
    if (IsTextLine && Tok.is(tok::newline))
      consumeToken();
  }

  if (Tok.isNot(tok::verbatim_block_end)) {
    // Unterminated \\verbatim block
    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
                               S.copyArray(llvm::ArrayRef(Lines)));
    return VB;
  }

  const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
  S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name,
                             S.copyArray(llvm::ArrayRef(Lines)));
  consumeToken();
  return VB;
}


/// Parses a verbatim line: the command name and the text that follows it on
/// the same line, if any.  Returns the node created by Sema.
VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  Token NameTok = Tok;
  consumeToken();

  // Next token might not be a tok::verbatim_line_text if verbatim line
  // starting command comes just before a newline or comment end.  In that
  // case the text is empty and is located at the end of the command name.
  const bool HasText = Tok.is(tok::verbatim_line_text);
  const SourceLocation TextBegin =
      HasText ? Tok.getLocation() : NameTok.getEndLocation();
  const StringRef Text = HasText ? Tok.getVerbatimLineText() : StringRef("");

  VerbatimLineComment *VL = S.actOnVerbatimLine(
      NameTok.getLocation(), NameTok.getVerbatimLineID(), TextBegin, Text);
  // The token after the name is consumed whether or not it was the text.
  consumeToken();
  return VL;
}


/// Parses one piece of block content, dispatching on the kind of the current
/// token:
///  - text, commands and HTML tags go to parseParagraphOrBlockCommand();
///  - tok::verbatim_block_begin goes to parseVerbatimBlock();
///  - tok::verbatim_line_name goes to parseVerbatimLine().
/// Every other token kind is treated as unreachable here.
BlockContentComment *Parser::parseBlockContent() {

  switch (Tok.getKind()) {

  case tok::text:

  case tok::unknown_command:

  case tok::backslash_command:

  case tok::at_command:

  case tok::html_start_tag:

  case tok::html_end_tag:

    return parseParagraphOrBlockCommand();


  case tok::verbatim_block_begin:

    return parseVerbatimBlock();


  case tok::verbatim_line_name:

    return parseVerbatimLine();


  // None of the kinds below may start block content.
  case tok::eof:

  case tok::newline:

  case tok::verbatim_block_line:

  case tok::verbatim_block_end:

  case tok::verbatim_line_text:

  case tok::html_ident:

  case tok::html_equals:

  case tok::html_quoted_string:

  case tok::html_greater:

  case tok::html_slash_greater:

    llvm_unreachable("should not see this token");

  }

  // Only reached if the token kind matched none of the cases above.
  llvm_unreachable("bogus token kind");

}


/// Parses a whole comment: a sequence of block content separated by
/// newlines, up to EOF.  Returns the node created by Sema.
FullComment *Parser::parseFullComment() {
  const auto SkipNewlines = [this] {
    while (Tok.is(tok::newline))
      consumeToken();
  };

  // Skip newlines at the beginning of the comment.
  SkipNewlines();

  SmallVector<BlockContentComment *, 8> Blocks;
  while (Tok.isNot(tok::eof)) {
    Blocks.push_back(parseBlockContent());

    // Skip extra newlines after paragraph end.
    SkipNewlines();
  }

  return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
}


} // end namespace comments

} // end namespace clang

P
StringRef P
Definition: ASTMatchersInternal.cpp:574

CharInfo.h

E
Expr * E
Definition: CheckExprLifetime.cpp:210

CommentCommandTraits.h

CommentParser.h

CommentSema.h

DiagnosticComment.h

Text
StringRef Text
Definition: Format.cpp:3057

Loc
SourceLocation Loc
Definition: SemaObjC.cpp:759

SourceManager.h
Defines the SourceManager interface.

memcpy
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Definition: __clang_cuda_device_functions.h:1563

clang::DiagnosticsEngine
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231

clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:88

clang::SourceLocation::getLocWithOffset
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
Definition: SourceLocation.h:137

clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:663

clang::SourceManager::getPresumedLineNumber
unsigned getPresumedLineNumber(SourceLocation Loc, bool *Invalid=nullptr) const
Definition: SourceManager.cpp:1424

clang::SourceRange
A trivial tuple used to represent a source range.
Definition: SourceLocation.h:213

clang::comments::BlockCommandComment
A command that has zero or more word-like arguments (number of word-like arguments depends on command...
Definition: Comment.h:604

clang::comments::BlockContentComment
Block content (contains inline content).
Definition: Comment.h:538

clang::comments::CommandTraits
This class provides information about commands that can be used in comments.
Definition: CommentCommandTraits.h:149

clang::comments::CommandTraits::getCommandInfo
const CommandInfo * getCommandInfo(StringRef Name) const
Definition: CommentCommandTraits.h:167

clang::comments::Comment::getLocation
SourceLocation getLocation() const LLVM_READONLY
Definition: Comment.h:249

clang::comments::Comment::getSourceRange
SourceRange getSourceRange() const LLVM_READONLY
Definition: Comment.h:243

clang::comments::FullComment
A full comment attached to a declaration, contains block content.
Definition: Comment.h:1083

clang::comments::HTMLEndTagComment
A closing HTML tag.
Definition: Comment.h:519

clang::comments::HTMLStartTagComment::Attribute
Definition: Comment.h:435

clang::comments::HTMLStartTagComment
An opening HTML tag with attributes.
Definition: Comment.h:433

clang::comments::InlineCommandComment
A command with word-like arguments that is considered inline content.
Definition: Comment.h:335

clang::comments::Lexer
Comment lexer.
Definition: CommentLexer.h:220

clang::comments::ParagraphComment
A single paragraph that contains inline content.
Definition: Comment.h:555

clang::comments::ParamCommandComment
Doxygen \param command.
Definition: Comment.h:711

clang::comments::Parser
Doxygen comment parser.
Definition: CommentParser.h:29

clang::comments::Parser::parseVerbatimLine
VerbatimLineComment * parseVerbatimLine()
Definition: CommentParser.cpp:861

clang::comments::Parser::parseInlineCommand
InlineCommandComment * parseInlineCommand()
Definition: CommentParser.cpp:570

clang::comments::Parser::parseCommandArgs
ArrayRef< Comment::Argument > parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs)
Definition: CommentParser.cpp:423

clang::comments::Parser::parseThrowCommandArgs
ArrayRef< Comment::Argument > parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs)
Parse arguments for \throws command.
Definition: CommentParser.cpp:438

clang::comments::Parser::parseTParamCommandArgs
void parseTParamCommandArgs(TParamCommandComment *TPC, TextTokenRetokenizer &Retokenizer)
Parse arguments for \tparam command.
Definition: CommentParser.cpp:412

clang::comments::Parser::parseParagraphOrBlockCommand
BlockContentComment * parseParagraphOrBlockCommand()
Definition: CommentParser.cpp:714

clang::comments::Parser::parseHTMLEndTag
HTMLEndTagComment * parseHTMLEndTag()
Definition: CommentParser.cpp:699

clang::comments::Parser::parseVerbatimBlock
VerbatimBlockComment * parseVerbatimBlock()
Definition: CommentParser.cpp:815

clang::comments::Parser::parseBlockCommand
BlockCommandComment * parseBlockCommand()
Definition: CommentParser.cpp:472

clang::comments::Parser::parseBlockContent
BlockContentComment * parseBlockContent()
Definition: CommentParser.cpp:887

clang::comments::Parser::parseHTMLStartTag
HTMLStartTagComment * parseHTMLStartTag()
Definition: CommentParser.cpp:598

clang::comments::Parser::parseFullComment
FullComment * parseFullComment()
Definition: CommentParser.cpp:918

clang::comments::Parser::parseParCommandArgs
ArrayRef< Comment::Argument > parseParCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs)
Definition: CommentParser.cpp:455

clang::comments::Parser::parseParamCommandArgs
void parseParamCommandArgs(ParamCommandComment *PC, TextTokenRetokenizer &Retokenizer)
Parse arguments for \param command.
Definition: CommentParser.cpp:394

clang::comments::Sema
Definition: CommentSema.h:32

clang::comments::Sema::actOnFullComment
FullComment * actOnFullComment(ArrayRef< BlockContentComment * > Blocks)
Definition: CommentSema.cpp:516

clang::comments::Sema::actOnParamCommandDirectionArg
void actOnParamCommandDirectionArg(ParamCommandComment *Command, SourceLocation ArgLocBegin, SourceLocation ArgLocEnd, StringRef Arg)
Definition: CommentSema.cpp:229

clang::comments::Sema::actOnTParamCommandStart
TParamCommandComment * actOnTParamCommandStart(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, CommandMarkerKind CommandMarker)
Definition: CommentSema.cpp:279

clang::comments::Sema::actOnTParamCommandFinish
void actOnTParamCommandFinish(TParamCommandComment *Command, ParagraphComment *Paragraph)
Definition: CommentSema.cpp:354

clang::comments::Sema::actOnBlockCommandFinish
void actOnBlockCommandFinish(BlockCommandComment *Command, ParagraphComment *Paragraph)
Definition: CommentSema.cpp:66

clang::comments::Sema::actOnText
TextComment * actOnText(SourceLocation LocBegin, SourceLocation LocEnd, StringRef Text)
Definition: CommentSema.cpp:386

clang::comments::Sema::actOnBlockCommandArgs
void actOnBlockCommandArgs(BlockCommandComment *Command, ArrayRef< BlockCommandComment::Argument > Args)
Definition: CommentSema.cpp:61

clang::comments::Sema::actOnBlockCommandStart
BlockCommandComment * actOnBlockCommandStart(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, CommandMarkerKind CommandMarker)
Definition: CommentSema.cpp:49

clang::comments::Sema::actOnParamCommandParamNameArg
void actOnParamCommandParamNameArg(ParamCommandComment *Command, SourceLocation ArgLocBegin, SourceLocation ArgLocEnd, StringRef Arg)
Definition: CommentSema.cpp:256

clang::comments::Sema::actOnHTMLEndTag
HTMLEndTagComment * actOnHTMLEndTag(SourceLocation LocBegin, SourceLocation LocEnd, StringRef TagName)
Definition: CommentSema.cpp:448

clang::comments::Sema::actOnInlineCommand
InlineCommandComment * actOnInlineCommand(SourceLocation CommandLocBegin, SourceLocation CommandLocEnd, unsigned CommandID, ArrayRef< Comment::Argument > Args)
Definition: CommentSema.cpp:361

clang::comments::Sema::actOnVerbatimLine
VerbatimLineComment * actOnVerbatimLine(SourceLocation LocBegin, unsigned CommandID, SourceLocation TextBegin, StringRef Text)
Definition: CommentSema.cpp:415

clang::comments::Sema::actOnVerbatimBlockFinish
void actOnVerbatimBlockFinish(VerbatimBlockComment *Block, SourceLocation CloseNameLocBegin, StringRef CloseName, ArrayRef< VerbatimBlockLineComment * > Lines)
Definition: CommentSema.cpp:406

clang::comments::Sema::actOnVerbatimBlockLine
VerbatimBlockLineComment * actOnVerbatimBlockLine(SourceLocation Loc, StringRef Text)
Definition: CommentSema.cpp:401

clang::comments::Sema::actOnParamCommandFinish
void actOnParamCommandFinish(ParamCommandComment *Command, ParagraphComment *Paragraph)
Definition: CommentSema.cpp:273

clang::comments::Sema::copyArray
ArrayRef< T > copyArray(ArrayRef< T > Source)
Returns a copy of array, owned by Sema's allocator.
Definition: CommentSema.h:80

clang::comments::Sema::actOnHTMLStartTagStart
HTMLStartTagComment * actOnHTMLStartTagStart(SourceLocation LocBegin, StringRef TagName)
Definition: CommentSema.cpp:430

clang::comments::Sema::actOnHTMLStartTagFinish
void actOnHTMLStartTagFinish(HTMLStartTagComment *Tag, ArrayRef< HTMLStartTagComment::Attribute > Attrs, SourceLocation GreaterLoc, bool IsSelfClosing)
Definition: CommentSema.cpp:435

clang::comments::Sema::actOnParamCommandStart
ParamCommandComment * actOnParamCommandStart(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, CommandMarkerKind CommandMarker)
Definition: CommentSema.cpp:79

clang::comments::Sema::actOnVerbatimBlockStart
VerbatimBlockComment * actOnVerbatimBlockStart(SourceLocation Loc, unsigned CommandID)
Definition: CommentSema.cpp:392

clang::comments::Sema::actOnTParamCommandParamNameArg
void actOnTParamCommandParamNameArg(TParamCommandComment *Command, SourceLocation ArgLocBegin, SourceLocation ArgLocEnd, StringRef Arg)
Definition: CommentSema.cpp:297

clang::comments::Sema::actOnUnknownCommand
InlineContentComment * actOnUnknownCommand(SourceLocation LocBegin, SourceLocation LocEnd, StringRef CommandName)
Definition: CommentSema.cpp:371

clang::comments::Sema::actOnParagraphComment
ParagraphComment * actOnParagraphComment(ArrayRef< InlineContentComment * > Content)
Definition: CommentSema.cpp:44

clang::comments::TParamCommandComment
Doxygen \tparam command, describes a template parameter.
Definition: Comment.h:793

clang::comments::TextTokenRetokenizer
Re-lexes a sequence of tok::text tokens.
Definition: CommentParser.cpp:30

clang::comments::TextTokenRetokenizer::lexParHeading
bool lexParHeading(Token &Tok)
Extract a par command argument-header.
Definition: CommentParser.cpp:242

clang::comments::TextTokenRetokenizer::putBackLeftoverTokens
void putBackLeftoverTokens()
Put back tokens that we didn't consume.
Definition: CommentParser.cpp:363

clang::comments::TextTokenRetokenizer::lexDelimitedSeq
bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim)
Definition: CommentParser.cpp:316

clang::comments::TextTokenRetokenizer::TextTokenRetokenizer
TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P)
Definition: CommentParser.cpp:171

clang::comments::TextTokenRetokenizer::startsWithParCommand
bool startsWithParCommand()
Definition: CommentParser.cpp:226

clang::comments::TextTokenRetokenizer::lexType
bool lexType(Token &Tok)
Extract a type argument.
Definition: CommentParser.cpp:178

clang::comments::TextTokenRetokenizer::lexWord
bool lexWord(Token &Tok)
Extract a word – sequence of non-whitespace characters.
Definition: CommentParser.cpp:283

clang::comments::Token
Comment token.
Definition: CommentLexer.h:55

clang::comments::Token::getHTMLQuotedString
StringRef getHTMLQuotedString() const LLVM_READONLY
Definition: CommentLexer.h:194

clang::comments::Token::isNot
bool isNot(tok::TokenKind K) const LLVM_READONLY
Definition: CommentLexer.h:93

clang::comments::Token::getEndLocation
SourceLocation getEndLocation() const LLVM_READONLY
Definition: CommentLexer.h:83

clang::comments::Token::getCommandID
unsigned getCommandID() const LLVM_READONLY
Definition: CommentLexer.h:120

clang::comments::Token::getUnknownCommandName
StringRef getUnknownCommandName() const LLVM_READONLY
Definition: CommentLexer.h:109

clang::comments::Token::getText
StringRef getText() const LLVM_READONLY
Definition: CommentLexer.h:98

clang::comments::Token::getHTMLIdent
StringRef getHTMLIdent() const LLVM_READONLY
Definition: CommentLexer.h:183

clang::comments::Token::getVerbatimBlockText
StringRef getVerbatimBlockText() const LLVM_READONLY
Definition: CommentLexer.h:140

clang::comments::Token::getVerbatimLineID
unsigned getVerbatimLineID() const LLVM_READONLY
Definition: CommentLexer.h:151

clang::comments::Token::getVerbatimBlockID
unsigned getVerbatimBlockID() const LLVM_READONLY
Definition: CommentLexer.h:130

clang::comments::Token::is
bool is(tok::TokenKind K) const LLVM_READONLY
Definition: CommentLexer.h:92

clang::comments::Token::getLocation
SourceLocation getLocation() const LLVM_READONLY
Definition: CommentLexer.h:80

clang::comments::Token::getVerbatimLineText
StringRef getVerbatimLineText() const LLVM_READONLY
Definition: CommentLexer.h:161

clang::comments::Token::getKind
tok::TokenKind getKind() const LLVM_READONLY
Definition: CommentLexer.h:89

clang::comments::Token::getHTMLTagStartName
StringRef getHTMLTagStartName() const LLVM_READONLY
Definition: CommentLexer.h:172

clang::comments::Token::getHTMLTagEndName
StringRef getHTMLTagEndName() const LLVM_READONLY
Definition: CommentLexer.h:205

clang::comments::VerbatimBlockComment
A verbatim block command (e.g., preformatted code).
Definition: Comment.h:879

clang::comments::VerbatimBlockLineComment
A line of text contained in a verbatim block.
Definition: Comment.h:854

clang::comments::VerbatimLineComment
A verbatim line command.
Definition: Comment.h:930

llvm::ArrayRef
Definition: LLVM.h:31

llvm::SmallString
Definition: LLVM.h:34

llvm::SmallVector
Definition: LLVM.h:35

clang::comments::tok::verbatim_block_line
@ verbatim_block_line
Definition: CommentLexer.h:40

clang::comments::tok::at_command
@ at_command
Definition: CommentLexer.h:38

clang::comments::tok::html_slash_greater
@ html_slash_greater
Definition: CommentLexer.h:49

clang::comments::tok::html_quoted_string
@ html_quoted_string
Definition: CommentLexer.h:47

clang::comments::tok::verbatim_line_text
@ verbatim_line_text
Definition: CommentLexer.h:43

clang::comments::tok::verbatim_block_end
@ verbatim_block_end
Definition: CommentLexer.h:41

clang::comments::tok::text
@ text
Definition: CommentLexer.h:35

clang::comments::tok::verbatim_block_begin
@ verbatim_block_begin
Definition: CommentLexer.h:39

clang::comments::tok::html_end_tag
@ html_end_tag
Definition: CommentLexer.h:50

clang::comments::tok::unknown_command
@ unknown_command
Definition: CommentLexer.h:36

clang::comments::tok::backslash_command
@ backslash_command
Definition: CommentLexer.h:37

clang::comments::tok::html_ident
@ html_ident
Definition: CommentLexer.h:45

clang::comments::tok::html_equals
@ html_equals
Definition: CommentLexer.h:46

clang::comments::tok::eof
@ eof
Definition: CommentLexer.h:33

clang::comments::tok::html_greater
@ html_greater
Definition: CommentLexer.h:48

clang::comments::tok::newline
@ newline
Definition: CommentLexer.h:34

clang::comments::tok::html_start_tag
@ html_start_tag
Definition: CommentLexer.h:44

clang::comments::tok::verbatim_line_name
@ verbatim_line_name
Definition: CommentLexer.h:42

clang::comments::CommandMarkerKind
CommandMarkerKind
Describes the syntax that was used in a documentation command.
Definition: Comment.h:38

clang::comments::CMK_Backslash
@ CMK_Backslash
Command started with a backslash character, e.g. \foo.
Definition: Comment.h:43

clang::comments::CMK_At
@ CMK_At
Command started with an 'at' character, e.g. @foo.
Definition: Comment.h:49

clang
The JSON file list parser is used to communicate input to InstallAPI.
Definition: CalledOnceCheck.h:17

clang::LinkageSpecLanguageIDs::C
@ C

clang::DeclaratorContext::Block
@ Block

clang::ObjCSubstitutionContext::Result
@ Result
The result type of a method or function.

clang::isWhitespace
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
Definition: CharInfo.h:108

clang::SourceLocIdentKind::Line
@ Line

false
#define false
Definition: stdbool.h:26

clang::comments::CommandInfo
Information about a single command.
Definition: CommentCommandTraits.h:32

clang::comments::CommandInfo::IsBlockCommand
unsigned IsBlockCommand
True if this command is a block command (of any kind).
Definition: CommentCommandTraits.h:58

clang::comments::CommandInfo::IsTParamCommand
unsigned IsTParamCommand
True if this command is introducing documentation for a template parameter (\tparam or an alias).
Definition: CommentCommandTraits.h:77

clang::comments::CommandInfo::Name
const char * Name
Definition: CommentCommandTraits.h:37

clang::comments::CommandInfo::IsParCommand
unsigned IsParCommand
True if this is a \par command.
Definition: CommentCommandTraits.h:93

clang::comments::CommandInfo::IsVerbatimBlockEndCommand
unsigned IsVerbatimBlockEndCommand
True if this command is an end command for a verbatim-like block.
Definition: CommentCommandTraits.h:109

clang::comments::CommandInfo::IsParamCommand
unsigned IsParamCommand
True if this command is introducing documentation for a function parameter (\param or an alias).
Definition: CommentCommandTraits.h:72

clang::comments::CommandInfo::IsThrowsCommand
unsigned IsThrowsCommand
True if this command is \throws or an alias.
Definition: CommentCommandTraits.h:81

clang::comments::CommandInfo::IsInlineCommand
unsigned IsInlineCommand
True if this command is an inline command (of any kind).
Definition: CommentCommandTraits.h:54

clang::comments::CommandInfo::IsUnknownCommand
unsigned IsUnknownCommand
True if this command is unknown.
Definition: CommentCommandTraits.h:144

clang::comments::CommandInfo::getID
unsigned getID() const
Definition: CommentCommandTraits.h:33

clang::comments::CommandInfo::NumArgs
unsigned NumArgs
Number of word-like arguments for a given block command, except for \param and \tparam commands – these have special argument parsers.
Definition: CommentCommandTraits.h:50

clang::comments::Comment::Argument
Definition: Comment.h:221