clang API Documentation

HTMLRewrite.cpp
Go to the documentation of this file.
00001 //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file defines the HTMLRewriter class, which is used to translate the
00011 //  text of a source file into prettified HTML.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "clang/Lex/Preprocessor.h"
00016 #include "clang/Rewrite/Rewriter.h"
00017 #include "clang/Rewrite/HTMLRewrite.h"
00018 #include "clang/Lex/TokenConcatenation.h"
00019 #include "clang/Lex/Preprocessor.h"
00020 #include "clang/Basic/SourceManager.h"
00021 #include "llvm/ADT/SmallString.h"
00022 #include "llvm/ADT/OwningPtr.h"
00023 #include "llvm/Support/ErrorHandling.h"
00024 #include "llvm/Support/MemoryBuffer.h"
00025 #include "llvm/Support/raw_ostream.h"
00026 using namespace clang;
00027 
00028 
00029 /// HighlightRange - Highlight a range in the source code with the specified
00030 /// start/end tags.  B/E must be in the same file.  This ensures that
00031 /// start/end tags are placed at the start/end of each line if the range is
00032 /// multiline.
00033 void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E,
00034                           const char *StartTag, const char *EndTag) {
00035   SourceManager &SM = R.getSourceMgr();
00036   B = SM.getExpansionLoc(B);
00037   E = SM.getExpansionLoc(E);
00038   FileID FID = SM.getFileID(B);
00039   assert(SM.getFileID(E) == FID && "B/E not in the same file!");
00040 
00041   unsigned BOffset = SM.getFileOffset(B);
00042   unsigned EOffset = SM.getFileOffset(E);
00043 
00044   // Include the whole end token in the range.
00045   EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr(), R.getLangOpts());
00046 
00047   bool Invalid = false;
00048   const char *BufferStart = SM.getBufferData(FID, &Invalid).data();
00049   if (Invalid)
00050     return;
00051   
00052   HighlightRange(R.getEditBuffer(FID), BOffset, EOffset,
00053                  BufferStart, StartTag, EndTag);
00054 }
00055 
00056 /// HighlightRange - This is the same as the above method, but takes
00057 /// decomposed file locations.
00058 void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
00059                           const char *BufferStart,
00060                           const char *StartTag, const char *EndTag) {
00061   // Insert the tag at the absolute start/end of the range.
00062   RB.InsertTextAfter(B, StartTag);
00063   RB.InsertTextBefore(E, EndTag);
00064 
00065   // Scan the range to see if there is a \r or \n.  If so, and if the line is
00066   // not blank, insert tags on that line as well.
00067   bool HadOpenTag = true;
00068 
00069   unsigned LastNonWhiteSpace = B;
00070   for (unsigned i = B; i != E; ++i) {
00071     switch (BufferStart[i]) {
00072     case '\r':
00073     case '\n':
00074       // Okay, we found a newline in the range.  If we have an open tag, we need
00075       // to insert a close tag at the first non-whitespace before the newline.
00076       if (HadOpenTag)
00077         RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag);
00078 
00079       // Instead of inserting an open tag immediately after the newline, we
00080       // wait until we see a non-whitespace character.  This prevents us from
00081       // inserting tags around blank lines, and also allows the open tag to
00082       // be put *after* whitespace on a non-blank line.
00083       HadOpenTag = false;
00084       break;
00085     case '\0':
00086     case ' ':
00087     case '\t':
00088     case '\f':
00089     case '\v':
00090       // Ignore whitespace.
00091       break;
00092 
00093     default:
00094       // If there is no tag open, do it now.
00095       if (!HadOpenTag) {
00096         RB.InsertTextAfter(i, StartTag);
00097         HadOpenTag = true;
00098       }
00099 
00100       // Remember this character.
00101       LastNonWhiteSpace = i;
00102       break;
00103     }
00104   }
00105 }
00106 
00107 void html::EscapeText(Rewriter &R, FileID FID,
00108                       bool EscapeSpaces, bool ReplaceTabs) {
00109 
00110   const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
00111   const char* C = Buf->getBufferStart();
00112   const char* FileEnd = Buf->getBufferEnd();
00113 
00114   assert (C <= FileEnd);
00115 
00116   RewriteBuffer &RB = R.getEditBuffer(FID);
00117 
00118   unsigned ColNo = 0;
00119   for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) {
00120     switch (*C) {
00121     default: ++ColNo; break;
00122     case '\n':
00123     case '\r':
00124       ColNo = 0;
00125       break;
00126 
00127     case ' ':
00128       if (EscapeSpaces)
00129         RB.ReplaceText(FilePos, 1, "&nbsp;");
00130       ++ColNo;
00131       break;
00132     case '\f':
00133       RB.ReplaceText(FilePos, 1, "<hr>");
00134       ColNo = 0;
00135       break;
00136 
00137     case '\t': {
00138       if (!ReplaceTabs)
00139         break;
00140       unsigned NumSpaces = 8-(ColNo&7);
00141       if (EscapeSpaces)
00142         RB.ReplaceText(FilePos, 1,
00143                        StringRef("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
00144                                        "&nbsp;&nbsp;&nbsp;", 6*NumSpaces));
00145       else
00146         RB.ReplaceText(FilePos, 1, StringRef("        ", NumSpaces));
00147       ColNo += NumSpaces;
00148       break;
00149     }
00150     case '<':
00151       RB.ReplaceText(FilePos, 1, "&lt;");
00152       ++ColNo;
00153       break;
00154 
00155     case '>':
00156       RB.ReplaceText(FilePos, 1, "&gt;");
00157       ++ColNo;
00158       break;
00159 
00160     case '&':
00161       RB.ReplaceText(FilePos, 1, "&amp;");
00162       ++ColNo;
00163       break;
00164     }
00165   }
00166 }
00167 
00168 std::string html::EscapeText(const std::string& s, bool EscapeSpaces,
00169                              bool ReplaceTabs) {
00170 
00171   unsigned len = s.size();
00172   std::string Str;
00173   llvm::raw_string_ostream os(Str);
00174 
00175   for (unsigned i = 0 ; i < len; ++i) {
00176 
00177     char c = s[i];
00178     switch (c) {
00179     default:
00180       os << c; break;
00181 
00182     case ' ':
00183       if (EscapeSpaces) os << "&nbsp;";
00184       else os << ' ';
00185       break;
00186 
00187     case '\t':
00188       if (ReplaceTabs) {
00189         if (EscapeSpaces)
00190           for (unsigned i = 0; i < 4; ++i)
00191             os << "&nbsp;";
00192         else
00193           for (unsigned i = 0; i < 4; ++i)
00194             os << " ";
00195       }
00196       else
00197         os << c;
00198 
00199       break;
00200 
00201     case '<': os << "&lt;"; break;
00202     case '>': os << "&gt;"; break;
00203     case '&': os << "&amp;"; break;
00204     }
00205   }
00206 
00207   return os.str();
00208 }
00209 
00210 static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo,
00211                           unsigned B, unsigned E) {
00212   SmallString<256> Str;
00213   llvm::raw_svector_ostream OS(Str);
00214 
00215   OS << "<tr><td class=\"num\" id=\"LN"
00216      << LineNo << "\">"
00217      << LineNo << "</td><td class=\"line\">";
00218 
00219   if (B == E) { // Handle empty lines.
00220     OS << " </td></tr>";
00221     RB.InsertTextBefore(B, OS.str());
00222   } else {
00223     RB.InsertTextBefore(B, OS.str());
00224     RB.InsertTextBefore(E, "</td></tr>");
00225   }
00226 }
00227 
00228 void html::AddLineNumbers(Rewriter& R, FileID FID) {
00229 
00230   const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
00231   const char* FileBeg = Buf->getBufferStart();
00232   const char* FileEnd = Buf->getBufferEnd();
00233   const char* C = FileBeg;
00234   RewriteBuffer &RB = R.getEditBuffer(FID);
00235 
00236   assert (C <= FileEnd);
00237 
00238   unsigned LineNo = 0;
00239   unsigned FilePos = 0;
00240 
00241   while (C != FileEnd) {
00242 
00243     ++LineNo;
00244     unsigned LineStartPos = FilePos;
00245     unsigned LineEndPos = FileEnd - FileBeg;
00246 
00247     assert (FilePos <= LineEndPos);
00248     assert (C < FileEnd);
00249 
00250     // Scan until the newline (or end-of-file).
00251 
00252     while (C != FileEnd) {
00253       char c = *C;
00254       ++C;
00255 
00256       if (c == '\n') {
00257         LineEndPos = FilePos++;
00258         break;
00259       }
00260 
00261       ++FilePos;
00262     }
00263 
00264     AddLineNumber(RB, LineNo, LineStartPos, LineEndPos);
00265   }
00266 
00267   // Add one big table tag that surrounds all of the code.
00268   RB.InsertTextBefore(0, "<table class=\"code\">\n");
00269   RB.InsertTextAfter(FileEnd - FileBeg, "</table>");
00270 }
00271 
00272 void html::AddHeaderFooterInternalBuiltinCSS(Rewriter& R, FileID FID,
00273                                              const char *title) {
00274 
00275   const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
00276   const char* FileStart = Buf->getBufferStart();
00277   const char* FileEnd = Buf->getBufferEnd();
00278 
00279   SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID);
00280   SourceLocation EndLoc = StartLoc.getLocWithOffset(FileEnd-FileStart);
00281 
00282   std::string s;
00283   llvm::raw_string_ostream os(s);
00284   os << "<!doctype html>\n" // Use HTML 5 doctype
00285         "<html>\n<head>\n";
00286 
00287   if (title)
00288     os << "<title>" << html::EscapeText(title) << "</title>\n";
00289 
00290   os << "<style type=\"text/css\">\n"
00291       " body { color:#000000; background-color:#ffffff }\n"
00292       " body { font-family:Helvetica, sans-serif; font-size:10pt }\n"
00293       " h1 { font-size:14pt }\n"
00294       " .code { border-collapse:collapse; width:100%; }\n"
00295       " .code { font-family: \"Monospace\", monospace; font-size:10pt }\n"
00296       " .code { line-height: 1.2em }\n"
00297       " .comment { color: green; font-style: oblique }\n"
00298       " .keyword { color: blue }\n"
00299       " .string_literal { color: red }\n"
00300       " .directive { color: darkmagenta }\n"
00301       // Macro expansions.
00302       " .expansion { display: none; }\n"
00303       " .macro:hover .expansion { display: block; border: 2px solid #FF0000; "
00304           "padding: 2px; background-color:#FFF0F0; font-weight: normal; "
00305           "  -webkit-border-radius:5px;  -webkit-box-shadow:1px 1px 7px #000; "
00306           "position: absolute; top: -1em; left:10em; z-index: 1 } \n"
00307       " .macro { color: darkmagenta; background-color:LemonChiffon;"
00308              // Macros are position: relative to provide base for expansions.
00309              " position: relative }\n"
00310       " .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }\n"
00311       " .num { text-align:right; font-size:8pt }\n"
00312       " .num { color:#444444 }\n"
00313       " .line { padding-left: 1ex; border-left: 3px solid #ccc }\n"
00314       " .line { white-space: pre }\n"
00315       " .msg { -webkit-box-shadow:1px 1px 7px #000 }\n"
00316       " .msg { -webkit-border-radius:5px }\n"
00317       " .msg { font-family:Helvetica, sans-serif; font-size:8pt }\n"
00318       " .msg { float:left }\n"
00319       " .msg { padding:0.25em 1ex 0.25em 1ex }\n"
00320       " .msg { margin-top:10px; margin-bottom:10px }\n"
00321       " .msg { font-weight:bold }\n"
00322       " .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }\n"
00323       " .msgT { padding:0x; spacing:0x }\n"
00324       " .msgEvent { background-color:#fff8b4; color:#000000 }\n"
00325       " .msgControl { background-color:#bbbbbb; color:#000000 }\n"
00326       " .mrange { background-color:#dfddf3 }\n"
00327       " .mrange { border-bottom:1px solid #6F9DBE }\n"
00328       " .PathIndex { font-weight: bold; padding:0px 5px 0px 5px; "
00329         "margin-right:5px; }\n"
00330       " .PathIndex { -webkit-border-radius:8px }\n"
00331       " .PathIndexEvent { background-color:#bfba87 }\n"
00332       " .PathIndexControl { background-color:#8c8c8c }\n"
00333       " .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }\n"
00334       " .CodeRemovalHint { background-color:#de1010 }\n"
00335       " .CodeRemovalHint { border-bottom:1px solid #6F9DBE }\n"
00336       " table.simpletable {\n"
00337       "   padding: 5px;\n"
00338       "   font-size:12pt;\n"
00339       "   margin:20px;\n"
00340       "   border-collapse: collapse; border-spacing: 0px;\n"
00341       " }\n"
00342       " td.rowname {\n"
00343       "   text-align:right; font-weight:bold; color:#444444;\n"
00344       "   padding-right:2ex; }\n"
00345       "</style>\n</head>\n<body>";
00346 
00347   // Generate header
00348   R.InsertTextBefore(StartLoc, os.str());
00349   // Generate footer
00350 
00351   R.InsertTextAfter(EndLoc, "</body></html>\n");
00352 }
00353 
00354 /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
00355 /// information about keywords, macro expansions etc.  This uses the macro
00356 /// table state from the end of the file, so it won't be perfectly perfect,
00357 /// but it will be reasonably close.
00358 void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
00359   RewriteBuffer &RB = R.getEditBuffer(FID);
00360 
00361   const SourceManager &SM = PP.getSourceManager();
00362   const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
00363   Lexer L(FID, FromFile, SM, PP.getLangOpts());
00364   const char *BufferStart = L.getBufferStart();
00365 
00366   // Inform the preprocessor that we want to retain comments as tokens, so we
00367   // can highlight them.
00368   L.SetCommentRetentionState(true);
00369 
00370   // Lex all the tokens in raw mode, to avoid entering #includes or expanding
00371   // macros.
00372   Token Tok;
00373   L.LexFromRawLexer(Tok);
00374 
00375   while (Tok.isNot(tok::eof)) {
00376     // Since we are lexing unexpanded tokens, all tokens are from the main
00377     // FileID.
00378     unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
00379     unsigned TokLen = Tok.getLength();
00380     switch (Tok.getKind()) {
00381     default: break;
00382     case tok::identifier:
00383       llvm_unreachable("tok::identifier in raw lexing mode!");
00384     case tok::raw_identifier: {
00385       // Fill in Result.IdentifierInfo and update the token kind,
00386       // looking up the identifier in the identifier table.
00387       PP.LookUpIdentifierInfo(Tok);
00388 
00389       // If this is a pp-identifier, for a keyword, highlight it as such.
00390       if (Tok.isNot(tok::identifier))
00391         HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
00392                        "<span class='keyword'>", "</span>");
00393       break;
00394     }
00395     case tok::comment:
00396       HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
00397                      "<span class='comment'>", "</span>");
00398       break;
00399     case tok::utf8_string_literal:
00400       // Chop off the u part of u8 prefix
00401       ++TokOffs;
00402       --TokLen;
00403       // FALL THROUGH to chop the 8
00404     case tok::wide_string_literal:
00405     case tok::utf16_string_literal:
00406     case tok::utf32_string_literal:
00407       // Chop off the L, u, U or 8 prefix
00408       ++TokOffs;
00409       --TokLen;
00410       // FALL THROUGH.
00411     case tok::string_literal:
00412       // FIXME: Exclude the optional ud-suffix from the highlighted range.
00413       HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
00414                      "<span class='string_literal'>", "</span>");
00415       break;
00416     case tok::hash: {
00417       // If this is a preprocessor directive, all tokens to end of line are too.
00418       if (!Tok.isAtStartOfLine())
00419         break;
00420 
00421       // Eat all of the tokens until we get to the next one at the start of
00422       // line.
00423       unsigned TokEnd = TokOffs+TokLen;
00424       L.LexFromRawLexer(Tok);
00425       while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
00426         TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
00427         L.LexFromRawLexer(Tok);
00428       }
00429 
00430       // Find end of line.  This is a hack.
00431       HighlightRange(RB, TokOffs, TokEnd, BufferStart,
00432                      "<span class='directive'>", "</span>");
00433 
00434       // Don't skip the next token.
00435       continue;
00436     }
00437     }
00438 
00439     L.LexFromRawLexer(Tok);
00440   }
00441 }
00442 
00443 /// HighlightMacros - This uses the macro table state from the end of the
00444 /// file, to re-expand macros and insert (into the HTML) information about the
00445 /// macro expansions.  This won't be perfectly perfect, but it will be
00446 /// reasonably close.
00447 void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
00448   // Re-lex the raw token stream into a token buffer.
00449   const SourceManager &SM = PP.getSourceManager();
00450   std::vector<Token> TokenStream;
00451 
00452   const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
00453   Lexer L(FID, FromFile, SM, PP.getLangOpts());
00454 
00455   // Lex all the tokens in raw mode, to avoid entering #includes or expanding
00456   // macros.
00457   while (1) {
00458     Token Tok;
00459     L.LexFromRawLexer(Tok);
00460 
00461     // If this is a # at the start of a line, discard it from the token stream.
00462     // We don't want the re-preprocess step to see #defines, #includes or other
00463     // preprocessor directives.
00464     if (Tok.is(tok::hash) && Tok.isAtStartOfLine())
00465       continue;
00466 
00467     // If this is a ## token, change its kind to unknown so that repreprocessing
00468     // it will not produce an error.
00469     if (Tok.is(tok::hashhash))
00470       Tok.setKind(tok::unknown);
00471 
00472     // If this raw token is an identifier, the raw lexer won't have looked up
00473     // the corresponding identifier info for it.  Do this now so that it will be
00474     // macro expanded when we re-preprocess it.
00475     if (Tok.is(tok::raw_identifier))
00476       PP.LookUpIdentifierInfo(Tok);
00477 
00478     TokenStream.push_back(Tok);
00479 
00480     if (Tok.is(tok::eof)) break;
00481   }
00482 
00483   // Temporarily change the diagnostics object so that we ignore any generated
00484   // diagnostics from this pass.
00485   DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(),
00486                       new IgnoringDiagConsumer);
00487 
00488   // FIXME: This is a huge hack; we reuse the input preprocessor because we want
00489   // its state, but we aren't actually changing it (we hope). This should really
00490   // construct a copy of the preprocessor.
00491   Preprocessor &TmpPP = const_cast<Preprocessor&>(PP);
00492   DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics();
00493   TmpPP.setDiagnostics(TmpDiags);
00494 
00495   // Inform the preprocessor that we don't want comments.
00496   TmpPP.SetCommentRetentionState(false, false);
00497 
00498   // Enter the tokens we just lexed.  This will cause them to be macro expanded
00499   // but won't enter sub-files (because we removed #'s).
00500   TmpPP.EnterTokenStream(&TokenStream[0], TokenStream.size(), false, false);
00501 
00502   TokenConcatenation ConcatInfo(TmpPP);
00503 
00504   // Lex all the tokens.
00505   Token Tok;
00506   TmpPP.Lex(Tok);
00507   while (Tok.isNot(tok::eof)) {
00508     // Ignore non-macro tokens.
00509     if (!Tok.getLocation().isMacroID()) {
00510       TmpPP.Lex(Tok);
00511       continue;
00512     }
00513 
00514     // Okay, we have the first token of a macro expansion: highlight the
00515     // expansion by inserting a start tag before the macro expansion and
00516     // end tag after it.
00517     std::pair<SourceLocation, SourceLocation> LLoc =
00518       SM.getExpansionRange(Tok.getLocation());
00519 
00520     // Ignore tokens whose instantiation location was not the main file.
00521     if (SM.getFileID(LLoc.first) != FID) {
00522       TmpPP.Lex(Tok);
00523       continue;
00524     }
00525 
00526     assert(SM.getFileID(LLoc.second) == FID &&
00527            "Start and end of expansion must be in the same ultimate file!");
00528 
00529     std::string Expansion = EscapeText(TmpPP.getSpelling(Tok));
00530     unsigned LineLen = Expansion.size();
00531 
00532     Token PrevPrevTok;
00533     Token PrevTok = Tok;
00534     // Okay, eat this token, getting the next one.
00535     TmpPP.Lex(Tok);
00536 
00537     // Skip all the rest of the tokens that are part of this macro
00538     // instantiation.  It would be really nice to pop up a window with all the
00539     // spelling of the tokens or something.
00540     while (!Tok.is(tok::eof) &&
00541            SM.getExpansionLoc(Tok.getLocation()) == LLoc.first) {
00542       // Insert a newline if the macro expansion is getting large.
00543       if (LineLen > 60) {
00544         Expansion += "<br>";
00545         LineLen = 0;
00546       }
00547 
00548       LineLen -= Expansion.size();
00549 
00550       // If the tokens were already space separated, or if they must be to avoid
00551       // them being implicitly pasted, add a space between them.
00552       if (Tok.hasLeadingSpace() ||
00553           ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok))
00554         Expansion += ' ';
00555 
00556       // Escape any special characters in the token text.
00557       Expansion += EscapeText(TmpPP.getSpelling(Tok));
00558       LineLen += Expansion.size();
00559 
00560       PrevPrevTok = PrevTok;
00561       PrevTok = Tok;
00562       TmpPP.Lex(Tok);
00563     }
00564 
00565 
00566     // Insert the expansion as the end tag, so that multi-line macros all get
00567     // highlighted.
00568     Expansion = "<span class='expansion'>" + Expansion + "</span></span>";
00569 
00570     HighlightRange(R, LLoc.first, LLoc.second,
00571                    "<span class='macro'>", Expansion.c_str());
00572   }
00573 
00574   // Restore diagnostics object back to its own thing.
00575   TmpPP.setDiagnostics(*OldDiags);
00576 }