clang  8.0.0svn
PrintPreprocessedOutput.cpp
Go to the documentation of this file.
1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This code simply runs the preprocessor on the input file and prints out the
11 // result. This is the traditional behavior of the -E option.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Basic/CharInfo.h"
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Frontend/PreprocessorOutputOptions.h"
20 #include "clang/Lex/MacroInfo.h"
21 #include "clang/Lex/PPCallbacks.h"
22 #include "clang/Lex/Pragma.h"
23 #include "clang/Lex/Preprocessor.h"
24 #include "clang/Lex/TokenConcatenation.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cstdio>
31 using namespace clang;
32 
33 /// PrintMacroDefinition - Print a macro definition in a form that will be
34 /// properly accepted back as a definition.
///
/// Emits "#define <name>", then (for function-like macros) the parenthesized
/// parameter list, then the body tokens. No trailing newline is written.
///
/// \param II  identifier providing the macro name.
/// \param MI  macro whose parameters and body tokens are printed.
/// \param PP  preprocessor used to obtain the spelling of each body token.
/// \param OS  stream receiving the text.
35 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
36  Preprocessor &PP, raw_ostream &OS) {
37  OS << "#define " << II.getName();
38 
39  if (MI.isFunctionLike()) {
40  OS << '(';
41  if (!MI.param_empty()) {
42  MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
 // Every parameter but the last is followed by a comma; the loop leaves AI
 // pointing at the last parameter.
43  for (; AI+1 != E; ++AI) {
44  OS << (*AI)->getName();
45  OS << ',';
46  }
47 
48  // Last argument.
 // A trailing parameter named __VA_ARGS__ is printed as "...".
49  if ((*AI)->getName() == "__VA_ARGS__")
50  OS << "...";
51  else
52  OS << (*AI)->getName();
53  }
54 
55  if (MI.isGNUVarargs())
56  OS << "..."; // #define foo(x...)
57 
58  OS << ')';
59  }
60 
61  // GCC always emits a space, even if the macro body is empty. However, do not
62  // want to emit two spaces if the first token has a leading space.
63  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
64  OS << ' ';
65 
 // One scratch buffer is reused for the spelling of every body token.
66  SmallString<128> SpellingBuffer;
67  for (const auto &T : MI.tokens()) {
68  if (T.hasLeadingSpace())
69  OS << ' ';
70 
71  OS << PP.getSpelling(T, SpellingBuffer);
72  }
73 }
74 
75 //===----------------------------------------------------------------------===//
76 // Preprocessed token printer
77 //===----------------------------------------------------------------------===//
78 
79 namespace {
80 class PrintPPOutputPPCallbacks : public PPCallbacks {
81  Preprocessor &PP;
83  TokenConcatenation ConcatInfo;
84 public:
85  raw_ostream &OS;
86 private:
87  unsigned CurLine;
88 
89  bool EmittedTokensOnThisLine;
90  bool EmittedDirectiveOnThisLine;
92  SmallString<512> CurFilename;
93  bool Initialized;
94  bool DisableLineMarkers;
95  bool DumpDefines;
96  bool DumpIncludeDirectives;
97  bool UseLineDirectives;
98  bool IsFirstFileEntered;
99 public:
100  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
101  bool defines, bool DumpIncludeDirectives,
102  bool UseLineDirectives)
103  : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
104  DisableLineMarkers(lineMarkers), DumpDefines(defines),
105  DumpIncludeDirectives(DumpIncludeDirectives),
106  UseLineDirectives(UseLineDirectives) {
107  CurLine = 0;
108  CurFilename += "<uninit>";
109  EmittedTokensOnThisLine = false;
110  EmittedDirectiveOnThisLine = false;
111  FileType = SrcMgr::C_User;
112  Initialized = false;
113  IsFirstFileEntered = false;
114  }
115 
116  void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
117  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
118 
119  void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
120  bool hasEmittedDirectiveOnThisLine() const {
121  return EmittedDirectiveOnThisLine;
122  }
123 
124  bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
125 
126  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
128  FileID PrevFID) override;
129  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
130  StringRef FileName, bool IsAngled,
131  CharSourceRange FilenameRange, const FileEntry *File,
132  StringRef SearchPath, StringRef RelativePath,
133  const Module *Imported,
134  SrcMgr::CharacteristicKind FileType) override;
135  void Ident(SourceLocation Loc, StringRef str) override;
136  void PragmaMessage(SourceLocation Loc, StringRef Namespace,
137  PragmaMessageKind Kind, StringRef Str) override;
138  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
139  void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
140  void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
141  void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
142  diag::Severity Map, StringRef Str) override;
143  void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
144  ArrayRef<int> Ids) override;
145  void PragmaWarningPush(SourceLocation Loc, int Level) override;
146  void PragmaWarningPop(SourceLocation Loc) override;
147  void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
148  void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
149 
150  bool HandleFirstTokOnLine(Token &Tok);
151 
152  /// Move to the line of the provided source location. This will
153  /// return true if the output stream required adjustment or if
154  /// the requested location is on the first line.
155  bool MoveToLine(SourceLocation Loc) {
156  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
157  if (PLoc.isInvalid())
158  return false;
159  return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
160  }
161  bool MoveToLine(unsigned LineNo);
162 
163  bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
164  const Token &Tok) {
165  return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
166  }
167  void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
168  unsigned ExtraLen=0);
169  bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
170  void HandleNewlinesInToken(const char *TokStr, unsigned Len);
171 
172  /// MacroDefined - This hook is called whenever a macro definition is seen.
173  void MacroDefined(const Token &MacroNameTok,
174  const MacroDirective *MD) override;
175 
176  /// MacroUndefined - This hook is called whenever a macro #undef is seen.
177  void MacroUndefined(const Token &MacroNameTok,
178  const MacroDefinition &MD,
179  const MacroDirective *Undef) override;
180 
181  void BeginModule(const Module *M);
182  void EndModule(const Module *M);
183 };
184 } // end anonymous namespace
185 
/// Write a line marker for LineNo and the current file name, followed by a
/// newline. Any partially written output line is terminated first, without
/// changing CurLine.
///
/// \param LineNo    line number written into the marker.
/// \param Extra     text appended after the file name; only used for the
///                  "# N" marker form.
/// \param ExtraLen  number of bytes of Extra to write; 0 writes nothing.
186 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
187  const char *Extra,
188  unsigned ExtraLen) {
189  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
190 
191  // Emit #line directives or GNU line markers depending on what mode we're in.
192  if (UseLineDirectives) {
 // The #line form carries neither Extra nor the file-kind suffix.
193  OS << "#line" << ' ' << LineNo << ' ' << '"';
194  OS.write_escaped(CurFilename);
195  OS << '"';
196  } else {
197  OS << '#' << ' ' << LineNo << ' ' << '"';
198  OS.write_escaped(CurFilename);
199  OS << '"';
200 
201  if (ExtraLen)
202  OS.write(Extra, ExtraLen);
203 
 // Append " 3" for C_System files and " 3 4" for C_ExternCSystem files.
204  if (FileType == SrcMgr::C_System)
205  OS.write(" 3", 2);
206  else if (FileType == SrcMgr::C_ExternCSystem)
207  OS.write(" 3 4", 4);
208  }
209  OS << '\n';
210 }
211 
212 /// MoveToLine - Move the output to the source line specified by the location
213 /// object. We can do this by emitting some number of \n's, or be emitting a
214 /// #line directive. This returns false if already at the specified line, true
215 /// if some newlines were emitted.
///
/// On every path that returns true, CurLine is set to LineNo.
216 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
217  // If this line is "close enough" to the original line, just print newlines,
218  // otherwise print a #line directive.
 // Both operands are unsigned, so LineNo < CurLine wraps to a large value and
 // falls through to the marker / -P branches below.
219  if (LineNo-CurLine <= 8) {
220  if (LineNo-CurLine == 1)
221  OS << '\n';
222  else if (LineNo == CurLine)
223  return false; // Spelling line moved, but expansion line didn't.
224  else {
 // Eight newlines are enough because the distance is at most 8 here.
225  const char *NewLines = "\n\n\n\n\n\n\n\n";
226  OS.write(NewLines, LineNo-CurLine);
227  }
228  } else if (!DisableLineMarkers) {
229  // Emit a #line or line marker.
230  WriteLineInfo(LineNo, nullptr, 0);
231  } else {
232  // Okay, we're in -P mode, which turns off line markers. However, we still
233  // need to emit a newline between tokens on different lines.
234  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
235  }
236 
237  CurLine = LineNo;
238  return true;
239 }
240 
/// If a token or directive has been written on the current output line, write
/// a newline and clear both "emitted" flags.
///
/// \param ShouldUpdateCurrentLine  when true, CurLine is incremented together
///        with the newline.
/// \returns true if a newline was written, false if the line was still empty.
241 bool
242 PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
243  if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
244  OS << '\n';
245  EmittedTokensOnThisLine = false;
246  EmittedDirectiveOnThisLine = false;
247  if (ShouldUpdateCurrentLine)
248  ++CurLine;
249  return true;
250  }
251 
252  return false;
253 }
254 
255 /// FileChanged - Whenever the preprocessor enters or exits a #include file
256 /// it invokes this handler. Update our conception of the current source
257 /// position.
258 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
259  FileChangeReason Reason,
260  SrcMgr::CharacteristicKind NewFileType,
261  FileID PrevFID) {
262  // Unless we are exiting a #include, make sure to skip ahead to the line the
263  // #include directive was at.
264  SourceManager &SourceMgr = SM;
265 
266  PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
267  if (UserLoc.isInvalid())
268  return;
269 
270  unsigned NewLine = UserLoc.getLine();
271 
272  if (Reason == PPCallbacks::EnterFile) {
273  SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
274  if (IncludeLoc.isValid())
275  MoveToLine(IncludeLoc);
276  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
277  // GCC emits the # directive for this directive on the line AFTER the
278  // directive and emits a bunch of spaces that aren't needed. This is because
279  // otherwise we will emit a line marker for THIS line, which requires an
280  // extra blank line after the directive to avoid making all following lines
281  // off by one. We can do better by simply incrementing NewLine here.
282  NewLine += 1;
283  }
284 
285  CurLine = NewLine;
286 
287  CurFilename.clear();
288  CurFilename += UserLoc.getFilename();
289  FileType = NewFileType;
290 
291  if (DisableLineMarkers) {
292  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
293  return;
294  }
295 
296  if (!Initialized) {
297  WriteLineInfo(CurLine);
298  Initialized = true;
299  }
300 
301  // Do not emit an enter marker for the main file (which we expect is the first
302  // entered file). This matches gcc, and improves compatibility with some tools
303  // which track the # line markers as a way to determine when the preprocessed
304  // output is in the context of the main file.
305  if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
306  IsFirstFileEntered = true;
307  return;
308  }
309 
310  switch (Reason) {
312  WriteLineInfo(CurLine, " 1", 2);
313  break;
315  WriteLineInfo(CurLine, " 2", 2);
316  break;
319  WriteLineInfo(CurLine);
320  break;
321  }
322 }
323 
/// Callback for an inclusion directive. Optionally echoes the directive
/// (when DumpIncludeDirectives is set) and, when the inclusion was turned into
/// a module import (Imported is non-null), writes an equivalent
/// "#pragma clang module import" line.
///
/// FilenameRange, File, SearchPath, RelativePath and FileType are not used.
324 void PrintPPOutputPPCallbacks::InclusionDirective(
325  SourceLocation HashLoc,
326  const Token &IncludeTok,
327  StringRef FileName,
328  bool IsAngled,
329  CharSourceRange FilenameRange,
330  const FileEntry *File,
331  StringRef SearchPath,
332  StringRef RelativePath,
333  const Module *Imported,
334  SrcMgr::CharacteristicKind FileType) {
335  // In -dI mode, dump #include directives prior to dumping their content or
336  // interpretation.
337  if (DumpIncludeDirectives) {
338  startNewLineIfNeeded();
339  MoveToLine(HashLoc);
340  const std::string TokenText = PP.getSpelling(IncludeTok);
341  assert(!TokenText.empty());
 // The file name is wrapped in <> or "" according to IsAngled.
342  OS << "#" << TokenText << " "
343  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
344  << " /* clang -E -dI */";
345  setEmittedDirectiveOnThisLine();
346  startNewLineIfNeeded();
347  }
348 
349  // When preprocessing, turn implicit imports into module import pragmas.
350  if (Imported) {
351  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
352  case tok::pp_include:
353  case tok::pp_import:
354  case tok::pp_include_next:
355  startNewLineIfNeeded();
356  MoveToLine(HashLoc);
357  OS << "#pragma clang module import " << Imported->getFullModuleName(true)
358  << " /* clang -E: implicit import for "
359  << "#" << PP.getSpelling(IncludeTok) << " "
360  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
361  << " */";
362  // Since we want a newline after the pragma, but not a #<line>, start a
363  // new line immediately.
364  EmittedTokensOnThisLine = true;
365  startNewLineIfNeeded();
366  break;
367 
368  case tok::pp___include_macros:
369  // #__include_macros has no effect on a user of a preprocessed source
370  // file; the only effect is on preprocessing.
371  //
372  // FIXME: That's not *quite* true: it causes the module in question to
373  // be loaded, which can affect downstream diagnostics.
374  break;
375 
376  default:
377  llvm_unreachable("unknown include directive kind");
378  break;
379  }
380  }
381 }
382 
383 /// Handle entering the scope of a module during a module compilation.
/// Writes "#pragma clang module begin <full module name>" on a fresh line and
/// marks the line as holding a directive. M is dereferenced unconditionally.
384 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
385  startNewLineIfNeeded();
386  OS << "#pragma clang module begin " << M->getFullModuleName(true);
387  setEmittedDirectiveOnThisLine();
388 }
389 
390 /// Handle leaving the scope of a module during a module compilation.
/// Writes "#pragma clang module end" on a fresh line with the full module name
/// in a trailing comment, and marks the line as holding a directive. M is
/// dereferenced unconditionally.
391 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
392  startNewLineIfNeeded();
393  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
394  setEmittedDirectiveOnThisLine();
395 }
396 
397 /// Ident - Handle #ident directives when read by the preprocessor.
398 ///
/// Moves to the line of Loc and writes "#ident " followed by S verbatim.
399 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
400  MoveToLine(Loc);
401 
402  OS.write("#ident ", strlen("#ident "));
403  OS.write(S.begin(), S.size());
 // Note: this sets the "tokens" flag, not the "directive" flag the pragma
 // callbacks set.
404  EmittedTokensOnThisLine = true;
405 }
406 
407 /// MacroDefined - This hook is called whenever a macro definition is seen.
/// Prints the definition at its definition location when DumpDefines is set
/// and the macro is not a builtin. MD is dereferenced unconditionally.
408 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
409  const MacroDirective *MD) {
410  const MacroInfo *MI = MD->getMacroInfo();
411  // Only print out macro definitions in -dD mode.
412  if (!DumpDefines ||
413  // Ignore __FILE__ etc.
414  MI->isBuiltinMacro()) return;
415 
416  MoveToLine(MI->getDefinitionLoc());
417  PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
418  setEmittedDirectiveOnThisLine();
419 }
420 
/// MacroUndefined - When DumpDefines is set, writes "#undef <name>" at the
/// line of the macro name token. MD and Undef are not used.
421 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
422  const MacroDefinition &MD,
423  const MacroDirective *Undef) {
424  // Only print out macro definitions in -dD mode.
425  if (!DumpDefines) return;
426 
427  MoveToLine(MacroNameTok.getLocation());
428  OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
429  setEmittedDirectiveOnThisLine();
430 }
431 
/// Write Str to OS so it can sit inside a double-quoted string: printable
/// characters other than backslash and double quote are copied as-is; every
/// other byte is written as a backslash followed by three octal digits.
432 static void outputPrintable(raw_ostream &OS, StringRef Str) {
 // Iterating as unsigned char keeps the shifts below free of sign extension.
433  for (unsigned char Char : Str) {
434  if (isPrintable(Char) && Char != '\\' && Char != '"')
435  OS << (char)Char;
436  else // Output anything hard as an octal escape.
437  OS << '\\'
438  << (char)('0' + ((Char >> 6) & 7))
439  << (char)('0' + ((Char >> 3) & 7))
440  << (char)('0' + ((Char >> 0) & 7));
441  }
442 }
443 
/// Re-emit a message / warning / error pragma at the line of Loc. The optional
/// Namespace is written before the pragma keyword and the text is escaped
/// with outputPrintable().
444 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
445  StringRef Namespace,
446  PragmaMessageKind Kind,
447  StringRef Str) {
448  startNewLineIfNeeded();
449  MoveToLine(Loc);
450  OS << "#pragma ";
451  if (!Namespace.empty())
452  OS << Namespace << ' ';
 // Only the message form wraps its string in parentheses.
453  switch (Kind) {
454  case PMK_Message:
455  OS << "message(\"";
456  break;
457  case PMK_Warning:
458  OS << "warning \"";
459  break;
460  case PMK_Error:
461  OS << "error \"";
462  break;
463  }
464 
465  outputPrintable(OS, Str);
466  OS << '"';
467  if (Kind == PMK_Message)
468  OS << ')';
469  setEmittedDirectiveOnThisLine();
470 }
471 
/// Re-emit "#pragma clang __debug <DebugType>" at the line of Loc.
472 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
473  StringRef DebugType) {
474  startNewLineIfNeeded();
475  MoveToLine(Loc);
476 
477  OS << "#pragma clang __debug ";
478  OS << DebugType;
479 
480  setEmittedDirectiveOnThisLine();
481 }
482 
/// Re-emit "#pragma <Namespace> diagnostic push" at the line of Loc.
483 void PrintPPOutputPPCallbacks::
484 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
485  startNewLineIfNeeded();
486  MoveToLine(Loc);
487  OS << "#pragma " << Namespace << " diagnostic push";
488  setEmittedDirectiveOnThisLine();
489 }
490 
/// Re-emit "#pragma <Namespace> diagnostic pop" at the line of Loc.
491 void PrintPPOutputPPCallbacks::
492 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
493  startNewLineIfNeeded();
494  MoveToLine(Loc);
495  OS << "#pragma " << Namespace << " diagnostic pop";
496  setEmittedDirectiveOnThisLine();
497 }
498 
/// Re-emit "#pragma <Namespace> diagnostic <severity> "<Str>"" at the line of
/// Loc, where <severity> is the keyword matching Map. Str is written without
/// escaping.
499 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
500  StringRef Namespace,
501  diag::Severity Map,
502  StringRef Str) {
503  startNewLineIfNeeded();
504  MoveToLine(Loc);
505  OS << "#pragma " << Namespace << " diagnostic ";
506  switch (Map) {
507  case diag::Severity::Remark:
508  OS << "remark";
509  break;
510  case diag::Severity::Warning:
511  OS << "warning";
512  break;
513  case diag::Severity::Error:
514  OS << "error";
515  break;
516  case diag::Severity::Ignored:
517  OS << "ignored";
518  break;
519  case diag::Severity::Fatal:
520  OS << "fatal";
521  break;
522  }
523  OS << " \"" << Str << '"';
524  setEmittedDirectiveOnThisLine();
525 }
526 
/// Re-emit "#pragma warning(<WarningSpec>: id id ...)" at the line of Loc;
/// each id in Ids is preceded by a single space.
527 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
528  StringRef WarningSpec,
529  ArrayRef<int> Ids) {
530  startNewLineIfNeeded();
531  MoveToLine(Loc);
532  OS << "#pragma warning(" << WarningSpec << ':';
533  for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
534  OS << ' ' << *I;
535  OS << ')';
536  setEmittedDirectiveOnThisLine();
537 }
538 
/// Re-emit "#pragma warning(push)" at the line of Loc; a non-negative Level is
/// appended as ", <Level>", a negative Level is omitted.
539 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
540  int Level) {
541  startNewLineIfNeeded();
542  MoveToLine(Loc);
543  OS << "#pragma warning(push";
544  if (Level >= 0)
545  OS << ", " << Level;
546  OS << ')';
547  setEmittedDirectiveOnThisLine();
548 }
549 
/// Re-emit "#pragma warning(pop)" at the line of Loc.
550 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
551  startNewLineIfNeeded();
552  MoveToLine(Loc);
553  OS << "#pragma warning(pop)";
554  setEmittedDirectiveOnThisLine();
555 }
556 
/// Re-emit "#pragma clang assume_nonnull begin" at the line of Loc.
557 void PrintPPOutputPPCallbacks::
558 PragmaAssumeNonNullBegin(SourceLocation Loc) {
559  startNewLineIfNeeded();
560  MoveToLine(Loc);
561  OS << "#pragma clang assume_nonnull begin";
562  setEmittedDirectiveOnThisLine();
563 }
564 
/// Re-emit "#pragma clang assume_nonnull end" at the line of Loc.
565 void PrintPPOutputPPCallbacks::
566 PragmaAssumeNonNullEnd(SourceLocation Loc) {
567  startNewLineIfNeeded();
568  MoveToLine(Loc);
569  OS << "#pragma clang assume_nonnull end";
570  setEmittedDirectiveOnThisLine();
571 }
572 
573 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
574 /// is called for the first token on each new line. If this really is the start
575 /// of a new logical line, handle it and return true, otherwise return false.
576 /// This may not be the start of a logical line because the "start of line"
577 /// marker is set for spelling lines, not expansion ones.
///
/// On success the output is also indented with spaces up to the token's
/// expansion column.
578 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
579  // Figure out what line we went to and insert the appropriate number of
580  // newline characters.
581  if (!MoveToLine(Tok.getLocation()))
582  return false;
583 
584  // Print out space characters so that the first token on a line is
585  // indented for easy reading.
586  unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
587 
588  // The first token on a line can have a column number of 1, yet still expect
589  // leading white space, if a macro expansion in column 1 starts with an empty
590  // macro argument, or an empty nested macro expansion. In this case, move the
591  // token to column 2.
592  if (ColNo == 1 && Tok.hasLeadingSpace())
593  ColNo = 2;
594 
595  // This hack prevents stuff like:
596  // #define HASH #
597  // HASH define foo bar
598  // From having the # character end up at column 1, which makes it so it
599  // is not handled as a #define next time through the preprocessor if in
600  // -fpreprocessed mode.
601  if (ColNo <= 1 && Tok.is(tok::hash))
602  OS << ' ';
603 
604  // Otherwise, indent the appropriate number of spaces.
 // Writes ColNo-1 spaces, so the token lands in column ColNo.
605  for (; ColNo > 1; --ColNo)
606  OS << ' ';
607 
608  return true;
609 }
610 
/// Count the line terminators in the Len bytes at TokStr and advance CurLine
/// by that count. A "\r\n" or "\n\r" pair counts as one terminator; nothing is
/// written to the output.
611 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
612  unsigned Len) {
613  unsigned NumNewlines = 0;
614  for (; Len; --Len, ++TokStr) {
615  if (*TokStr != '\n' &&
616  *TokStr != '\r')
617  continue;
618 
619  ++NumNewlines;
620 
621  // If we have \n\r or \r\n, skip both and count as one line.
 // The Len != 1 test keeps the TokStr[1] look-ahead inside the buffer.
622  if (Len != 1 &&
623  (TokStr[1] == '\n' || TokStr[1] == '\r') &&
624  TokStr[0] != TokStr[1]) {
625  ++TokStr;
626  --Len;
627  }
628  }
629 
630  if (NumNewlines == 0) return;
631 
632  CurLine += NumNewlines;
633 }
634 
635 
636 namespace {
/// Pragma handler that copies a pragma's tokens to the callbacks' output
/// stream after a fixed prefix, optionally macro-expanding them.
637 struct UnknownPragmaHandler : public PragmaHandler {
 // Text written before the pragma tokens, e.g. "#pragma GCC".
638  const char *Prefix;
 // Non-owning; supplies the output stream and line tracking.
639  PrintPPOutputPPCallbacks *Callbacks;
640 
641  // Set to true if tokens should be expanded
642  bool ShouldExpandTokens;
643 
644  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
645  bool RequireTokenExpansion)
646  : Prefix(prefix), Callbacks(callbacks),
647  ShouldExpandTokens(RequireTokenExpansion) {}
 /// Writes Prefix and then every token up to the end-of-directive token,
 /// inserting a space where a token had leading whitespace or where
 /// AvoidConcat reports that two tokens must not be adjacent.
648  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
649  Token &PragmaTok) override {
650  // Figure out what line we went to and insert the appropriate number of
651  // newline characters.
652  Callbacks->startNewLineIfNeeded();
653  Callbacks->MoveToLine(PragmaTok.getLocation());
654  Callbacks->OS.write(Prefix, strlen(Prefix));
655 
656  if (ShouldExpandTokens) {
657  // The first token does not have expanded macros. Expand them, if
658  // required.
 // The token is pushed back as a one-element stream with expansion
 // enabled and then lexed again.
659  auto Toks = llvm::make_unique<Token[]>(1);
660  Toks[0] = PragmaTok;
661  PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
662  /*DisableMacroExpansion=*/false);
663  PP.Lex(PragmaTok);
664  }
665  Token PrevToken;
666  Token PrevPrevToken;
667  PrevToken.startToken();
668  PrevPrevToken.startToken();
669 
670  // Read and print all of the pragma tokens.
671  while (PragmaTok.isNot(tok::eod)) {
672  if (PragmaTok.hasLeadingSpace() ||
673  Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
674  Callbacks->OS << ' ';
675  std::string TokSpell = PP.getSpelling(PragmaTok);
676  Callbacks->OS.write(&TokSpell[0], TokSpell.size());
677 
678  PrevPrevToken = PrevToken;
679  PrevToken = PragmaTok;
680 
681  if (ShouldExpandTokens)
682  PP.Lex(PragmaTok);
683  else
684  PP.LexUnexpandedToken(PragmaTok);
685  }
686  Callbacks->setEmittedDirectiveOnThisLine();
687  }
688 };
689 } // end anonymous namespace
690 
691 
693  PrintPPOutputPPCallbacks *Callbacks,
694  raw_ostream &OS) {
695  bool DropComments = PP.getLangOpts().TraditionalCPP &&
697 
698  char Buffer[256];
699  Token PrevPrevTok, PrevTok;
700  PrevPrevTok.startToken();
701  PrevTok.startToken();
702  while (1) {
703  if (Callbacks->hasEmittedDirectiveOnThisLine()) {
704  Callbacks->startNewLineIfNeeded();
705  Callbacks->MoveToLine(Tok.getLocation());
706  }
707 
708  // If this token is at the start of a line, emit newlines if needed.
709  if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
710  // done.
711  } else if (Tok.hasLeadingSpace() ||
712  // If we haven't emitted a token on this line yet, PrevTok isn't
713  // useful to look at and no concatenation could happen anyway.
714  (Callbacks->hasEmittedTokensOnThisLine() &&
715  // Don't print "-" next to "-", it would form "--".
716  Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
717  OS << ' ';
718  }
719 
720  if (DropComments && Tok.is(tok::comment)) {
721  // Skip comments. Normally the preprocessor does not generate
722  // tok::comment nodes at all when not keeping comments, but under
723  // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
724  SourceLocation StartLoc = Tok.getLocation();
725  Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
726  } else if (Tok.is(tok::eod)) {
727  // Don't print end of directive tokens, since they are typically newlines
728  // that mess up our line tracking. These come from unknown pre-processor
729  // directives or hash-prefixed comments in standalone assembly files.
730  PP.Lex(Tok);
731  continue;
732  } else if (Tok.is(tok::annot_module_include)) {
733  // PrintPPOutputPPCallbacks::InclusionDirective handles producing
734  // appropriate output here. Ignore this token entirely.
735  PP.Lex(Tok);
736  continue;
737  } else if (Tok.is(tok::annot_module_begin)) {
738  // FIXME: We retrieve this token after the FileChanged callback, and
739  // retrieve the module_end token before the FileChanged callback, so
740  // we render this within the file and render the module end outside the
741  // file, but this is backwards from the token locations: the module_begin
742  // token is at the include location (outside the file) and the module_end
743  // token is at the EOF location (within the file).
744  Callbacks->BeginModule(
745  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
746  PP.Lex(Tok);
747  continue;
748  } else if (Tok.is(tok::annot_module_end)) {
749  Callbacks->EndModule(
750  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
751  PP.Lex(Tok);
752  continue;
753  } else if (Tok.isAnnotation()) {
754  // Ignore annotation tokens created by pragmas - the pragmas themselves
755  // will be reproduced in the preprocessed output.
756  PP.Lex(Tok);
757  continue;
758  } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
759  OS << II->getName();
760  } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
761  Tok.getLiteralData()) {
762  OS.write(Tok.getLiteralData(), Tok.getLength());
763  } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
764  const char *TokPtr = Buffer;
765  unsigned Len = PP.getSpelling(Tok, TokPtr);
766  OS.write(TokPtr, Len);
767 
768  // Tokens that can contain embedded newlines need to adjust our current
769  // line number.
770  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
771  Callbacks->HandleNewlinesInToken(TokPtr, Len);
772  } else {
773  std::string S = PP.getSpelling(Tok);
774  OS.write(&S[0], S.size());
775 
776  // Tokens that can contain embedded newlines need to adjust our current
777  // line number.
778  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
779  Callbacks->HandleNewlinesInToken(&S[0], S.size());
780  }
781  Callbacks->setEmittedTokensOnThisLine();
782 
783  if (Tok.is(tok::eof)) break;
784 
785  PrevPrevTok = PrevTok;
786  PrevTok = Tok;
787  PP.Lex(Tok);
788  }
789 }
790 
// Pairs a macro's identifier with its MacroInfo so macros can be sorted by
// name.
791 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
/// Comparator handed to llvm::array_pod_sort: orders two pairs by identifier
/// name using StringRef::compare. Both pointers and their `first` members are
/// dereferenced unconditionally.
792 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
793  return LHS->first->getName().compare(RHS->first->getName());
794 }
795 
/// Lex the whole main file while discarding its tokens, then print every
/// macro that is still defined and is not a builtin, sorted by name, one
/// "#define" per line.
///
/// \param PP  preprocessor to run; unknown pragmas are ignored.
/// \param OS  output stream; dereferenced unconditionally.
796 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
797  // Ignore unknown pragmas.
798  PP.IgnorePragmas();
799 
800  // -dM mode just scans and ignores all tokens in the files, then dumps out
801  // the macro table at the end.
802  PP.EnterMainSourceFile();
803 
804  Token Tok;
805  do PP.Lex(Tok);
806  while (Tok.isNot(tok::eof));
807 
808  SmallVector<id_macro_pair, 128> MacrosByID;
809  for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
810  I != E; ++I) {
 // Keep only identifiers whose latest directive is a definition.
811  auto *MD = I->second.getLatest();
812  if (MD && MD->isDefined())
813  MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
814  }
815  llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
816 
817  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
818  MacroInfo &MI = *MacrosByID[i].second;
819  // Ignore computed macros like __LINE__ and friends.
820  if (MI.isBuiltinMacro()) continue;
821 
822  PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
823  *OS << '\n';
824  }
825 }
826 
827 /// DoPrintPreprocessedInput - This implements -E mode.
828 ///
830  const PreprocessorOutputOptions &Opts) {
831  // Show macros with no output is handled specially.
832  if (!Opts.ShowCPP) {
833  assert(Opts.ShowMacros && "Not yet implemented!");
834  DoPrintMacros(PP, OS);
835  return;
836  }
837 
838  // Inform the preprocessor whether we want it to retain comments or not, due
839  // to -C or -CC.
841 
842  PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
843  PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
845 
846  // Expand macros in pragmas with -fms-extensions. The assumption is that
847  // the majority of pragmas in such a file will be Microsoft pragmas.
848  // Remember the handlers we will add so that we can remove them later.
849  std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
850  new UnknownPragmaHandler(
851  "#pragma", Callbacks,
852  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
853 
854  std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
855  "#pragma GCC", Callbacks,
856  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
857 
858  std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
859  "#pragma clang", Callbacks,
860  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
861 
862  PP.AddPragmaHandler(MicrosoftExtHandler.get());
863  PP.AddPragmaHandler("GCC", GCCHandler.get());
864  PP.AddPragmaHandler("clang", ClangHandler.get());
865 
866  // The tokens after pragma omp need to be expanded.
867  //
868  // OpenMP [2.1, Directive format]
869  // Preprocessing tokens following the #pragma omp are subject to macro
870  // replacement.
871  std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
872  new UnknownPragmaHandler("#pragma omp", Callbacks,
873  /*RequireTokenExpansion=*/true));
874  PP.AddPragmaHandler("omp", OpenMPHandler.get());
875 
876  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
877 
878  // After we have configured the preprocessor, enter the main file.
879  PP.EnterMainSourceFile();
880 
881  // Consume all of the tokens that come from the predefines buffer. Those
882  // should not be emitted into the output and are guaranteed to be at the
883  // start.
884  const SourceManager &SourceMgr = PP.getSourceManager();
885  Token Tok;
886  do {
887  PP.Lex(Tok);
888  if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
889  break;
890 
891  PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
892  if (PLoc.isInvalid())
893  break;
894 
895  if (strcmp(PLoc.getFilename(), "<built-in>"))
896  break;
897  } while (true);
898 
899  // Read all the preprocessed tokens, printing them out to the stream.
900  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
901  *OS << '\n';
902 
903  // Remove the handlers we just added to leave the preprocessor in a sane state
904  // so that it can be reused (for example by a clang::Parser instance).
905  PP.RemovePragmaHandler(MicrosoftExtHandler.get());
906  PP.RemovePragmaHandler("GCC", GCCHandler.get());
907  PP.RemovePragmaHandler("clang", ClangHandler.get());
908  PP.RemovePragmaHandler("omp", OpenMPHandler.get());
909 }
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, Preprocessor &PP, raw_ostream &OS)
PrintMacroDefinition - Print a macro definition in a form that will be properly accepted back as a definition.
param_iterator param_begin() const
Definition: MacroInfo.h:181
std::pair< const IdentifierInfo *, MacroInfo * > id_macro_pair
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition: Pragma.cpp:903
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
Defines the SourceManager interface.
TokenConcatenation class, which answers the question of "Is it safe to emit two tokens without a whit...
Defines the clang::MacroInfo and clang::MacroDirective classes.
A description of the current definition of a macro.
Definition: MacroInfo.h:565
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition: Pragma.cpp:1841
Severity
Enum values that allow the client to map NOTEs, WARNINGs, and EXTENSIONs to either Ignore (nothing)...
Definition: DiagnosticIDs.h:80
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:78
This interface provides a way to observe the actions of the preprocessor as it does its thing...
Definition: PPCallbacks.h:36
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:118
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:243
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
One of these records is kept for each identifier that is lexed.
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
Definition: Preprocessor.h:850
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:815
Describes a module or submodule.
Definition: Module.h:65
param_iterator param_end() const
Definition: MacroInfo.h:182
std::string getFullModuleName(bool AllowStringLiterals=false) const
Retrieve the full name of this module, including the path from its top-level module.
Definition: Module.cpp:214
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
const FormatToken & Tok
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
Defines the Diagnostic-related interfaces.
Present this diagnostic as an error.
PragmaIntroducerKind
Describes how the pragma was introduced, e.g., with #pragma, _Pragma, or __pragma.
Definition: Pragma.h:32
macro_iterator macro_end(bool IncludeExternalMacros=true) const
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
IdentifierInfo *const * param_iterator
Parameters - The list of parameters for a function-like macro.
Definition: MacroInfo.h:179
return Out str()
PreprocessorOutputOptions - Options for controlling the C preprocessor output (e.g., -E).
void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, const PreprocessorOutputOptions &Opts)
DoPrintPreprocessedInput - Implement -E mode.
static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS)
Represents a character-granular source range.
bool isInvalid() const
Return true if this object is invalid or uninitialized.
static void outputPrintable(raw_ostream &OS, StringRef Str)
unsigned getLine() const
Return the presumed line number of this location.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
Defines the clang::Preprocessor interface.
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition: Pragma.cpp:934
bool getCommentRetentionState() const
Definition: Preprocessor.h:855
unsigned ShowMacros
Print macro definitions.
Record the location of an inclusion directive, such as an #include or #import statement.
Represents an unpacked "presumed" location which can be presented to the user.
const SourceManager & SM
Definition: Format.cpp:1490
SourceManager & getSourceManager() const
Definition: Preprocessor.h:819
const char * getFilename() const
Return the presumed filename of this location.
unsigned ShowIncludeDirectives
Print includes, imports etc. within preprocessed output.
Encapsulates changes to the "macros namespace" (the location where the macro name became active...
Definition: MacroInfo.h:291
Kind
static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS)
Encodes a location in the source.
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:60
void Lex(Token &Result)
Lex the next token for this preprocessor.
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:266
unsigned UseLineDirectives
Use #line instead of GCC-style # N.
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:140
const MacroInfo * getMacroInfo() const
Definition: MacroInfo.h:391
bool param_empty() const
Definition: MacroInfo.h:180
StringRef getName() const
Return the actual identifier string.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
unsigned getExpansionColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool isFunctionLike() const
Definition: MacroInfo.h:200
Present this diagnostic as a remark.
PragmaHandler - Instances of this interface are defined to handle the various pragmas that the language f...
Definition: Pragma.h:59
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
unsigned getLength() const
Definition: Token.h:127
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:40
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:215
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition: MacroInfo.h:124
bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const
AvoidConcat - If printing PrevTok immediately followed by Tok would cause the two individual tokens t...
bool tokens_empty() const
Definition: MacroInfo.h:245
Defines the PPCallbacks interface.
unsigned ShowMacroComments
Show comments, even in macros.
ArrayRef< Token > tokens() const
Definition: MacroInfo.h:246
Do not present this diagnostic, ignore it.
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition: MacroInfo.h:216
unsigned ShowLineMarkers
Show #line markers.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:283
bool isGNUVarargs() const
Definition: MacroInfo.h:207
MacroMap::const_iterator macro_iterator
unsigned ShowCPP
Print normal preprocessed output.
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition: MacroInfo.h:581
Present this diagnostic as a fatal error.
Present this diagnostic as a warning.
SourceLocation getIncludeLoc() const
Return the presumed include location of this location.
void * getAnnotationValue() const
Definition: Token.h:224
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:270
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:908
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:169
static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PrintPPOutputPPCallbacks *Callbacks, raw_ostream &OS)
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:125