clang  6.0.0svn
PrintPreprocessedOutput.cpp
Go to the documentation of this file.
1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This code simply runs the preprocessor on the input file and prints out the
11 // result. This is the traditional behavior of the -E option.
12 //
13 //===----------------------------------------------------------------------===//
14 
#include "clang/Frontend/Utils.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/TokenConcatenation.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
31 using namespace clang;
32 
33 /// PrintMacroDefinition - Print a macro definition in a form that will be
34 /// properly accepted back as a definition.
35 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
36  Preprocessor &PP, raw_ostream &OS) {
37  OS << "#define " << II.getName();
38 
39  if (MI.isFunctionLike()) {
40  OS << '(';
41  if (!MI.param_empty()) {
43  for (; AI+1 != E; ++AI) {
44  OS << (*AI)->getName();
45  OS << ',';
46  }
47 
48  // Last argument.
49  if ((*AI)->getName() == "__VA_ARGS__")
50  OS << "...";
51  else
52  OS << (*AI)->getName();
53  }
54 
55  if (MI.isGNUVarargs())
56  OS << "..."; // #define foo(x...)
57 
58  OS << ')';
59  }
60 
61  // GCC always emits a space, even if the macro body is empty. However, do not
62  // want to emit two spaces if the first token has a leading space.
63  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
64  OS << ' ';
65 
66  SmallString<128> SpellingBuffer;
67  for (const auto &T : MI.tokens()) {
68  if (T.hasLeadingSpace())
69  OS << ' ';
70 
71  OS << PP.getSpelling(T, SpellingBuffer);
72  }
73 }
74 
75 //===----------------------------------------------------------------------===//
76 // Preprocessed token printer
77 //===----------------------------------------------------------------------===//
78 
79 namespace {
80 class PrintPPOutputPPCallbacks : public PPCallbacks {
81  Preprocessor &PP;
83  TokenConcatenation ConcatInfo;
84 public:
85  raw_ostream &OS;
86 private:
87  unsigned CurLine;
88 
89  bool EmittedTokensOnThisLine;
90  bool EmittedDirectiveOnThisLine;
92  SmallString<512> CurFilename;
93  bool Initialized;
94  bool DisableLineMarkers;
95  bool DumpDefines;
96  bool DumpIncludeDirectives;
97  bool UseLineDirectives;
98  bool IsFirstFileEntered;
99 public:
100  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
101  bool defines, bool DumpIncludeDirectives,
102  bool UseLineDirectives)
103  : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
104  DisableLineMarkers(lineMarkers), DumpDefines(defines),
105  DumpIncludeDirectives(DumpIncludeDirectives),
106  UseLineDirectives(UseLineDirectives) {
107  CurLine = 0;
108  CurFilename += "<uninit>";
109  EmittedTokensOnThisLine = false;
110  EmittedDirectiveOnThisLine = false;
111  FileType = SrcMgr::C_User;
112  Initialized = false;
113  IsFirstFileEntered = false;
114  }
115 
116  void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
117  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
118 
119  void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
120  bool hasEmittedDirectiveOnThisLine() const {
121  return EmittedDirectiveOnThisLine;
122  }
123 
124  bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
125 
126  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
128  FileID PrevFID) override;
129  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
130  StringRef FileName, bool IsAngled,
131  CharSourceRange FilenameRange, const FileEntry *File,
132  StringRef SearchPath, StringRef RelativePath,
133  const Module *Imported) override;
134  void Ident(SourceLocation Loc, StringRef str) override;
135  void PragmaMessage(SourceLocation Loc, StringRef Namespace,
136  PragmaMessageKind Kind, StringRef Str) override;
137  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
138  void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
139  void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
140  void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
141  diag::Severity Map, StringRef Str) override;
142  void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
143  ArrayRef<int> Ids) override;
144  void PragmaWarningPush(SourceLocation Loc, int Level) override;
145  void PragmaWarningPop(SourceLocation Loc) override;
146  void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
147  void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
148 
149  bool HandleFirstTokOnLine(Token &Tok);
150 
151  /// Move to the line of the provided source location. This will
152  /// return true if the output stream required adjustment or if
153  /// the requested location is on the first line.
154  bool MoveToLine(SourceLocation Loc) {
155  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
156  if (PLoc.isInvalid())
157  return false;
158  return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
159  }
160  bool MoveToLine(unsigned LineNo);
161 
162  bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
163  const Token &Tok) {
164  return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
165  }
166  void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
167  unsigned ExtraLen=0);
168  bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
169  void HandleNewlinesInToken(const char *TokStr, unsigned Len);
170 
171  /// MacroDefined - This hook is called whenever a macro definition is seen.
172  void MacroDefined(const Token &MacroNameTok,
173  const MacroDirective *MD) override;
174 
175  /// MacroUndefined - This hook is called whenever a macro #undef is seen.
176  void MacroUndefined(const Token &MacroNameTok,
177  const MacroDefinition &MD,
178  const MacroDirective *Undef) override;
179 
180  void BeginModule(const Module *M);
181  void EndModule(const Module *M);
182 };
183 } // end anonymous namespace
184 
185 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
186  const char *Extra,
187  unsigned ExtraLen) {
188  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
189 
190  // Emit #line directives or GNU line markers depending on what mode we're in.
191  if (UseLineDirectives) {
192  OS << "#line" << ' ' << LineNo << ' ' << '"';
193  OS.write_escaped(CurFilename);
194  OS << '"';
195  } else {
196  OS << '#' << ' ' << LineNo << ' ' << '"';
197  OS.write_escaped(CurFilename);
198  OS << '"';
199 
200  if (ExtraLen)
201  OS.write(Extra, ExtraLen);
202 
203  if (FileType == SrcMgr::C_System)
204  OS.write(" 3", 2);
205  else if (FileType == SrcMgr::C_ExternCSystem)
206  OS.write(" 3 4", 4);
207  }
208  OS << '\n';
209 }
210 
211 /// MoveToLine - Move the output to the source line specified by the location
212 /// object. We can do this by emitting some number of \n's, or be emitting a
213 /// #line directive. This returns false if already at the specified line, true
214 /// if some newlines were emitted.
215 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
216  // If this line is "close enough" to the original line, just print newlines,
217  // otherwise print a #line directive.
218  if (LineNo-CurLine <= 8) {
219  if (LineNo-CurLine == 1)
220  OS << '\n';
221  else if (LineNo == CurLine)
222  return false; // Spelling line moved, but expansion line didn't.
223  else {
224  const char *NewLines = "\n\n\n\n\n\n\n\n";
225  OS.write(NewLines, LineNo-CurLine);
226  }
227  } else if (!DisableLineMarkers) {
228  // Emit a #line or line marker.
229  WriteLineInfo(LineNo, nullptr, 0);
230  } else {
231  // Okay, we're in -P mode, which turns off line markers. However, we still
232  // need to emit a newline between tokens on different lines.
233  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
234  }
235 
236  CurLine = LineNo;
237  return true;
238 }
239 
240 bool
241 PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
242  if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
243  OS << '\n';
244  EmittedTokensOnThisLine = false;
245  EmittedDirectiveOnThisLine = false;
246  if (ShouldUpdateCurrentLine)
247  ++CurLine;
248  return true;
249  }
250 
251  return false;
252 }
253 
254 /// FileChanged - Whenever the preprocessor enters or exits a #include file
255 /// it invokes this handler. Update our conception of the current source
256 /// position.
257 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
258  FileChangeReason Reason,
259  SrcMgr::CharacteristicKind NewFileType,
260  FileID PrevFID) {
261  // Unless we are exiting a #include, make sure to skip ahead to the line the
262  // #include directive was at.
263  SourceManager &SourceMgr = SM;
264 
265  PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
266  if (UserLoc.isInvalid())
267  return;
268 
269  unsigned NewLine = UserLoc.getLine();
270 
271  if (Reason == PPCallbacks::EnterFile) {
272  SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
273  if (IncludeLoc.isValid())
274  MoveToLine(IncludeLoc);
275  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
276  // GCC emits the # directive for this directive on the line AFTER the
277  // directive and emits a bunch of spaces that aren't needed. This is because
278  // otherwise we will emit a line marker for THIS line, which requires an
279  // extra blank line after the directive to avoid making all following lines
280  // off by one. We can do better by simply incrementing NewLine here.
281  NewLine += 1;
282  }
283 
284  CurLine = NewLine;
285 
286  CurFilename.clear();
287  CurFilename += UserLoc.getFilename();
288  FileType = NewFileType;
289 
290  if (DisableLineMarkers) {
291  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
292  return;
293  }
294 
295  if (!Initialized) {
296  WriteLineInfo(CurLine);
297  Initialized = true;
298  }
299 
300  // Do not emit an enter marker for the main file (which we expect is the first
301  // entered file). This matches gcc, and improves compatibility with some tools
302  // which track the # line markers as a way to determine when the preprocessed
303  // output is in the context of the main file.
304  if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
305  IsFirstFileEntered = true;
306  return;
307  }
308 
309  switch (Reason) {
311  WriteLineInfo(CurLine, " 1", 2);
312  break;
314  WriteLineInfo(CurLine, " 2", 2);
315  break;
318  WriteLineInfo(CurLine);
319  break;
320  }
321 }
322 
323 void PrintPPOutputPPCallbacks::InclusionDirective(SourceLocation HashLoc,
324  const Token &IncludeTok,
325  StringRef FileName,
326  bool IsAngled,
327  CharSourceRange FilenameRange,
328  const FileEntry *File,
329  StringRef SearchPath,
330  StringRef RelativePath,
331  const Module *Imported) {
332  // In -dI mode, dump #include directives prior to dumping their content or
333  // interpretation.
334  if (DumpIncludeDirectives) {
335  startNewLineIfNeeded();
336  MoveToLine(HashLoc);
337  const std::string TokenText = PP.getSpelling(IncludeTok);
338  assert(!TokenText.empty());
339  OS << "#" << TokenText << " "
340  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
341  << " /* clang -E -dI */";
342  setEmittedDirectiveOnThisLine();
343  startNewLineIfNeeded();
344  }
345 
346  // When preprocessing, turn implicit imports into module import pragmas.
347  if (Imported) {
348  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
349  case tok::pp_include:
350  case tok::pp_import:
351  case tok::pp_include_next:
352  startNewLineIfNeeded();
353  MoveToLine(HashLoc);
354  OS << "#pragma clang module import " << Imported->getFullModuleName(true)
355  << " /* clang -E: implicit import for "
356  << "#" << PP.getSpelling(IncludeTok) << " "
357  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
358  << " */";
359  // Since we want a newline after the pragma, but not a #<line>, start a
360  // new line immediately.
361  EmittedTokensOnThisLine = true;
362  startNewLineIfNeeded();
363  break;
364 
365  case tok::pp___include_macros:
366  // #__include_macros has no effect on a user of a preprocessed source
367  // file; the only effect is on preprocessing.
368  //
369  // FIXME: That's not *quite* true: it causes the module in question to
370  // be loaded, which can affect downstream diagnostics.
371  break;
372 
373  default:
374  llvm_unreachable("unknown include directive kind");
375  break;
376  }
377  }
378 }
379 
380 /// Handle entering the scope of a module during a module compilation.
381 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
382  startNewLineIfNeeded();
383  OS << "#pragma clang module begin " << M->getFullModuleName(true);
384  setEmittedDirectiveOnThisLine();
385 }
386 
387 /// Handle leaving the scope of a module during a module compilation.
388 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
389  startNewLineIfNeeded();
390  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
391  setEmittedDirectiveOnThisLine();
392 }
393 
394 /// Ident - Handle #ident directives when read by the preprocessor.
395 ///
396 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
397  MoveToLine(Loc);
398 
399  OS.write("#ident ", strlen("#ident "));
400  OS.write(S.begin(), S.size());
401  EmittedTokensOnThisLine = true;
402 }
403 
404 /// MacroDefined - This hook is called whenever a macro definition is seen.
405 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
406  const MacroDirective *MD) {
407  const MacroInfo *MI = MD->getMacroInfo();
408  // Only print out macro definitions in -dD mode.
409  if (!DumpDefines ||
410  // Ignore __FILE__ etc.
411  MI->isBuiltinMacro()) return;
412 
413  MoveToLine(MI->getDefinitionLoc());
414  PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
415  setEmittedDirectiveOnThisLine();
416 }
417 
418 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
419  const MacroDefinition &MD,
420  const MacroDirective *Undef) {
421  // Only print out macro definitions in -dD mode.
422  if (!DumpDefines) return;
423 
424  MoveToLine(MacroNameTok.getLocation());
425  OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
426  setEmittedDirectiveOnThisLine();
427 }
428 
429 static void outputPrintable(raw_ostream &OS, StringRef Str) {
430  for (unsigned char Char : Str) {
431  if (isPrintable(Char) && Char != '\\' && Char != '"')
432  OS << (char)Char;
433  else // Output anything hard as an octal escape.
434  OS << '\\'
435  << (char)('0' + ((Char >> 6) & 7))
436  << (char)('0' + ((Char >> 3) & 7))
437  << (char)('0' + ((Char >> 0) & 7));
438  }
439 }
440 
441 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
442  StringRef Namespace,
443  PragmaMessageKind Kind,
444  StringRef Str) {
445  startNewLineIfNeeded();
446  MoveToLine(Loc);
447  OS << "#pragma ";
448  if (!Namespace.empty())
449  OS << Namespace << ' ';
450  switch (Kind) {
451  case PMK_Message:
452  OS << "message(\"";
453  break;
454  case PMK_Warning:
455  OS << "warning \"";
456  break;
457  case PMK_Error:
458  OS << "error \"";
459  break;
460  }
461 
462  outputPrintable(OS, Str);
463  OS << '"';
464  if (Kind == PMK_Message)
465  OS << ')';
466  setEmittedDirectiveOnThisLine();
467 }
468 
469 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
470  StringRef DebugType) {
471  startNewLineIfNeeded();
472  MoveToLine(Loc);
473 
474  OS << "#pragma clang __debug ";
475  OS << DebugType;
476 
477  setEmittedDirectiveOnThisLine();
478 }
479 
480 void PrintPPOutputPPCallbacks::
481 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
482  startNewLineIfNeeded();
483  MoveToLine(Loc);
484  OS << "#pragma " << Namespace << " diagnostic push";
485  setEmittedDirectiveOnThisLine();
486 }
487 
488 void PrintPPOutputPPCallbacks::
489 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
490  startNewLineIfNeeded();
491  MoveToLine(Loc);
492  OS << "#pragma " << Namespace << " diagnostic pop";
493  setEmittedDirectiveOnThisLine();
494 }
495 
496 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
497  StringRef Namespace,
498  diag::Severity Map,
499  StringRef Str) {
500  startNewLineIfNeeded();
501  MoveToLine(Loc);
502  OS << "#pragma " << Namespace << " diagnostic ";
503  switch (Map) {
505  OS << "remark";
506  break;
508  OS << "warning";
509  break;
511  OS << "error";
512  break;
514  OS << "ignored";
515  break;
517  OS << "fatal";
518  break;
519  }
520  OS << " \"" << Str << '"';
521  setEmittedDirectiveOnThisLine();
522 }
523 
524 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
525  StringRef WarningSpec,
526  ArrayRef<int> Ids) {
527  startNewLineIfNeeded();
528  MoveToLine(Loc);
529  OS << "#pragma warning(" << WarningSpec << ':';
530  for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
531  OS << ' ' << *I;
532  OS << ')';
533  setEmittedDirectiveOnThisLine();
534 }
535 
536 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
537  int Level) {
538  startNewLineIfNeeded();
539  MoveToLine(Loc);
540  OS << "#pragma warning(push";
541  if (Level >= 0)
542  OS << ", " << Level;
543  OS << ')';
544  setEmittedDirectiveOnThisLine();
545 }
546 
547 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
548  startNewLineIfNeeded();
549  MoveToLine(Loc);
550  OS << "#pragma warning(pop)";
551  setEmittedDirectiveOnThisLine();
552 }
553 
554 void PrintPPOutputPPCallbacks::
555 PragmaAssumeNonNullBegin(SourceLocation Loc) {
556  startNewLineIfNeeded();
557  MoveToLine(Loc);
558  OS << "#pragma clang assume_nonnull begin";
559  setEmittedDirectiveOnThisLine();
560 }
561 
562 void PrintPPOutputPPCallbacks::
563 PragmaAssumeNonNullEnd(SourceLocation Loc) {
564  startNewLineIfNeeded();
565  MoveToLine(Loc);
566  OS << "#pragma clang assume_nonnull end";
567  setEmittedDirectiveOnThisLine();
568 }
569 
570 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
571 /// is called for the first token on each new line. If this really is the start
572 /// of a new logical line, handle it and return true, otherwise return false.
573 /// This may not be the start of a logical line because the "start of line"
574 /// marker is set for spelling lines, not expansion ones.
575 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
576  // Figure out what line we went to and insert the appropriate number of
577  // newline characters.
578  if (!MoveToLine(Tok.getLocation()))
579  return false;
580 
581  // Print out space characters so that the first token on a line is
582  // indented for easy reading.
583  unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
584 
585  // The first token on a line can have a column number of 1, yet still expect
586  // leading white space, if a macro expansion in column 1 starts with an empty
587  // macro argument, or an empty nested macro expansion. In this case, move the
588  // token to column 2.
589  if (ColNo == 1 && Tok.hasLeadingSpace())
590  ColNo = 2;
591 
592  // This hack prevents stuff like:
593  // #define HASH #
594  // HASH define foo bar
595  // From having the # character end up at column 1, which makes it so it
596  // is not handled as a #define next time through the preprocessor if in
597  // -fpreprocessed mode.
598  if (ColNo <= 1 && Tok.is(tok::hash))
599  OS << ' ';
600 
601  // Otherwise, indent the appropriate number of spaces.
602  for (; ColNo > 1; --ColNo)
603  OS << ' ';
604 
605  return true;
606 }
607 
608 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
609  unsigned Len) {
610  unsigned NumNewlines = 0;
611  for (; Len; --Len, ++TokStr) {
612  if (*TokStr != '\n' &&
613  *TokStr != '\r')
614  continue;
615 
616  ++NumNewlines;
617 
618  // If we have \n\r or \r\n, skip both and count as one line.
619  if (Len != 1 &&
620  (TokStr[1] == '\n' || TokStr[1] == '\r') &&
621  TokStr[0] != TokStr[1]) {
622  ++TokStr;
623  --Len;
624  }
625  }
626 
627  if (NumNewlines == 0) return;
628 
629  CurLine += NumNewlines;
630 }
631 
632 
633 namespace {
634 struct UnknownPragmaHandler : public PragmaHandler {
635  const char *Prefix;
636  PrintPPOutputPPCallbacks *Callbacks;
637 
638  // Set to true if tokens should be expanded
639  bool ShouldExpandTokens;
640 
641  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
642  bool RequireTokenExpansion)
643  : Prefix(prefix), Callbacks(callbacks),
644  ShouldExpandTokens(RequireTokenExpansion) {}
645  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
646  Token &PragmaTok) override {
647  // Figure out what line we went to and insert the appropriate number of
648  // newline characters.
649  Callbacks->startNewLineIfNeeded();
650  Callbacks->MoveToLine(PragmaTok.getLocation());
651  Callbacks->OS.write(Prefix, strlen(Prefix));
652 
653  if (ShouldExpandTokens) {
654  // The first token does not have expanded macros. Expand them, if
655  // required.
656  auto Toks = llvm::make_unique<Token[]>(1);
657  Toks[0] = PragmaTok;
658  PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
659  /*DisableMacroExpansion=*/false);
660  PP.Lex(PragmaTok);
661  }
662  Token PrevToken;
663  Token PrevPrevToken;
664  PrevToken.startToken();
665  PrevPrevToken.startToken();
666 
667  // Read and print all of the pragma tokens.
668  while (PragmaTok.isNot(tok::eod)) {
669  if (PragmaTok.hasLeadingSpace() ||
670  Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
671  Callbacks->OS << ' ';
672  std::string TokSpell = PP.getSpelling(PragmaTok);
673  Callbacks->OS.write(&TokSpell[0], TokSpell.size());
674 
675  PrevPrevToken = PrevToken;
676  PrevToken = PragmaTok;
677 
678  if (ShouldExpandTokens)
679  PP.Lex(PragmaTok);
680  else
681  PP.LexUnexpandedToken(PragmaTok);
682  }
683  Callbacks->setEmittedDirectiveOnThisLine();
684  }
685 };
686 } // end anonymous namespace
687 
688 
690  PrintPPOutputPPCallbacks *Callbacks,
691  raw_ostream &OS) {
692  bool DropComments = PP.getLangOpts().TraditionalCPP &&
694 
695  char Buffer[256];
696  Token PrevPrevTok, PrevTok;
697  PrevPrevTok.startToken();
698  PrevTok.startToken();
699  while (1) {
700  if (Callbacks->hasEmittedDirectiveOnThisLine()) {
701  Callbacks->startNewLineIfNeeded();
702  Callbacks->MoveToLine(Tok.getLocation());
703  }
704 
705  // If this token is at the start of a line, emit newlines if needed.
706  if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
707  // done.
708  } else if (Tok.hasLeadingSpace() ||
709  // If we haven't emitted a token on this line yet, PrevTok isn't
710  // useful to look at and no concatenation could happen anyway.
711  (Callbacks->hasEmittedTokensOnThisLine() &&
712  // Don't print "-" next to "-", it would form "--".
713  Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
714  OS << ' ';
715  }
716 
717  if (DropComments && Tok.is(tok::comment)) {
718  // Skip comments. Normally the preprocessor does not generate
719  // tok::comment nodes at all when not keeping comments, but under
720  // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
721  SourceLocation StartLoc = Tok.getLocation();
722  Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
723  } else if (Tok.is(tok::eod)) {
724  // Don't print end of directive tokens, since they are typically newlines
725  // that mess up our line tracking. These come from unknown pre-processor
726  // directives or hash-prefixed comments in standalone assembly files.
727  PP.Lex(Tok);
728  continue;
729  } else if (Tok.is(tok::annot_module_include)) {
730  // PrintPPOutputPPCallbacks::InclusionDirective handles producing
731  // appropriate output here. Ignore this token entirely.
732  PP.Lex(Tok);
733  continue;
734  } else if (Tok.is(tok::annot_module_begin)) {
735  // FIXME: We retrieve this token after the FileChanged callback, and
736  // retrieve the module_end token before the FileChanged callback, so
737  // we render this within the file and render the module end outside the
738  // file, but this is backwards from the token locations: the module_begin
739  // token is at the include location (outside the file) and the module_end
740  // token is at the EOF location (within the file).
741  Callbacks->BeginModule(
742  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
743  PP.Lex(Tok);
744  continue;
745  } else if (Tok.is(tok::annot_module_end)) {
746  Callbacks->EndModule(
747  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
748  PP.Lex(Tok);
749  continue;
750  } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
751  OS << II->getName();
752  } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
753  Tok.getLiteralData()) {
754  OS.write(Tok.getLiteralData(), Tok.getLength());
755  } else if (Tok.getLength() < 256) {
756  const char *TokPtr = Buffer;
757  unsigned Len = PP.getSpelling(Tok, TokPtr);
758  OS.write(TokPtr, Len);
759 
760  // Tokens that can contain embedded newlines need to adjust our current
761  // line number.
762  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
763  Callbacks->HandleNewlinesInToken(TokPtr, Len);
764  } else {
765  std::string S = PP.getSpelling(Tok);
766  OS.write(&S[0], S.size());
767 
768  // Tokens that can contain embedded newlines need to adjust our current
769  // line number.
770  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
771  Callbacks->HandleNewlinesInToken(&S[0], S.size());
772  }
773  Callbacks->setEmittedTokensOnThisLine();
774 
775  if (Tok.is(tok::eof)) break;
776 
777  PrevPrevTok = PrevTok;
778  PrevTok = Tok;
779  PP.Lex(Tok);
780  }
781 }
782 
783 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
784 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
785  return LHS->first->getName().compare(RHS->first->getName());
786 }
787 
788 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
789  // Ignore unknown pragmas.
790  PP.IgnorePragmas();
791 
792  // -dM mode just scans and ignores all tokens in the files, then dumps out
793  // the macro table at the end.
794  PP.EnterMainSourceFile();
795 
796  Token Tok;
797  do PP.Lex(Tok);
798  while (Tok.isNot(tok::eof));
799 
801  for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
802  I != E; ++I) {
803  auto *MD = I->second.getLatest();
804  if (MD && MD->isDefined())
805  MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
806  }
807  llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
808 
809  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
810  MacroInfo &MI = *MacrosByID[i].second;
811  // Ignore computed macros like __LINE__ and friends.
812  if (MI.isBuiltinMacro()) continue;
813 
814  PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
815  *OS << '\n';
816  }
817 }
818 
819 /// DoPrintPreprocessedInput - This implements -E mode.
820 ///
822  const PreprocessorOutputOptions &Opts) {
823  // Show macros with no output is handled specially.
824  if (!Opts.ShowCPP) {
825  assert(Opts.ShowMacros && "Not yet implemented!");
826  DoPrintMacros(PP, OS);
827  return;
828  }
829 
830  // Inform the preprocessor whether we want it to retain comments or not, due
831  // to -C or -CC.
833 
834  PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
835  PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
837 
838  // Expand macros in pragmas with -fms-extensions. The assumption is that
839  // the majority of pragmas in such a file will be Microsoft pragmas.
840  // Remember the handlers we will add so that we can remove them later.
841  std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
842  new UnknownPragmaHandler(
843  "#pragma", Callbacks,
844  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
845 
846  std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
847  "#pragma GCC", Callbacks,
848  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
849 
850  std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
851  "#pragma clang", Callbacks,
852  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
853 
854  PP.AddPragmaHandler(MicrosoftExtHandler.get());
855  PP.AddPragmaHandler("GCC", GCCHandler.get());
856  PP.AddPragmaHandler("clang", ClangHandler.get());
857 
858  // The tokens after pragma omp need to be expanded.
859  //
860  // OpenMP [2.1, Directive format]
861  // Preprocessing tokens following the #pragma omp are subject to macro
862  // replacement.
863  std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
864  new UnknownPragmaHandler("#pragma omp", Callbacks,
865  /*RequireTokenExpansion=*/true));
866  PP.AddPragmaHandler("omp", OpenMPHandler.get());
867 
868  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
869 
870  // After we have configured the preprocessor, enter the main file.
871  PP.EnterMainSourceFile();
872 
873  // Consume all of the tokens that come from the predefines buffer. Those
874  // should not be emitted into the output and are guaranteed to be at the
875  // start.
876  const SourceManager &SourceMgr = PP.getSourceManager();
877  Token Tok;
878  do {
879  PP.Lex(Tok);
880  if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
881  break;
882 
883  PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
884  if (PLoc.isInvalid())
885  break;
886 
887  if (strcmp(PLoc.getFilename(), "<built-in>"))
888  break;
889  } while (true);
890 
891  // Read all the preprocessed tokens, printing them out to the stream.
892  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
893  *OS << '\n';
894 
895  // Remove the handlers we just added to leave the preprocessor in a sane state
896  // so that it can be reused (for example by a clang::Parser instance).
897  PP.RemovePragmaHandler(MicrosoftExtHandler.get());
898  PP.RemovePragmaHandler("GCC", GCCHandler.get());
899  PP.RemovePragmaHandler("clang", ClangHandler.get());
900  PP.RemovePragmaHandler("omp", OpenMPHandler.get());
901 }
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, Preprocessor &PP, raw_ostream &OS)
PrintMacroDefinition - Print a macro definition in a form that will be properly accepted back as a de...
param_iterator param_begin() const
Definition: MacroInfo.h:181
std::pair< const IdentifierInfo *, MacroInfo * > id_macro_pair
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition: Pragma.cpp:882
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
Defines the SourceManager interface.
TokenConcatenation class, which answers the question of "Is it safe to emit two tokens without a whit...
Defines the clang::MacroInfo and clang::MacroDirective classes.
A description of the current definition of a macro.
Definition: MacroInfo.h:564
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition: Pragma.cpp:1840
Severity
Enum values that allow the client to map NOTEs, WARNINGs, and EXTENSIONs to either Ignore (nothing)...
Definition: DiagnosticIDs.h:80
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:78
This interface provides a way to observe the actions of the preprocessor as it does its thing...
Definition: PPCallbacks.h:36
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:243
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
One of these records is kept for each identifier that is lexed.
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
Definition: Preprocessor.h:850
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:811
Describes a module or submodule.
Definition: Module.h:65
param_iterator param_end() const
Definition: MacroInfo.h:182
std::string getFullModuleName(bool AllowStringLiterals=false) const
Retrieve the full name of this module, including the path from its top-level module.
Definition: Module.cpp:169
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
const FormatToken & Tok
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
Defines the Diagnostic-related interfaces.
Present this diagnostic as an error.
PragmaIntroducerKind
Describes how the pragma was introduced, e.g., with #pragma, _Pragma, or __pragma.
Definition: Pragma.h:32
macro_iterator macro_end(bool IncludeExternalMacros=true) const
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
IdentifierInfo *const * param_iterator
Parameters - The list of parameters for a function-like macro.
Definition: MacroInfo.h:179
PreprocessorOutputOptions - Options for controlling the C preprocessor output (e.g., -E).
void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, const PreprocessorOutputOptions &Opts)
DoPrintPreprocessedInput - Implement -E mode.
static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS)
Represents a character-granular source range.
bool isInvalid() const
Return true if this object is invalid or uninitialized.
const FunctionProtoType * T
static void outputPrintable(raw_ostream &OS, StringRef Str)
unsigned getLine() const
Return the presumed line number of this location.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:124
Defines the clang::Preprocessor interface.
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition: Pragma.cpp:913
bool getCommentRetentionState() const
Definition: Preprocessor.h:855
unsigned ShowMacros
Print macro definitions.
Record the location of an inclusion directive, such as an #include or #import statement.
Represents an unpacked "presumed" location which can be presented to the user.
const SourceManager & SM
Definition: Format.cpp:1337
SourceManager & getSourceManager() const
Definition: Preprocessor.h:815
const char * getFilename() const
Return the presumed filename of this location.
unsigned ShowIncludeDirectives
Print includes, imports etc. within preprocessed output.
Encapsulates changes to the "macros namespace" (the location where the macro name became active...
Definition: MacroInfo.h:291
Kind
static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS)
Encodes a location in the source.
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:59
void Lex(Token &Result)
Lex the next token for this preprocessor.
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:266
unsigned UseLineDirectives
Use #line instead of GCC-style # N.
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:140
const MacroInfo * getMacroInfo() const
Definition: MacroInfo.h:391
bool param_empty() const
Definition: MacroInfo.h:180
StringRef getName() const
Return the actual identifier string.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
unsigned getExpansionColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool isFunctionLike() const
Definition: MacroInfo.h:200
Present this diagnostic as a remark.
PragmaHandler - Instances of this interface defined to handle the various pragmas that the language f...
Definition: Pragma.h:59
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
unsigned getLength() const
Definition: Token.h:127
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:40
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:215
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition: MacroInfo.h:124
bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const
AvoidConcat - If printing PrevTok immediately followed by Tok would cause the two individual tokens t...
bool tokens_empty() const
Definition: MacroInfo.h:245
Defines the PPCallbacks interface.
unsigned ShowMacroComments
Show comments, even in macros.
ArrayRef< Token > tokens() const
Definition: MacroInfo.h:246
Do not present this diagnostic, ignore it.
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition: MacroInfo.h:216
unsigned ShowLineMarkers
Show #line markers.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:283
bool isGNUVarargs() const
Definition: MacroInfo.h:207
MacroMap::const_iterator macro_iterator
unsigned ShowCPP
Print normal preprocessed output.
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition: MacroInfo.h:580
Present this diagnostic as a fatal error.
Present this diagnostic as a warning.
SourceLocation getIncludeLoc() const
Return the presumed include location of this location.
void * getAnnotationValue() const
Definition: Token.h:224
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:270
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:908
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:169
static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PrintPPOutputPPCallbacks *Callbacks, raw_ostream &OS)
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:127