clang  9.0.0svn
PrintPreprocessedOutput.cpp
Go to the documentation of this file.
1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This code simply runs the preprocessor on the input file and prints out the
10 // result. This is the traditional behavior of the -E option.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "clang/Frontend/Utils.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/TokenConcatenation.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
30 using namespace clang;
31 
32 /// PrintMacroDefinition - Print a macro definition in a form that will be
33 /// properly accepted back as a definition.
34 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35  Preprocessor &PP, raw_ostream &OS) {
36  OS << "#define " << II.getName();
37 
38  if (MI.isFunctionLike()) {
39  OS << '(';
40  if (!MI.param_empty()) {
42  for (; AI+1 != E; ++AI) {
43  OS << (*AI)->getName();
44  OS << ',';
45  }
46 
47  // Last argument.
48  if ((*AI)->getName() == "__VA_ARGS__")
49  OS << "...";
50  else
51  OS << (*AI)->getName();
52  }
53 
54  if (MI.isGNUVarargs())
55  OS << "..."; // #define foo(x...)
56 
57  OS << ')';
58  }
59 
60  // GCC always emits a space, even if the macro body is empty. However, do not
61  // want to emit two spaces if the first token has a leading space.
62  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
63  OS << ' ';
64 
65  SmallString<128> SpellingBuffer;
66  for (const auto &T : MI.tokens()) {
67  if (T.hasLeadingSpace())
68  OS << ' ';
69 
70  OS << PP.getSpelling(T, SpellingBuffer);
71  }
72 }
73 
74 //===----------------------------------------------------------------------===//
75 // Preprocessed token printer
76 //===----------------------------------------------------------------------===//
77 
78 namespace {
79 class PrintPPOutputPPCallbacks : public PPCallbacks {
80  Preprocessor &PP;
82  TokenConcatenation ConcatInfo;
83 public:
84  raw_ostream &OS;
85 private:
86  unsigned CurLine;
87 
88  bool EmittedTokensOnThisLine;
89  bool EmittedDirectiveOnThisLine;
91  SmallString<512> CurFilename;
92  bool Initialized;
93  bool DisableLineMarkers;
94  bool DumpDefines;
95  bool DumpIncludeDirectives;
96  bool UseLineDirectives;
97  bool IsFirstFileEntered;
98 public:
99  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
100  bool defines, bool DumpIncludeDirectives,
101  bool UseLineDirectives)
102  : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
103  DisableLineMarkers(lineMarkers), DumpDefines(defines),
104  DumpIncludeDirectives(DumpIncludeDirectives),
105  UseLineDirectives(UseLineDirectives) {
106  CurLine = 0;
107  CurFilename += "<uninit>";
108  EmittedTokensOnThisLine = false;
109  EmittedDirectiveOnThisLine = false;
110  FileType = SrcMgr::C_User;
111  Initialized = false;
112  IsFirstFileEntered = false;
113  }
114 
115  void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
116  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
117 
118  void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
119  bool hasEmittedDirectiveOnThisLine() const {
120  return EmittedDirectiveOnThisLine;
121  }
122 
123  bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
124 
125  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
127  FileID PrevFID) override;
128  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
129  StringRef FileName, bool IsAngled,
130  CharSourceRange FilenameRange, const FileEntry *File,
131  StringRef SearchPath, StringRef RelativePath,
132  const Module *Imported,
133  SrcMgr::CharacteristicKind FileType) override;
134  void Ident(SourceLocation Loc, StringRef str) override;
135  void PragmaMessage(SourceLocation Loc, StringRef Namespace,
136  PragmaMessageKind Kind, StringRef Str) override;
137  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
138  void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
139  void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
140  void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
141  diag::Severity Map, StringRef Str) override;
142  void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
143  ArrayRef<int> Ids) override;
144  void PragmaWarningPush(SourceLocation Loc, int Level) override;
145  void PragmaWarningPop(SourceLocation Loc) override;
146  void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override;
147  void PragmaExecCharsetPop(SourceLocation Loc) override;
148  void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
149  void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
150 
151  bool HandleFirstTokOnLine(Token &Tok);
152 
153  /// Move to the line of the provided source location. This will
154  /// return true if the output stream required adjustment or if
155  /// the requested location is on the first line.
156  bool MoveToLine(SourceLocation Loc) {
157  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
158  if (PLoc.isInvalid())
159  return false;
160  return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
161  }
162  bool MoveToLine(unsigned LineNo);
163 
164  bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
165  const Token &Tok) {
166  return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
167  }
168  void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
169  unsigned ExtraLen=0);
170  bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
171  void HandleNewlinesInToken(const char *TokStr, unsigned Len);
172 
173  /// MacroDefined - This hook is called whenever a macro definition is seen.
174  void MacroDefined(const Token &MacroNameTok,
175  const MacroDirective *MD) override;
176 
177  /// MacroUndefined - This hook is called whenever a macro #undef is seen.
178  void MacroUndefined(const Token &MacroNameTok,
179  const MacroDefinition &MD,
180  const MacroDirective *Undef) override;
181 
182  void BeginModule(const Module *M);
183  void EndModule(const Module *M);
184 };
185 } // end anonymous namespace
186 
187 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
188  const char *Extra,
189  unsigned ExtraLen) {
190  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
191 
192  // Emit #line directives or GNU line markers depending on what mode we're in.
193  if (UseLineDirectives) {
194  OS << "#line" << ' ' << LineNo << ' ' << '"';
195  OS.write_escaped(CurFilename);
196  OS << '"';
197  } else {
198  OS << '#' << ' ' << LineNo << ' ' << '"';
199  OS.write_escaped(CurFilename);
200  OS << '"';
201 
202  if (ExtraLen)
203  OS.write(Extra, ExtraLen);
204 
205  if (FileType == SrcMgr::C_System)
206  OS.write(" 3", 2);
207  else if (FileType == SrcMgr::C_ExternCSystem)
208  OS.write(" 3 4", 4);
209  }
210  OS << '\n';
211 }
212 
213 /// MoveToLine - Move the output to the source line specified by the location
214 /// object. We can do this by emitting some number of \n's, or be emitting a
215 /// #line directive. This returns false if already at the specified line, true
216 /// if some newlines were emitted.
217 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
218  // If this line is "close enough" to the original line, just print newlines,
219  // otherwise print a #line directive.
220  if (LineNo-CurLine <= 8) {
221  if (LineNo-CurLine == 1)
222  OS << '\n';
223  else if (LineNo == CurLine)
224  return false; // Spelling line moved, but expansion line didn't.
225  else {
226  const char *NewLines = "\n\n\n\n\n\n\n\n";
227  OS.write(NewLines, LineNo-CurLine);
228  }
229  } else if (!DisableLineMarkers) {
230  // Emit a #line or line marker.
231  WriteLineInfo(LineNo, nullptr, 0);
232  } else {
233  // Okay, we're in -P mode, which turns off line markers. However, we still
234  // need to emit a newline between tokens on different lines.
235  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
236  }
237 
238  CurLine = LineNo;
239  return true;
240 }
241 
242 bool
243 PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
244  if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
245  OS << '\n';
246  EmittedTokensOnThisLine = false;
247  EmittedDirectiveOnThisLine = false;
248  if (ShouldUpdateCurrentLine)
249  ++CurLine;
250  return true;
251  }
252 
253  return false;
254 }
255 
256 /// FileChanged - Whenever the preprocessor enters or exits a #include file
257 /// it invokes this handler. Update our conception of the current source
258 /// position.
259 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
260  FileChangeReason Reason,
261  SrcMgr::CharacteristicKind NewFileType,
262  FileID PrevFID) {
263  // Unless we are exiting a #include, make sure to skip ahead to the line the
264  // #include directive was at.
265  SourceManager &SourceMgr = SM;
266 
267  PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
268  if (UserLoc.isInvalid())
269  return;
270 
271  unsigned NewLine = UserLoc.getLine();
272 
273  if (Reason == PPCallbacks::EnterFile) {
274  SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
275  if (IncludeLoc.isValid())
276  MoveToLine(IncludeLoc);
277  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
278  // GCC emits the # directive for this directive on the line AFTER the
279  // directive and emits a bunch of spaces that aren't needed. This is because
280  // otherwise we will emit a line marker for THIS line, which requires an
281  // extra blank line after the directive to avoid making all following lines
282  // off by one. We can do better by simply incrementing NewLine here.
283  NewLine += 1;
284  }
285 
286  CurLine = NewLine;
287 
288  CurFilename.clear();
289  CurFilename += UserLoc.getFilename();
290  FileType = NewFileType;
291 
292  if (DisableLineMarkers) {
293  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
294  return;
295  }
296 
297  if (!Initialized) {
298  WriteLineInfo(CurLine);
299  Initialized = true;
300  }
301 
302  // Do not emit an enter marker for the main file (which we expect is the first
303  // entered file). This matches gcc, and improves compatibility with some tools
304  // which track the # line markers as a way to determine when the preprocessed
305  // output is in the context of the main file.
306  if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
307  IsFirstFileEntered = true;
308  return;
309  }
310 
311  switch (Reason) {
313  WriteLineInfo(CurLine, " 1", 2);
314  break;
316  WriteLineInfo(CurLine, " 2", 2);
317  break;
320  WriteLineInfo(CurLine);
321  break;
322  }
323 }
324 
325 void PrintPPOutputPPCallbacks::InclusionDirective(
326  SourceLocation HashLoc,
327  const Token &IncludeTok,
328  StringRef FileName,
329  bool IsAngled,
330  CharSourceRange FilenameRange,
331  const FileEntry *File,
332  StringRef SearchPath,
333  StringRef RelativePath,
334  const Module *Imported,
335  SrcMgr::CharacteristicKind FileType) {
336  // In -dI mode, dump #include directives prior to dumping their content or
337  // interpretation.
338  if (DumpIncludeDirectives) {
339  startNewLineIfNeeded();
340  MoveToLine(HashLoc);
341  const std::string TokenText = PP.getSpelling(IncludeTok);
342  assert(!TokenText.empty());
343  OS << "#" << TokenText << " "
344  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
345  << " /* clang -E -dI */";
346  setEmittedDirectiveOnThisLine();
347  startNewLineIfNeeded();
348  }
349 
350  // When preprocessing, turn implicit imports into module import pragmas.
351  if (Imported) {
352  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
353  case tok::pp_include:
354  case tok::pp_import:
355  case tok::pp_include_next:
356  startNewLineIfNeeded();
357  MoveToLine(HashLoc);
358  OS << "#pragma clang module import " << Imported->getFullModuleName(true)
359  << " /* clang -E: implicit import for "
360  << "#" << PP.getSpelling(IncludeTok) << " "
361  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
362  << " */";
363  // Since we want a newline after the pragma, but not a #<line>, start a
364  // new line immediately.
365  EmittedTokensOnThisLine = true;
366  startNewLineIfNeeded();
367  break;
368 
369  case tok::pp___include_macros:
370  // #__include_macros has no effect on a user of a preprocessed source
371  // file; the only effect is on preprocessing.
372  //
373  // FIXME: That's not *quite* true: it causes the module in question to
374  // be loaded, which can affect downstream diagnostics.
375  break;
376 
377  default:
378  llvm_unreachable("unknown include directive kind");
379  break;
380  }
381  }
382 }
383 
384 /// Handle entering the scope of a module during a module compilation.
385 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
386  startNewLineIfNeeded();
387  OS << "#pragma clang module begin " << M->getFullModuleName(true);
388  setEmittedDirectiveOnThisLine();
389 }
390 
391 /// Handle leaving the scope of a module during a module compilation.
392 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
393  startNewLineIfNeeded();
394  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
395  setEmittedDirectiveOnThisLine();
396 }
397 
398 /// Ident - Handle #ident directives when read by the preprocessor.
399 ///
400 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
401  MoveToLine(Loc);
402 
403  OS.write("#ident ", strlen("#ident "));
404  OS.write(S.begin(), S.size());
405  EmittedTokensOnThisLine = true;
406 }
407 
408 /// MacroDefined - This hook is called whenever a macro definition is seen.
409 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
410  const MacroDirective *MD) {
411  const MacroInfo *MI = MD->getMacroInfo();
412  // Only print out macro definitions in -dD mode.
413  if (!DumpDefines ||
414  // Ignore __FILE__ etc.
415  MI->isBuiltinMacro()) return;
416 
417  MoveToLine(MI->getDefinitionLoc());
418  PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
419  setEmittedDirectiveOnThisLine();
420 }
421 
422 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
423  const MacroDefinition &MD,
424  const MacroDirective *Undef) {
425  // Only print out macro definitions in -dD mode.
426  if (!DumpDefines) return;
427 
428  MoveToLine(MacroNameTok.getLocation());
429  OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
430  setEmittedDirectiveOnThisLine();
431 }
432 
433 static void outputPrintable(raw_ostream &OS, StringRef Str) {
434  for (unsigned char Char : Str) {
435  if (isPrintable(Char) && Char != '\\' && Char != '"')
436  OS << (char)Char;
437  else // Output anything hard as an octal escape.
438  OS << '\\'
439  << (char)('0' + ((Char >> 6) & 7))
440  << (char)('0' + ((Char >> 3) & 7))
441  << (char)('0' + ((Char >> 0) & 7));
442  }
443 }
444 
445 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
446  StringRef Namespace,
447  PragmaMessageKind Kind,
448  StringRef Str) {
449  startNewLineIfNeeded();
450  MoveToLine(Loc);
451  OS << "#pragma ";
452  if (!Namespace.empty())
453  OS << Namespace << ' ';
454  switch (Kind) {
455  case PMK_Message:
456  OS << "message(\"";
457  break;
458  case PMK_Warning:
459  OS << "warning \"";
460  break;
461  case PMK_Error:
462  OS << "error \"";
463  break;
464  }
465 
466  outputPrintable(OS, Str);
467  OS << '"';
468  if (Kind == PMK_Message)
469  OS << ')';
470  setEmittedDirectiveOnThisLine();
471 }
472 
473 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
474  StringRef DebugType) {
475  startNewLineIfNeeded();
476  MoveToLine(Loc);
477 
478  OS << "#pragma clang __debug ";
479  OS << DebugType;
480 
481  setEmittedDirectiveOnThisLine();
482 }
483 
484 void PrintPPOutputPPCallbacks::
485 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
486  startNewLineIfNeeded();
487  MoveToLine(Loc);
488  OS << "#pragma " << Namespace << " diagnostic push";
489  setEmittedDirectiveOnThisLine();
490 }
491 
492 void PrintPPOutputPPCallbacks::
493 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
494  startNewLineIfNeeded();
495  MoveToLine(Loc);
496  OS << "#pragma " << Namespace << " diagnostic pop";
497  setEmittedDirectiveOnThisLine();
498 }
499 
500 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
501  StringRef Namespace,
502  diag::Severity Map,
503  StringRef Str) {
504  startNewLineIfNeeded();
505  MoveToLine(Loc);
506  OS << "#pragma " << Namespace << " diagnostic ";
507  switch (Map) {
509  OS << "remark";
510  break;
512  OS << "warning";
513  break;
515  OS << "error";
516  break;
518  OS << "ignored";
519  break;
521  OS << "fatal";
522  break;
523  }
524  OS << " \"" << Str << '"';
525  setEmittedDirectiveOnThisLine();
526 }
527 
528 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
529  StringRef WarningSpec,
530  ArrayRef<int> Ids) {
531  startNewLineIfNeeded();
532  MoveToLine(Loc);
533  OS << "#pragma warning(" << WarningSpec << ':';
534  for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
535  OS << ' ' << *I;
536  OS << ')';
537  setEmittedDirectiveOnThisLine();
538 }
539 
540 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
541  int Level) {
542  startNewLineIfNeeded();
543  MoveToLine(Loc);
544  OS << "#pragma warning(push";
545  if (Level >= 0)
546  OS << ", " << Level;
547  OS << ')';
548  setEmittedDirectiveOnThisLine();
549 }
550 
551 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
552  startNewLineIfNeeded();
553  MoveToLine(Loc);
554  OS << "#pragma warning(pop)";
555  setEmittedDirectiveOnThisLine();
556 }
557 
558 void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
559  StringRef Str) {
560  startNewLineIfNeeded();
561  MoveToLine(Loc);
562  OS << "#pragma character_execution_set(push";
563  if (!Str.empty())
564  OS << ", " << Str;
565  OS << ')';
566  setEmittedDirectiveOnThisLine();
567 }
568 
569 void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
570  startNewLineIfNeeded();
571  MoveToLine(Loc);
572  OS << "#pragma character_execution_set(pop)";
573  setEmittedDirectiveOnThisLine();
574 }
575 
576 void PrintPPOutputPPCallbacks::
577 PragmaAssumeNonNullBegin(SourceLocation Loc) {
578  startNewLineIfNeeded();
579  MoveToLine(Loc);
580  OS << "#pragma clang assume_nonnull begin";
581  setEmittedDirectiveOnThisLine();
582 }
583 
584 void PrintPPOutputPPCallbacks::
585 PragmaAssumeNonNullEnd(SourceLocation Loc) {
586  startNewLineIfNeeded();
587  MoveToLine(Loc);
588  OS << "#pragma clang assume_nonnull end";
589  setEmittedDirectiveOnThisLine();
590 }
591 
592 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
593 /// is called for the first token on each new line. If this really is the start
594 /// of a new logical line, handle it and return true, otherwise return false.
595 /// This may not be the start of a logical line because the "start of line"
596 /// marker is set for spelling lines, not expansion ones.
597 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
598  // Figure out what line we went to and insert the appropriate number of
599  // newline characters.
600  if (!MoveToLine(Tok.getLocation()))
601  return false;
602 
603  // Print out space characters so that the first token on a line is
604  // indented for easy reading.
605  unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
606 
607  // The first token on a line can have a column number of 1, yet still expect
608  // leading white space, if a macro expansion in column 1 starts with an empty
609  // macro argument, or an empty nested macro expansion. In this case, move the
610  // token to column 2.
611  if (ColNo == 1 && Tok.hasLeadingSpace())
612  ColNo = 2;
613 
614  // This hack prevents stuff like:
615  // #define HASH #
616  // HASH define foo bar
617  // From having the # character end up at column 1, which makes it so it
618  // is not handled as a #define next time through the preprocessor if in
619  // -fpreprocessed mode.
620  if (ColNo <= 1 && Tok.is(tok::hash))
621  OS << ' ';
622 
623  // Otherwise, indent the appropriate number of spaces.
624  for (; ColNo > 1; --ColNo)
625  OS << ' ';
626 
627  return true;
628 }
629 
630 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
631  unsigned Len) {
632  unsigned NumNewlines = 0;
633  for (; Len; --Len, ++TokStr) {
634  if (*TokStr != '\n' &&
635  *TokStr != '\r')
636  continue;
637 
638  ++NumNewlines;
639 
640  // If we have \n\r or \r\n, skip both and count as one line.
641  if (Len != 1 &&
642  (TokStr[1] == '\n' || TokStr[1] == '\r') &&
643  TokStr[0] != TokStr[1]) {
644  ++TokStr;
645  --Len;
646  }
647  }
648 
649  if (NumNewlines == 0) return;
650 
651  CurLine += NumNewlines;
652 }
653 
654 
655 namespace {
656 struct UnknownPragmaHandler : public PragmaHandler {
657  const char *Prefix;
658  PrintPPOutputPPCallbacks *Callbacks;
659 
660  // Set to true if tokens should be expanded
661  bool ShouldExpandTokens;
662 
663  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
664  bool RequireTokenExpansion)
665  : Prefix(prefix), Callbacks(callbacks),
666  ShouldExpandTokens(RequireTokenExpansion) {}
667  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
668  Token &PragmaTok) override {
669  // Figure out what line we went to and insert the appropriate number of
670  // newline characters.
671  Callbacks->startNewLineIfNeeded();
672  Callbacks->MoveToLine(PragmaTok.getLocation());
673  Callbacks->OS.write(Prefix, strlen(Prefix));
674 
675  if (ShouldExpandTokens) {
676  // The first token does not have expanded macros. Expand them, if
677  // required.
678  auto Toks = llvm::make_unique<Token[]>(1);
679  Toks[0] = PragmaTok;
680  PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
681  /*DisableMacroExpansion=*/false);
682  PP.Lex(PragmaTok);
683  }
684  Token PrevToken;
685  Token PrevPrevToken;
686  PrevToken.startToken();
687  PrevPrevToken.startToken();
688 
689  // Read and print all of the pragma tokens.
690  while (PragmaTok.isNot(tok::eod)) {
691  if (PragmaTok.hasLeadingSpace() ||
692  Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
693  Callbacks->OS << ' ';
694  std::string TokSpell = PP.getSpelling(PragmaTok);
695  Callbacks->OS.write(&TokSpell[0], TokSpell.size());
696 
697  PrevPrevToken = PrevToken;
698  PrevToken = PragmaTok;
699 
700  if (ShouldExpandTokens)
701  PP.Lex(PragmaTok);
702  else
703  PP.LexUnexpandedToken(PragmaTok);
704  }
705  Callbacks->setEmittedDirectiveOnThisLine();
706  }
707 };
708 } // end anonymous namespace
709 
710 
712  PrintPPOutputPPCallbacks *Callbacks,
713  raw_ostream &OS) {
714  bool DropComments = PP.getLangOpts().TraditionalCPP &&
716 
717  char Buffer[256];
718  Token PrevPrevTok, PrevTok;
719  PrevPrevTok.startToken();
720  PrevTok.startToken();
721  while (1) {
722  if (Callbacks->hasEmittedDirectiveOnThisLine()) {
723  Callbacks->startNewLineIfNeeded();
724  Callbacks->MoveToLine(Tok.getLocation());
725  }
726 
727  // If this token is at the start of a line, emit newlines if needed.
728  if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
729  // done.
730  } else if (Tok.hasLeadingSpace() ||
731  // If we haven't emitted a token on this line yet, PrevTok isn't
732  // useful to look at and no concatenation could happen anyway.
733  (Callbacks->hasEmittedTokensOnThisLine() &&
734  // Don't print "-" next to "-", it would form "--".
735  Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
736  OS << ' ';
737  }
738 
739  if (DropComments && Tok.is(tok::comment)) {
740  // Skip comments. Normally the preprocessor does not generate
741  // tok::comment nodes at all when not keeping comments, but under
742  // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
743  SourceLocation StartLoc = Tok.getLocation();
744  Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
745  } else if (Tok.is(tok::eod)) {
746  // Don't print end of directive tokens, since they are typically newlines
747  // that mess up our line tracking. These come from unknown pre-processor
748  // directives or hash-prefixed comments in standalone assembly files.
749  PP.Lex(Tok);
750  continue;
751  } else if (Tok.is(tok::annot_module_include)) {
752  // PrintPPOutputPPCallbacks::InclusionDirective handles producing
753  // appropriate output here. Ignore this token entirely.
754  PP.Lex(Tok);
755  continue;
756  } else if (Tok.is(tok::annot_module_begin)) {
757  // FIXME: We retrieve this token after the FileChanged callback, and
758  // retrieve the module_end token before the FileChanged callback, so
759  // we render this within the file and render the module end outside the
760  // file, but this is backwards from the token locations: the module_begin
761  // token is at the include location (outside the file) and the module_end
762  // token is at the EOF location (within the file).
763  Callbacks->BeginModule(
764  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
765  PP.Lex(Tok);
766  continue;
767  } else if (Tok.is(tok::annot_module_end)) {
768  Callbacks->EndModule(
769  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
770  PP.Lex(Tok);
771  continue;
772  } else if (Tok.is(tok::annot_header_unit)) {
773  // This is a header-name that has been (effectively) converted into a
774  // module-name.
775  // FIXME: The module name could contain non-identifier module name
776  // components. We don't have a good way to round-trip those.
777  Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
778  std::string Name = M->getFullModuleName();
779  OS.write(Name.data(), Name.size());
780  Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
781  } else if (Tok.isAnnotation()) {
782  // Ignore annotation tokens created by pragmas - the pragmas themselves
783  // will be reproduced in the preprocessed output.
784  PP.Lex(Tok);
785  continue;
786  } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
787  OS << II->getName();
788  } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
789  Tok.getLiteralData()) {
790  OS.write(Tok.getLiteralData(), Tok.getLength());
791  } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
792  const char *TokPtr = Buffer;
793  unsigned Len = PP.getSpelling(Tok, TokPtr);
794  OS.write(TokPtr, Len);
795 
796  // Tokens that can contain embedded newlines need to adjust our current
797  // line number.
798  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
799  Callbacks->HandleNewlinesInToken(TokPtr, Len);
800  } else {
801  std::string S = PP.getSpelling(Tok);
802  OS.write(S.data(), S.size());
803 
804  // Tokens that can contain embedded newlines need to adjust our current
805  // line number.
806  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
807  Callbacks->HandleNewlinesInToken(S.data(), S.size());
808  }
809  Callbacks->setEmittedTokensOnThisLine();
810 
811  if (Tok.is(tok::eof)) break;
812 
813  PrevPrevTok = PrevTok;
814  PrevTok = Tok;
815  PP.Lex(Tok);
816  }
817 }
818 
819 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
820 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
821  return LHS->first->getName().compare(RHS->first->getName());
822 }
823 
824 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
825  // Ignore unknown pragmas.
826  PP.IgnorePragmas();
827 
828  // -dM mode just scans and ignores all tokens in the files, then dumps out
829  // the macro table at the end.
830  PP.EnterMainSourceFile();
831 
832  Token Tok;
833  do PP.Lex(Tok);
834  while (Tok.isNot(tok::eof));
835 
837  for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
838  I != E; ++I) {
839  auto *MD = I->second.getLatest();
840  if (MD && MD->isDefined())
841  MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
842  }
843  llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
844 
845  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
846  MacroInfo &MI = *MacrosByID[i].second;
847  // Ignore computed macros like __LINE__ and friends.
848  if (MI.isBuiltinMacro()) continue;
849 
850  PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
851  *OS << '\n';
852  }
853 }
854 
855 /// DoPrintPreprocessedInput - This implements -E mode.
856 ///
858  const PreprocessorOutputOptions &Opts) {
859  // Show macros with no output is handled specially.
860  if (!Opts.ShowCPP) {
861  assert(Opts.ShowMacros && "Not yet implemented!");
862  DoPrintMacros(PP, OS);
863  return;
864  }
865 
866  // Inform the preprocessor whether we want it to retain comments or not, due
867  // to -C or -CC.
869 
870  PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
871  PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
873 
874  // Expand macros in pragmas with -fms-extensions. The assumption is that
875  // the majority of pragmas in such a file will be Microsoft pragmas.
876  // Remember the handlers we will add so that we can remove them later.
877  std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
878  new UnknownPragmaHandler(
879  "#pragma", Callbacks,
880  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
881 
882  std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
883  "#pragma GCC", Callbacks,
884  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
885 
886  std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
887  "#pragma clang", Callbacks,
888  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
889 
890  PP.AddPragmaHandler(MicrosoftExtHandler.get());
891  PP.AddPragmaHandler("GCC", GCCHandler.get());
892  PP.AddPragmaHandler("clang", ClangHandler.get());
893 
894  // The tokens after pragma omp need to be expanded.
895  //
896  // OpenMP [2.1, Directive format]
897  // Preprocessing tokens following the #pragma omp are subject to macro
898  // replacement.
899  std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
900  new UnknownPragmaHandler("#pragma omp", Callbacks,
901  /*RequireTokenExpansion=*/true));
902  PP.AddPragmaHandler("omp", OpenMPHandler.get());
903 
904  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
905 
906  // After we have configured the preprocessor, enter the main file.
907  PP.EnterMainSourceFile();
908 
909  // Consume all of the tokens that come from the predefines buffer. Those
910  // should not be emitted into the output and are guaranteed to be at the
911  // start.
912  const SourceManager &SourceMgr = PP.getSourceManager();
913  Token Tok;
914  do {
915  PP.Lex(Tok);
916  if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
917  break;
918 
919  PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
920  if (PLoc.isInvalid())
921  break;
922 
923  if (strcmp(PLoc.getFilename(), "<built-in>"))
924  break;
925  } while (true);
926 
927  // Read all the preprocessed tokens, printing them out to the stream.
928  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
929  *OS << '\n';
930 
931  // Remove the handlers we just added to leave the preprocessor in a sane state
932  // so that it can be reused (for example by a clang::Parser instance).
933  PP.RemovePragmaHandler(MicrosoftExtHandler.get());
934  PP.RemovePragmaHandler("GCC", GCCHandler.get());
935  PP.RemovePragmaHandler("clang", ClangHandler.get());
936  PP.RemovePragmaHandler("omp", OpenMPHandler.get());
937 }
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, Preprocessor &PP, raw_ostream &OS)
PrintMacroDefinition - Print a macro definition in a form that will be properly accepted back as a de...
param_iterator param_begin() const
Definition: MacroInfo.h:180
std::pair< const IdentifierInfo *, MacroInfo * > id_macro_pair
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition: Pragma.cpp:874
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:94
Defines the SourceManager interface.
TokenConcatenation class, which answers the question of "Is it safe to emit two tokens without a whit...
Defines the clang::MacroInfo and clang::MacroDirective classes.
A description of the current definition of a macro.
Definition: MacroInfo.h:564
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition: Pragma.cpp:1893
Severity
Enum values that allow the client to map NOTEs, WARNINGs, and EXTENSIONs to either Ignore (nothing)...
Definition: DiagnosticIDs.h:79
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:77
This interface provides a way to observe the actions of the preprocessor as it does its thing...
Definition: PPCallbacks.h:35
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:117
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:242
tok::TokenKind getKind() const
Definition: Token.h:89
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:112
One of these records is kept for each identifier that is lexed.
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
Definition: Preprocessor.h:934
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:900
Describes a module or submodule.
Definition: Module.h:64
param_iterator param_end() const
Definition: MacroInfo.h:181
std::string getFullModuleName(bool AllowStringLiterals=false) const
Retrieve the full name of this module, including the path from its top-level module.
Definition: Module.cpp:213
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
const FormatToken & Tok
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
Defines the Diagnostic-related interfaces.
Present this diagnostic as an error.
PragmaIntroducerKind
Describes how the pragma was introduced, e.g., with #pragma, _Pragma, or __pragma.
Definition: Pragma.h:31
macro_iterator macro_end(bool IncludeExternalMacros=true) const
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
IdentifierInfo *const * param_iterator
Parameters - The list of parameters for a function-like macro.
Definition: MacroInfo.h:178
return Out str()
PreprocessorOutputOptions - Options for controlling the C preprocessor output (e.g., -E).
void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, const PreprocessorOutputOptions &Opts)
DoPrintPreprocessedInput - Implement -E mode.
static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS)
Represents a character-granular source range.
bool isInvalid() const
Return true if this object is invalid or uninitialized.
static void outputPrintable(raw_ostream &OS, StringRef Str)
unsigned getLine() const
Return the presumed line number of this location.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:123
Defines the clang::Preprocessor interface.
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition: Pragma.cpp:905
bool getCommentRetentionState() const
Definition: Preprocessor.h:939
unsigned ShowMacros
Print macro definitions.
Record the location of an inclusion directive, such as an #include or #import statement.
Represents an unpacked "presumed" location which can be presented to the user.
const SourceManager & SM
Definition: Format.cpp:1568
SourceManager & getSourceManager() const
Definition: Preprocessor.h:904
const char * getFilename() const
Return the presumed filename of this location.
unsigned ShowIncludeDirectives
Print includes, imports etc. within preprocessed output.
Encapsulates changes to the "macros namespace" (the location where the macro name became active...
Definition: MacroInfo.h:290
Kind
static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS)
Encodes a location in the source.
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:176
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:59
void Lex(Token &Result)
Lex the next token for this preprocessor.
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:265
unsigned UseLineDirectives
Use #line instead of GCC-style # N.
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:139
const MacroInfo * getMacroInfo() const
Definition: MacroInfo.h:390
bool param_empty() const
Definition: MacroInfo.h:179
StringRef getName() const
Return the actual identifier string.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isNot(tok::TokenKind K) const
Definition: Token.h:95
unsigned getExpansionColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool isFunctionLike() const
Definition: MacroInfo.h:199
Present this diagnostic as a remark.
PragmaHandler - Instances of this interface are defined to handle the various pragmas that the language f...
Definition: Pragma.h:58
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
unsigned getLength() const
Definition: Token.h:126
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:214
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition: MacroInfo.h:123
bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const
AvoidConcat - If printing PrevTok immediately followed by Tok would cause the two individual tokens t...
bool tokens_empty() const
Definition: MacroInfo.h:244
Defines the PPCallbacks interface.
unsigned ShowMacroComments
Show comments, even in macros.
ArrayRef< Token > tokens() const
Definition: MacroInfo.h:245
Do not present this diagnostic, ignore it.
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition: MacroInfo.h:215
unsigned ShowLineMarkers
Show #line markers.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:282
bool isGNUVarargs() const
Definition: MacroInfo.h:206
MacroMap::const_iterator macro_iterator
unsigned ShowCPP
Print normal preprocessed output.
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition: MacroInfo.h:580
Present this diagnostic as a fatal error.
Present this diagnostic as a warning.
SourceLocation getIncludeLoc() const
Return the presumed include location of this location.
void * getAnnotationValue() const
Definition: Token.h:223
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:269
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:992
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:168
static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PrintPPOutputPPCallbacks *Callbacks, raw_ostream &OS)
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:123