clang  9.0.0svn
PrintPreprocessedOutput.cpp
Go to the documentation of this file.
1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This code simply runs the preprocessor on the input file and prints out the
10 // result. This is the traditional behavior of the -E option.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Frontend/Utils.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/Diagnostic.h"
19 #include "clang/Lex/MacroInfo.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/Pragma.h"
22 #include "clang/Lex/Preprocessor.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cstdio>
30 using namespace clang;
31 
32 /// PrintMacroDefinition - Print a macro definition in a form that will be
33 /// properly accepted back as a definition.
///
/// Writes "#define <name>" to \p OS, then (for function-like macros) a
/// parenthesized parameter list, then a separating space, and finally the
/// spelling of every body token as produced by \p PP.
34 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35  Preprocessor &PP, raw_ostream &OS) {
36  OS << "#define " << II.getName();
37 
38  if (MI.isFunctionLike()) {
39  OS << '(';
40  if (!MI.param_empty()) {
 // NOTE(review): listing line 41 is absent from this extract. AI and E, used
 // below, are presumably the parameter iterators declared on that line -
 // confirm against the complete file.
 // Every parameter except the last one is followed by a comma.
42  for (; AI+1 != E; ++AI) {
43  OS << (*AI)->getName();
44  OS << ',';
45  }
46 
47  // Last argument. A parameter named __VA_ARGS__ is printed as "...".
48  if ((*AI)->getName() == "__VA_ARGS__")
49  OS << "...";
50  else
51  OS << (*AI)->getName();
52  }
53 
 // GNU-style named variadics get "..." appended after the last name.
54  if (MI.isGNUVarargs())
55  OS << "..."; // #define foo(x...)
56 
57  OS << ')';
58  }
59 
60  // GCC always emits a space, even if the macro body is empty. However, we do
61  // not want to emit two spaces if the first token has a leading space.
62  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
63  OS << ' ';
64 
 // SpellingBuffer is scratch storage handed to getSpelling for each token.
65  SmallString<128> SpellingBuffer;
66  for (const auto &T : MI.tokens()) {
67  if (T.hasLeadingSpace())
68  OS << ' ';
69 
70  OS << PP.getSpelling(T, SpellingBuffer);
71  }
72 }
73 
74 //===----------------------------------------------------------------------===//
75 // Preprocessed token printer
76 //===----------------------------------------------------------------------===//
77 
78 namespace {
/// Preprocessor callback object used while printing preprocessed output.
///
/// It tracks the current output line and file name, remembers whether tokens
/// or a directive have already been written on the current output line, and
/// re-emits directives (pragmas, #ident, macro definitions, includes) through
/// the PPCallbacks hooks declared below.
79 class PrintPPOutputPPCallbacks : public PPCallbacks {
80  Preprocessor &PP;
 // NOTE(review): listing line 81 is absent from this extract. The constructor
 // initializes a member named SM from PP.getSourceManager(); presumably it is
 // declared on that line - confirm against the complete file.
82  TokenConcatenation ConcatInfo;
83 public:
 // Output stream; public because UnknownPragmaHandler writes to it directly.
84  raw_ostream &OS;
85 private:
 // Line number the output is currently positioned at (set by MoveToLine and
 // FileChanged, advanced by startNewLineIfNeeded/HandleNewlinesInToken).
86  unsigned CurLine;
87 
 // Whether anything has been written on the current output line; consulted by
 // startNewLineIfNeeded to decide if a '\n' is required.
88  bool EmittedTokensOnThisLine;
89  bool EmittedDirectiveOnThisLine;
 // NOTE(review): listing line 90 is absent from this extract. The constructor
 // assigns a member named FileType; presumably it is declared on that line.
91  SmallString<512> CurFilename;
 // False until FileChanged has written the very first line marker.
92  bool Initialized;
 // True when line markers must NOT be written (see LineMarkersAreDisabled).
93  bool DisableLineMarkers;
94  bool DumpDefines;
95  bool DumpIncludeDirectives;
 // Selects "#line N" directives instead of "# N" markers in WriteLineInfo.
96  bool UseLineDirectives;
 // Set once the first EnterFile notification has been seen by FileChanged.
97  bool IsFirstFileEntered;
98 public:
 /// \param lineMarkers stored into DisableLineMarkers, i.e. pass true to
 ///        suppress line markers.
 /// \param defines stored into DumpDefines; enables output from MacroDefined
 ///        and MacroUndefined.
99  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
100  bool defines, bool DumpIncludeDirectives,
101  bool UseLineDirectives)
102  : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
103  DisableLineMarkers(lineMarkers), DumpDefines(defines),
104  DumpIncludeDirectives(DumpIncludeDirectives),
105  UseLineDirectives(UseLineDirectives) {
106  CurLine = 0;
107  CurFilename += "<uninit>";
108  EmittedTokensOnThisLine = false;
109  EmittedDirectiveOnThisLine = false;
110  FileType = SrcMgr::C_User;
111  Initialized = false;
112  IsFirstFileEntered = false;
113  }
114 
115  void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
116  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
117 
118  void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
119  bool hasEmittedDirectiveOnThisLine() const {
120  return EmittedDirectiveOnThisLine;
121  }
122 
 /// Write a '\n' if anything was emitted on the current line. Returns true
 /// when a newline was written; CurLine is only incremented when
 /// \p ShouldUpdateCurrentLine is true.
123  bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
124 
 // NOTE(review): listing line 126 is absent from this extract; the
 // out-of-line definition of FileChanged takes a
 // SrcMgr::CharacteristicKind parameter between Reason and PrevFID.
125  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
127  FileID PrevFID) override;
128  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
129  StringRef FileName, bool IsAngled,
130  CharSourceRange FilenameRange, const FileEntry *File,
131  StringRef SearchPath, StringRef RelativePath,
132  const Module *Imported,
133  SrcMgr::CharacteristicKind FileType) override;
134  void Ident(SourceLocation Loc, StringRef str) override;
135  void PragmaMessage(SourceLocation Loc, StringRef Namespace,
136  PragmaMessageKind Kind, StringRef Str) override;
137  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
138  void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
139  void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
140  void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
141  diag::Severity Map, StringRef Str) override;
142  void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
143  ArrayRef<int> Ids) override;
144  void PragmaWarningPush(SourceLocation Loc, int Level) override;
145  void PragmaWarningPop(SourceLocation Loc) override;
146  void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override;
147  void PragmaExecCharsetPop(SourceLocation Loc) override;
148  void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
149  void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
150 
151  bool HandleFirstTokOnLine(Token &Tok);
152 
153  /// Move to the line of the provided source location. This will
154  /// return true if the output stream required adjustment or if
155  /// the requested location is on the first line.
 /// Returns false without touching the stream when the presumed location of
 /// \p Loc is invalid.
156  bool MoveToLine(SourceLocation Loc) {
157  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
158  if (PLoc.isInvalid())
159  return false;
160  return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
161  }
162  bool MoveToLine(unsigned LineNo);
163 
 /// Forwards to TokenConcatenation::AvoidConcat on ConcatInfo.
164  bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
165  const Token &Tok) {
166  return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
167  }
168  void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
169  unsigned ExtraLen=0);
170  bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
171  void HandleNewlinesInToken(const char *TokStr, unsigned Len);
172 
173  /// MacroDefined - This hook is called whenever a macro definition is seen.
174  void MacroDefined(const Token &MacroNameTok,
175  const MacroDirective *MD) override;
176 
177  /// MacroUndefined - This hook is called whenever a macro #undef is seen.
178  void MacroUndefined(const Token &MacroNameTok,
179  const MacroDefinition &MD,
180  const MacroDirective *Undef) override;
181 
 // Not overrides: these two are called directly from the token printing loop.
182  void BeginModule(const Module *M);
183  void EndModule(const Module *M);
184 };
185 } // end anonymous namespace
186 
187 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
188  const char *Extra,
189  unsigned ExtraLen) {
190  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
191 
192  // Emit #line directives or GNU line markers depending on what mode we're in.
193  if (UseLineDirectives) {
194  OS << "#line" << ' ' << LineNo << ' ' << '"';
195  OS.write_escaped(CurFilename);
196  OS << '"';
197  } else {
198  OS << '#' << ' ' << LineNo << ' ' << '"';
199  OS.write_escaped(CurFilename);
200  OS << '"';
201 
202  if (ExtraLen)
203  OS.write(Extra, ExtraLen);
204 
205  if (FileType == SrcMgr::C_System)
206  OS.write(" 3", 2);
207  else if (FileType == SrcMgr::C_ExternCSystem)
208  OS.write(" 3 4", 4);
209  }
210  OS << '\n';
211 }
212 
213 /// MoveToLine - Move the output to the source line specified by the location
214 /// object. We can do this by emitting some number of \n's, or be emitting a
215 /// #line directive. This returns false if already at the specified line, true
216 /// if some newlines were emitted.
217 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
218  // If this line is "close enough" to the original line, just print newlines,
219  // otherwise print a #line directive.
220  if (LineNo-CurLine <= 8) {
221  if (LineNo-CurLine == 1)
222  OS << '\n';
223  else if (LineNo == CurLine)
224  return false; // Spelling line moved, but expansion line didn't.
225  else {
226  const char *NewLines = "\n\n\n\n\n\n\n\n";
227  OS.write(NewLines, LineNo-CurLine);
228  }
229  } else if (!DisableLineMarkers) {
230  // Emit a #line or line marker.
231  WriteLineInfo(LineNo, nullptr, 0);
232  } else {
233  // Okay, we're in -P mode, which turns off line markers. However, we still
234  // need to emit a newline between tokens on different lines.
235  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
236  }
237 
238  CurLine = LineNo;
239  return true;
240 }
241 
242 bool
243 PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
244  if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
245  OS << '\n';
246  EmittedTokensOnThisLine = false;
247  EmittedDirectiveOnThisLine = false;
248  if (ShouldUpdateCurrentLine)
249  ++CurLine;
250  return true;
251  }
252 
253  return false;
254 }
255 
256 /// FileChanged - Whenever the preprocessor enters or exits a #include file
257 /// it invokes this handler. Update our conception of the current source
258 /// position.
///
/// Updates CurLine, CurFilename and FileType from the presumed location of
/// \p Loc and, unless line markers are disabled, writes the matching marker.
259 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
260  FileChangeReason Reason,
261  SrcMgr::CharacteristicKind NewFileType,
262  FileID PrevFID) {
263  // Unless we are exiting a #include, make sure to skip ahead to the line the
264  // #include directive was at.
265  SourceManager &SourceMgr = SM;
266 
267  PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
 // Without a valid presumed location there is nothing to record.
268  if (UserLoc.isInvalid())
269  return;
270 
271  unsigned NewLine = UserLoc.getLine();
272 
273  if (Reason == PPCallbacks::EnterFile) {
 // Position the output at the including line before switching files.
274  SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
275  if (IncludeLoc.isValid())
276  MoveToLine(IncludeLoc);
277  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
278  // GCC emits the # directive for this directive on the line AFTER the
279  // directive and emits a bunch of spaces that aren't needed. This is because
280  // otherwise we will emit a line marker for THIS line, which requires an
281  // extra blank line after the directive to avoid making all following lines
282  // off by one. We can do better by simply incrementing NewLine here.
283  NewLine += 1;
284  }
285 
286  CurLine = NewLine;
287 
288  CurFilename.clear();
289  CurFilename += UserLoc.getFilename();
290  FileType = NewFileType;
291 
 // With markers disabled only the pending line (if any) is terminated.
292  if (DisableLineMarkers) {
293  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
294  return;
295  }
296 
 // The very first notification always produces a plain marker.
297  if (!Initialized) {
298  WriteLineInfo(CurLine);
299  Initialized = true;
300  }
301 
302  // Do not emit an enter marker for the main file (which we expect is the first
303  // entered file). This matches gcc, and improves compatibility with some tools
304  // which track the # line markers as a way to determine when the preprocessed
305  // output is in the context of the main file.
306  if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
307  IsFirstFileEntered = true;
308  return;
309  }
310 
 // NOTE(review): the case labels of this switch (listing lines 312, 315, 318
 // and 319) are absent from this extract. The visible bodies write the marker
 // with flag " 1", with flag " 2", and without a flag respectively; confirm
 // which FileChangeReason selects each against the complete file.
311  switch (Reason) {
313  WriteLineInfo(CurLine, " 1", 2);
314  break;
316  WriteLineInfo(CurLine, " 2", 2);
317  break;
320  WriteLineInfo(CurLine);
321  break;
322  }
323 }
324 
/// Called for each inclusion directive. Optionally echoes the directive (when
/// DumpIncludeDirectives is set) and, when the include was resolved to a
/// module (\p Imported is non-null), writes a "#pragma clang module import"
/// line in its place.
325 void PrintPPOutputPPCallbacks::InclusionDirective(
326  SourceLocation HashLoc,
327  const Token &IncludeTok,
328  StringRef FileName,
329  bool IsAngled,
330  CharSourceRange FilenameRange,
331  const FileEntry *File,
332  StringRef SearchPath,
333  StringRef RelativePath,
334  const Module *Imported,
335  SrcMgr::CharacteristicKind FileType) {
336  // In -dI mode, dump #include directives prior to dumping their content or
337  // interpretation.
338  if (DumpIncludeDirectives) {
339  startNewLineIfNeeded();
340  MoveToLine(HashLoc);
 // The directive keyword is re-spelled from the token, so whichever include
 // directive was written in the source is echoed unchanged.
341  const std::string TokenText = PP.getSpelling(IncludeTok);
342  assert(!TokenText.empty());
343  OS << "#" << TokenText << " "
344  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
345  << " /* clang -E -dI */";
346  setEmittedDirectiveOnThisLine();
347  startNewLineIfNeeded();
348  }
349 
350  // When preprocessing, turn implicit imports into module import pragmas.
351  if (Imported) {
352  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
353  case tok::pp_include:
354  case tok::pp_import:
 // NOTE(review): listing line 355 is absent from this extract; it may hold
 // a further case label - confirm against the complete file.
356  startNewLineIfNeeded();
357  MoveToLine(HashLoc);
358  OS << "#pragma clang module import " << Imported->getFullModuleName(true)
359  << " /* clang -E: implicit import for "
360  << "#" << PP.getSpelling(IncludeTok) << " "
361  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
362  << " */";
363  // Since we want a newline after the pragma, but not a #<line>, start a
364  // new line immediately.
365  EmittedTokensOnThisLine = true;
366  startNewLineIfNeeded();
367  break;
368 
 // NOTE(review): listing line 369 is absent from this extract; judging by
 // the comment below it is presumably the case label for
 // #__include_macros - confirm against the complete file.
370  // #__include_macros has no effect on a user of a preprocessed source
371  // file; the only effect is on preprocessing.
372  //
373  // FIXME: That's not *quite* true: it causes the module in question to
374  // be loaded, which can affect downstream diagnostics.
375  break;
376 
377  default:
378  llvm_unreachable("unknown include directive kind");
379  break;
380  }
381  }
382 }
383 
384 /// Handle entering the scope of a module during a module compilation.
385 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
386  startNewLineIfNeeded();
387  OS << "#pragma clang module begin " << M->getFullModuleName(true);
388  setEmittedDirectiveOnThisLine();
389 }
390 
391 /// Handle leaving the scope of a module during a module compilation.
392 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
393  startNewLineIfNeeded();
394  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
395  setEmittedDirectiveOnThisLine();
396 }
397 
398 /// Ident - Handle #ident directives when read by the preprocessor.
399 ///
400 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
401  MoveToLine(Loc);
402 
403  OS.write("#ident ", strlen("#ident "));
404  OS.write(S.begin(), S.size());
405  EmittedTokensOnThisLine = true;
406 }
407 
408 /// MacroDefined - This hook is called whenever a macro definition is seen.
409 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
410  const MacroDirective *MD) {
411  const MacroInfo *MI = MD->getMacroInfo();
412  // Only print out macro definitions in -dD mode.
413  if (!DumpDefines ||
414  // Ignore __FILE__ etc.
415  MI->isBuiltinMacro()) return;
416 
417  MoveToLine(MI->getDefinitionLoc());
418  PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
419  setEmittedDirectiveOnThisLine();
420 }
421 
422 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
423  const MacroDefinition &MD,
424  const MacroDirective *Undef) {
425  // Only print out macro definitions in -dD mode.
426  if (!DumpDefines) return;
427 
428  MoveToLine(MacroNameTok.getLocation());
429  OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
430  setEmittedDirectiveOnThisLine();
431 }
432 
433 static void outputPrintable(raw_ostream &OS, StringRef Str) {
434  for (unsigned char Char : Str) {
435  if (isPrintable(Char) && Char != '\\' && Char != '"')
436  OS << (char)Char;
437  else // Output anything hard as an octal escape.
438  OS << '\\'
439  << (char)('0' + ((Char >> 6) & 7))
440  << (char)('0' + ((Char >> 3) & 7))
441  << (char)('0' + ((Char >> 0) & 7));
442  }
443 }
444 
445 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
446  StringRef Namespace,
447  PragmaMessageKind Kind,
448  StringRef Str) {
449  startNewLineIfNeeded();
450  MoveToLine(Loc);
451  OS << "#pragma ";
452  if (!Namespace.empty())
453  OS << Namespace << ' ';
454  switch (Kind) {
455  case PMK_Message:
456  OS << "message(\"";
457  break;
458  case PMK_Warning:
459  OS << "warning \"";
460  break;
461  case PMK_Error:
462  OS << "error \"";
463  break;
464  }
465 
466  outputPrintable(OS, Str);
467  OS << '"';
468  if (Kind == PMK_Message)
469  OS << ')';
470  setEmittedDirectiveOnThisLine();
471 }
472 
473 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
474  StringRef DebugType) {
475  startNewLineIfNeeded();
476  MoveToLine(Loc);
477 
478  OS << "#pragma clang __debug ";
479  OS << DebugType;
480 
481  setEmittedDirectiveOnThisLine();
482 }
483 
484 void PrintPPOutputPPCallbacks::
485 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
486  startNewLineIfNeeded();
487  MoveToLine(Loc);
488  OS << "#pragma " << Namespace << " diagnostic push";
489  setEmittedDirectiveOnThisLine();
490 }
491 
492 void PrintPPOutputPPCallbacks::
493 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
494  startNewLineIfNeeded();
495  MoveToLine(Loc);
496  OS << "#pragma " << Namespace << " diagnostic pop";
497  setEmittedDirectiveOnThisLine();
498 }
499 
/// Reproduce "#pragma <Namespace> diagnostic <severity> "<Str>"" at the line
/// of \p Loc, where <severity> is the keyword chosen from \p Map below.
500 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
501  StringRef Namespace,
502  diag::Severity Map,
503  StringRef Str) {
504  startNewLineIfNeeded();
505  MoveToLine(Loc);
506  OS << "#pragma " << Namespace << " diagnostic ";
 // NOTE(review): the case labels of this switch (listing lines 508, 511, 514,
 // 517 and 520) are absent from this extract; each visible body writes the
 // keyword for one diag::Severity value - confirm the mapping against the
 // complete file.
507  switch (Map) {
509  OS << "remark";
510  break;
512  OS << "warning";
513  break;
515  OS << "error";
516  break;
518  OS << "ignored";
519  break;
521  OS << "fatal";
522  break;
523  }
 // Str is written verbatim between the quotes (no escaping is applied here).
524  OS << " \"" << Str << '"';
525  setEmittedDirectiveOnThisLine();
526 }
527 
528 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
529  StringRef WarningSpec,
530  ArrayRef<int> Ids) {
531  startNewLineIfNeeded();
532  MoveToLine(Loc);
533  OS << "#pragma warning(" << WarningSpec << ':';
534  for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
535  OS << ' ' << *I;
536  OS << ')';
537  setEmittedDirectiveOnThisLine();
538 }
539 
540 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
541  int Level) {
542  startNewLineIfNeeded();
543  MoveToLine(Loc);
544  OS << "#pragma warning(push";
545  if (Level >= 0)
546  OS << ", " << Level;
547  OS << ')';
548  setEmittedDirectiveOnThisLine();
549 }
550 
551 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
552  startNewLineIfNeeded();
553  MoveToLine(Loc);
554  OS << "#pragma warning(pop)";
555  setEmittedDirectiveOnThisLine();
556 }
557 
558 void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
559  StringRef Str) {
560  startNewLineIfNeeded();
561  MoveToLine(Loc);
562  OS << "#pragma character_execution_set(push";
563  if (!Str.empty())
564  OS << ", " << Str;
565  OS << ')';
566  setEmittedDirectiveOnThisLine();
567 }
568 
569 void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
570  startNewLineIfNeeded();
571  MoveToLine(Loc);
572  OS << "#pragma character_execution_set(pop)";
573  setEmittedDirectiveOnThisLine();
574 }
575 
576 void PrintPPOutputPPCallbacks::
577 PragmaAssumeNonNullBegin(SourceLocation Loc) {
578  startNewLineIfNeeded();
579  MoveToLine(Loc);
580  OS << "#pragma clang assume_nonnull begin";
581  setEmittedDirectiveOnThisLine();
582 }
583 
584 void PrintPPOutputPPCallbacks::
585 PragmaAssumeNonNullEnd(SourceLocation Loc) {
586  startNewLineIfNeeded();
587  MoveToLine(Loc);
588  OS << "#pragma clang assume_nonnull end";
589  setEmittedDirectiveOnThisLine();
590 }
591 
592 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
593 /// is called for the first token on each new line. If this really is the start
594 /// of a new logical line, handle it and return true, otherwise return false.
595 /// This may not be the start of a logical line because the "start of line"
596 /// marker is set for spelling lines, not expansion ones.
597 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
598  // Figure out what line we went to and insert the appropriate number of
599  // newline characters.
600  if (!MoveToLine(Tok.getLocation()))
601  return false;
602 
603  // Print out space characters so that the first token on a line is
604  // indented for easy reading.
605  unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
606 
607  // The first token on a line can have a column number of 1, yet still expect
608  // leading white space, if a macro expansion in column 1 starts with an empty
609  // macro argument, or an empty nested macro expansion. In this case, move the
610  // token to column 2.
611  if (ColNo == 1 && Tok.hasLeadingSpace())
612  ColNo = 2;
613 
614  // This hack prevents stuff like:
615  // #define HASH #
616  // HASH define foo bar
617  // From having the # character end up at column 1, which makes it so it
618  // is not handled as a #define next time through the preprocessor if in
619  // -fpreprocessed mode.
620  if (ColNo <= 1 && Tok.is(tok::hash))
621  OS << ' ';
622 
623  // Otherwise, indent the appropriate number of spaces.
624  for (; ColNo > 1; --ColNo)
625  OS << ' ';
626 
627  return true;
628 }
629 
630 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
631  unsigned Len) {
632  unsigned NumNewlines = 0;
633  for (; Len; --Len, ++TokStr) {
634  if (*TokStr != '\n' &&
635  *TokStr != '\r')
636  continue;
637 
638  ++NumNewlines;
639 
640  // If we have \n\r or \r\n, skip both and count as one line.
641  if (Len != 1 &&
642  (TokStr[1] == '\n' || TokStr[1] == '\r') &&
643  TokStr[0] != TokStr[1]) {
644  ++TokStr;
645  --Len;
646  }
647  }
648 
649  if (NumNewlines == 0) return;
650 
651  CurLine += NumNewlines;
652 }
653 
654 
655 namespace {
656 struct UnknownPragmaHandler : public PragmaHandler {
657  const char *Prefix;
658  PrintPPOutputPPCallbacks *Callbacks;
659 
660  // Set to true if tokens should be expanded
661  bool ShouldExpandTokens;
662 
663  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
664  bool RequireTokenExpansion)
665  : Prefix(prefix), Callbacks(callbacks),
666  ShouldExpandTokens(RequireTokenExpansion) {}
667  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
668  Token &PragmaTok) override {
669  // Figure out what line we went to and insert the appropriate number of
670  // newline characters.
671  Callbacks->startNewLineIfNeeded();
672  Callbacks->MoveToLine(PragmaTok.getLocation());
673  Callbacks->OS.write(Prefix, strlen(Prefix));
674 
675  if (ShouldExpandTokens) {
676  // The first token does not have expanded macros. Expand them, if
677  // required.
678  auto Toks = llvm::make_unique<Token[]>(1);
679  Toks[0] = PragmaTok;
680  PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
681  /*DisableMacroExpansion=*/false,
682  /*IsReinject=*/false);
683  PP.Lex(PragmaTok);
684  }
685  Token PrevToken;
686  Token PrevPrevToken;
687  PrevToken.startToken();
688  PrevPrevToken.startToken();
689 
690  // Read and print all of the pragma tokens.
691  while (PragmaTok.isNot(tok::eod)) {
692  if (PragmaTok.hasLeadingSpace() ||
693  Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
694  Callbacks->OS << ' ';
695  std::string TokSpell = PP.getSpelling(PragmaTok);
696  Callbacks->OS.write(&TokSpell[0], TokSpell.size());
697 
698  PrevPrevToken = PrevToken;
699  PrevToken = PragmaTok;
700 
701  if (ShouldExpandTokens)
702  PP.Lex(PragmaTok);
703  else
704  PP.LexUnexpandedToken(PragmaTok);
705  }
706  Callbacks->setEmittedDirectiveOnThisLine();
707  }
708 };
709 } // end anonymous namespace
710 
711 
// NOTE(review): listing line 712, the first line of this function's
// signature, is absent from this extract. The call site passes
// (PP, Tok, Callbacks, *OS); the body below uses PP, Tok, Callbacks and OS.
//
// Prints every token, starting with the one already held in Tok, until
// tok::eof has been processed, inserting newlines, indentation and separating
// spaces via Callbacks.
713  PrintPPOutputPPCallbacks *Callbacks,
714  raw_ostream &OS) {
 // NOTE(review): listing line 716, the remainder of this initializer, is
 // absent from this extract.
715  bool DropComments = PP.getLangOpts().TraditionalCPP &&
717 
 // Stack scratch space for spelling tokens shorter than the buffer.
718  char Buffer[256];
719  Token PrevPrevTok, PrevTok;
720  PrevPrevTok.startToken();
721  PrevTok.startToken();
722  while (1) {
 // A directive was just written by a callback: finish its line and move to
 // the line of the token about to be printed.
723  if (Callbacks->hasEmittedDirectiveOnThisLine()) {
724  Callbacks->startNewLineIfNeeded();
725  Callbacks->MoveToLine(Tok.getLocation());
726  }
727 
728  // If this token is at the start of a line, emit newlines if needed.
729  if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
730  // done.
731  } else if (Tok.hasLeadingSpace() ||
732  // If we haven't emitted a token on this line yet, PrevTok isn't
733  // useful to look at and no concatenation could happen anyway.
734  (Callbacks->hasEmittedTokensOnThisLine() &&
735  // Don't print "-" next to "-", it would form "--".
736  Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
737  OS << ' ';
738  }
739 
740  if (DropComments && Tok.is(tok::comment)) {
741  // Skip comments. Normally the preprocessor does not generate
742  // tok::comment nodes at all when not keeping comments, but under
743  // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
744  SourceLocation StartLoc = Tok.getLocation();
745  Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
746  } else if (Tok.is(tok::eod)) {
747  // Don't print end of directive tokens, since they are typically newlines
748  // that mess up our line tracking. These come from unknown pre-processor
749  // directives or hash-prefixed comments in standalone assembly files.
750  PP.Lex(Tok);
751  continue;
752  } else if (Tok.is(tok::annot_module_include)) {
753  // PrintPPOutputPPCallbacks::InclusionDirective handles producing
754  // appropriate output here. Ignore this token entirely.
755  PP.Lex(Tok);
756  continue;
757  } else if (Tok.is(tok::annot_module_begin)) {
758  // FIXME: We retrieve this token after the FileChanged callback, and
759  // retrieve the module_end token before the FileChanged callback, so
760  // we render this within the file and render the module end outside the
761  // file, but this is backwards from the token locations: the module_begin
762  // token is at the include location (outside the file) and the module_end
763  // token is at the EOF location (within the file).
764  Callbacks->BeginModule(
765  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
766  PP.Lex(Tok);
767  continue;
768  } else if (Tok.is(tok::annot_module_end)) {
769  Callbacks->EndModule(
770  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
771  PP.Lex(Tok);
772  continue;
773  } else if (Tok.is(tok::annot_header_unit)) {
774  // This is a header-name that has been (effectively) converted into a
775  // module-name.
776  // FIXME: The module name could contain non-identifier module name
777  // components. We don't have a good way to round-trip those.
778  Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
779  std::string Name = M->getFullModuleName();
780  OS.write(Name.data(), Name.size());
781  Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
782  } else if (Tok.isAnnotation()) {
783  // Ignore annotation tokens created by pragmas - the pragmas themselves
784  // will be reproduced in the preprocessed output.
785  PP.Lex(Tok);
786  continue;
787  } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
 // Identifiers are printed from their IdentifierInfo name.
788  OS << II->getName();
789  } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
790  Tok.getLiteralData()) {
 // Clean literals are written straight from their literal data.
791  OS.write(Tok.getLiteralData(), Tok.getLength());
792  } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
 // Short token: spell it via the stack buffer. getSpelling takes TokPtr by
 // name here and OS writes from TokPtr afterwards, so TokPtr (not Buffer)
 // is the pointer to read from.
793  const char *TokPtr = Buffer;
794  unsigned Len = PP.getSpelling(Tok, TokPtr);
795  OS.write(TokPtr, Len);
796 
797  // Tokens that can contain embedded newlines need to adjust our current
798  // line number.
799  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
800  Callbacks->HandleNewlinesInToken(TokPtr, Len);
801  } else {
 // Long token: fall back to a std::string spelling.
802  std::string S = PP.getSpelling(Tok);
803  OS.write(S.data(), S.size());
804 
805  // Tokens that can contain embedded newlines need to adjust our current
806  // line number.
807  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
808  Callbacks->HandleNewlinesInToken(S.data(), S.size());
809  }
810  Callbacks->setEmittedTokensOnThisLine();
811 
 // The eof token itself goes through the printing code above before the
 // loop terminates.
812  if (Tok.is(tok::eof)) break;
813 
814  PrevPrevTok = PrevTok;
815  PrevTok = Tok;
816  PP.Lex(Tok);
817  }
818 }
819 
820 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
821 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
822  return LHS->first->getName().compare(RHS->first->getName());
823 }
824 
/// Lex the whole main file while discarding every token, then print all
/// currently defined, non-builtin macros to \p OS sorted by name, one
/// definition per line.
825 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
826  // Ignore unknown pragmas.
827  PP.IgnorePragmas();
828 
829  // -dM mode just scans and ignores all tokens in the files, then dumps out
830  // the macro table at the end.
831  PP.EnterMainSourceFile();
832 
833  Token Tok;
834  do PP.Lex(Tok);
835  while (Tok.isNot(tok::eof));
836 
 // NOTE(review): listing line 837 is absent from this extract; MacrosByID,
 // used below, is presumably declared there as a container of id_macro_pair -
 // confirm against the complete file.
 // Collect every macro whose latest directive says it is still defined.
838  for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
839  I != E; ++I) {
840  auto *MD = I->second.getLatest();
841  if (MD && MD->isDefined())
842  MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
843  }
 // Sort by identifier name (see MacroIDCompare).
844  llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
845 
846  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
847  MacroInfo &MI = *MacrosByID[i].second;
848  // Ignore computed macros like __LINE__ and friends.
849  if (MI.isBuiltinMacro()) continue;
850 
851  PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
852  *OS << '\n';
853  }
854 }
855 
856 /// DoPrintPreprocessedInput - This implements -E mode.
857 ///
/// Installs the printing callbacks and pragma echo handlers on \p PP, skips
/// the tokens coming from the "<built-in>" buffer, prints all remaining
/// tokens to \p OS and finally removes the pragma handlers again.
// NOTE(review): listing line 858, the first line of the signature, is absent
// from this extract; the body uses PP and OS (a pointer) in addition to Opts.
859  const PreprocessorOutputOptions &Opts) {
860  // Show macros with no output is handled specially.
861  if (!Opts.ShowCPP) {
862  assert(Opts.ShowMacros && "Not yet implemented!");
863  DoPrintMacros(PP, OS);
864  return;
865  }
866 
867  // Inform the preprocessor whether we want it to retain comments or not, due
868  // to -C or -CC.
 // NOTE(review): listing line 869, the statement this comment refers to, is
 // absent from this extract.
870 
 // Note the inversion: the constructor's third argument means "disable line
 // markers", so !ShowLineMarkers is passed.
 // NOTE(review): listing line 873, the rest of the argument list, is absent
 // from this extract.
871  PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
872  PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
874 
875  // Expand macros in pragmas with -fms-extensions. The assumption is that
876  // the majority of pragmas in such a file will be Microsoft pragmas.
877  // Remember the handlers we will add so that we can remove them later.
878  std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
879  new UnknownPragmaHandler(
880  "#pragma", Callbacks,
881  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
882 
883  std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
884  "#pragma GCC", Callbacks,
885  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
886 
887  std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
888  "#pragma clang", Callbacks,
889  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
890 
 // The handlers stay owned by the unique_ptrs above; PP only receives raw
 // pointers, which is why they are removed again before returning.
891  PP.AddPragmaHandler(MicrosoftExtHandler.get());
892  PP.AddPragmaHandler("GCC", GCCHandler.get());
893  PP.AddPragmaHandler("clang", ClangHandler.get());
894 
895  // The tokens after pragma omp need to be expanded.
896  //
897  // OpenMP [2.1, Directive format]
898  // Preprocessing tokens following the #pragma omp are subject to macro
899  // replacement.
900  std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
901  new UnknownPragmaHandler("#pragma omp", Callbacks,
902  /*RequireTokenExpansion=*/true));
903  PP.AddPragmaHandler("omp", OpenMPHandler.get());
904 
 // Ownership of Callbacks passes to PP here; the raw pointer is still used
 // below for printing.
905  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
906 
907  // After we have configured the preprocessor, enter the main file.
908  PP.EnterMainSourceFile();
909 
910  // Consume all of the tokens that come from the predefines buffer. Those
911  // should not be emitted into the output and are guaranteed to be at the
912  // start.
913  const SourceManager &SourceMgr = PP.getSourceManager();
914  Token Tok;
915  do {
916  PP.Lex(Tok);
917  if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
918  break;
919 
920  PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
921  if (PLoc.isInvalid())
922  break;
923 
 // strcmp is non-zero when the names differ, i.e. stop at the first token
 // whose presumed file is not "<built-in>".
924  if (strcmp(PLoc.getFilename(), "<built-in>"))
925  break;
926  } while (true);
927 
928  // Read all the preprocessed tokens, printing them out to the stream.
929  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
930  *OS << '\n';
931 
932  // Remove the handlers we just added to leave the preprocessor in a sane state
933  // so that it can be reused (for example by a clang::Parser instance).
934  PP.RemovePragmaHandler(MicrosoftExtHandler.get());
935  PP.RemovePragmaHandler("GCC", GCCHandler.get());
936  PP.RemovePragmaHandler("clang", ClangHandler.get());
937  PP.RemovePragmaHandler("omp", OpenMPHandler.get());
938 }
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, Preprocessor &PP, raw_ostream &OS)
PrintMacroDefinition - Print a macro definition in a form that will be properly accepted back as a de...
param_iterator param_begin() const
Definition: MacroInfo.h:180
std::pair< const IdentifierInfo *, MacroInfo * > id_macro_pair
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition: Pragma.cpp:874
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:97
Defines the SourceManager interface.
TokenConcatenation class, which answers the question of "Is it safe to emit two tokens without a whit...
Defines the clang::MacroInfo and clang::MacroDirective classes.
A description of the current definition of a macro.
Definition: MacroInfo.h:564
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition: Pragma.cpp:1893
Severity
Enum values that allow the client to map NOTEs, WARNINGs, and EXTENSIONs to either Ignore (nothing)...
Definition: DiagnosticIDs.h:79
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:77
This interface provides a way to observe the actions of the preprocessor as it does its thing...
Definition: PPCallbacks.h:35
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:120
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:242
long i
Definition: xmmintrin.h:1456
tok::TokenKind getKind() const
Definition: Token.h:92
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:115
One of these records is kept for each identifier that is lexed.
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
Definition: Preprocessor.h:937
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:903
Describes a module or submodule.
Definition: Module.h:64
param_iterator param_end() const
Definition: MacroInfo.h:181
std::string getFullModuleName(bool AllowStringLiterals=false) const
Retrieve the full name of this module, including the path from its top-level module.
Definition: Module.cpp:213
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
const FormatToken & Tok
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
Defines the Diagnostic-related interfaces.
Present this diagnostic as an error.
macro_iterator macro_end(bool IncludeExternalMacros=true) const
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
IdentifierInfo *const * param_iterator
Parameters - The list of parameters for a function-like macro.
Definition: MacroInfo.h:178
PreprocessorOutputOptions - Options for controlling the C preprocessor output (e.g., -E).
void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, const PreprocessorOutputOptions &Opts)
DoPrintPreprocessedInput - Implement -E mode.
static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS)
Represents a character-granular source range.
bool isInvalid() const
Return true if this object is invalid or uninitialized.
static void outputPrintable(raw_ostream &OS, StringRef Str)
unsigned getLine() const
Return the presumed line number of this location.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:126
Defines the clang::Preprocessor interface.
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition: Pragma.cpp:905
bool getCommentRetentionState() const
Definition: Preprocessor.h:942
unsigned ShowMacros
Print macro definitions.
Record the location of an inclusion directive, such as an #include or #import statement.
Represents an unpacked "presumed" location which can be presented to the user.
const SourceManager & SM
Definition: Format.cpp:1570
SourceManager & getSourceManager() const
Definition: Preprocessor.h:907
const char * getFilename() const
Return the presumed filename of this location.
unsigned ShowIncludeDirectives
Print includes, imports etc. within preprocessed output.
Encapsulates changes to the "macros namespace" (the location where the macro name became active...
Definition: MacroInfo.h:290
Kind
static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS)
Encodes a location in the source.
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:59
void Lex(Token &Result)
Lex the next token for this preprocessor.
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:268
unsigned UseLineDirectives
Use #line instead of GCC-style # N.
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:139
const MacroInfo * getMacroInfo() const
Definition: MacroInfo.h:390
bool param_empty() const
Definition: MacroInfo.h:179
StringRef getName() const
Return the actual identifier string.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isNot(tok::TokenKind K) const
Definition: Token.h:98
unsigned getExpansionColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool isFunctionLike() const
Definition: MacroInfo.h:199
Present this diagnostic as a remark.
PragmaHandler - Instances of this interface are defined to handle the various pragmas that the language f...
Definition: Pragma.h:65
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
unsigned getLength() const
Definition: Token.h:129
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:217
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition: MacroInfo.h:123
bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const
AvoidConcat - If printing PrevTok immediately followed by Tok would cause the two individual tokens t...
bool tokens_empty() const
Definition: MacroInfo.h:244
Defines the PPCallbacks interface.
unsigned ShowMacroComments
Show comments, even in macros.
ArrayRef< Token > tokens() const
Definition: MacroInfo.h:245
Do not present this diagnostic, ignore it.
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition: MacroInfo.h:215
unsigned ShowLineMarkers
Show #line markers.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:285
bool isGNUVarargs() const
Definition: MacroInfo.h:206
MacroMap::const_iterator macro_iterator
unsigned ShowCPP
Print normal preprocessed output.
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition: MacroInfo.h:580
Present this diagnostic as a fatal error.
Describes how and where the pragma was introduced.
Definition: Pragma.h:51
Present this diagnostic as a warning.
SourceLocation getIncludeLoc() const
Return the presumed include location of this location.
void * getAnnotationValue() const
Definition: Token.h:226
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:272
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:995
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:171
static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PrintPPOutputPPCallbacks *Callbacks, raw_ostream &OS)
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:124