clang  9.0.0svn
PrintPreprocessedOutput.cpp
Go to the documentation of this file.
1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This code simply runs the preprocessor on the input file and prints out the
10 // result. This is the traditional behavior of the -E option.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "clang/Frontend/Utils.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/TokenConcat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
#include <cstring>
30 using namespace clang;
31 
32 /// PrintMacroDefinition - Print a macro definition in a form that will be
33 /// properly accepted back as a definition.
34 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35  Preprocessor &PP, raw_ostream &OS) {
36  OS << "#define " << II.getName();
37 
38  if (MI.isFunctionLike()) {
39  OS << '(';
40  if (!MI.param_empty()) {
42  for (; AI+1 != E; ++AI) {
43  OS << (*AI)->getName();
44  OS << ',';
45  }
46 
47  // Last argument.
48  if ((*AI)->getName() == "__VA_ARGS__")
49  OS << "...";
50  else
51  OS << (*AI)->getName();
52  }
53 
54  if (MI.isGNUVarargs())
55  OS << "..."; // #define foo(x...)
56 
57  OS << ')';
58  }
59 
60  // GCC always emits a space, even if the macro body is empty. However, do not
61  // want to emit two spaces if the first token has a leading space.
62  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
63  OS << ' ';
64 
65  SmallString<128> SpellingBuffer;
66  for (const auto &T : MI.tokens()) {
67  if (T.hasLeadingSpace())
68  OS << ' ';
69 
70  OS << PP.getSpelling(T, SpellingBuffer);
71  }
72 }
73 
74 //===----------------------------------------------------------------------===//
75 // Preprocessed token printer
76 //===----------------------------------------------------------------------===//
77 
78 namespace {
79 class PrintPPOutputPPCallbacks : public PPCallbacks {
80  Preprocessor &PP;
82  TokenConcatenation ConcatInfo;
83 public:
84  raw_ostream &OS;
85 private:
86  unsigned CurLine;
87 
88  bool EmittedTokensOnThisLine;
89  bool EmittedDirectiveOnThisLine;
91  SmallString<512> CurFilename;
92  bool Initialized;
93  bool DisableLineMarkers;
94  bool DumpDefines;
95  bool DumpIncludeDirectives;
96  bool UseLineDirectives;
97  bool IsFirstFileEntered;
98 public:
99  PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
100  bool defines, bool DumpIncludeDirectives,
101  bool UseLineDirectives)
102  : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
103  DisableLineMarkers(lineMarkers), DumpDefines(defines),
104  DumpIncludeDirectives(DumpIncludeDirectives),
105  UseLineDirectives(UseLineDirectives) {
106  CurLine = 0;
107  CurFilename += "<uninit>";
108  EmittedTokensOnThisLine = false;
109  EmittedDirectiveOnThisLine = false;
110  FileType = SrcMgr::C_User;
111  Initialized = false;
112  IsFirstFileEntered = false;
113  }
114 
115  void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
116  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
117 
118  void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
119  bool hasEmittedDirectiveOnThisLine() const {
120  return EmittedDirectiveOnThisLine;
121  }
122 
123  bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
124 
125  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
127  FileID PrevFID) override;
128  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
129  StringRef FileName, bool IsAngled,
130  CharSourceRange FilenameRange, const FileEntry *File,
131  StringRef SearchPath, StringRef RelativePath,
132  const Module *Imported,
133  SrcMgr::CharacteristicKind FileType) override;
134  void Ident(SourceLocation Loc, StringRef str) override;
135  void PragmaMessage(SourceLocation Loc, StringRef Namespace,
136  PragmaMessageKind Kind, StringRef Str) override;
137  void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
138  void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
139  void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
140  void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
141  diag::Severity Map, StringRef Str) override;
142  void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
143  ArrayRef<int> Ids) override;
144  void PragmaWarningPush(SourceLocation Loc, int Level) override;
145  void PragmaWarningPop(SourceLocation Loc) override;
146  void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
147  void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
148 
149  bool HandleFirstTokOnLine(Token &Tok);
150 
151  /// Move to the line of the provided source location. This will
152  /// return true if the output stream required adjustment or if
153  /// the requested location is on the first line.
154  bool MoveToLine(SourceLocation Loc) {
155  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
156  if (PLoc.isInvalid())
157  return false;
158  return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
159  }
160  bool MoveToLine(unsigned LineNo);
161 
162  bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
163  const Token &Tok) {
164  return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
165  }
166  void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
167  unsigned ExtraLen=0);
168  bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
169  void HandleNewlinesInToken(const char *TokStr, unsigned Len);
170 
171  /// MacroDefined - This hook is called whenever a macro definition is seen.
172  void MacroDefined(const Token &MacroNameTok,
173  const MacroDirective *MD) override;
174 
175  /// MacroUndefined - This hook is called whenever a macro #undef is seen.
176  void MacroUndefined(const Token &MacroNameTok,
177  const MacroDefinition &MD,
178  const MacroDirective *Undef) override;
179 
180  void BeginModule(const Module *M);
181  void EndModule(const Module *M);
182 };
183 } // end anonymous namespace
184 
185 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
186  const char *Extra,
187  unsigned ExtraLen) {
188  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
189 
190  // Emit #line directives or GNU line markers depending on what mode we're in.
191  if (UseLineDirectives) {
192  OS << "#line" << ' ' << LineNo << ' ' << '"';
193  OS.write_escaped(CurFilename);
194  OS << '"';
195  } else {
196  OS << '#' << ' ' << LineNo << ' ' << '"';
197  OS.write_escaped(CurFilename);
198  OS << '"';
199 
200  if (ExtraLen)
201  OS.write(Extra, ExtraLen);
202 
203  if (FileType == SrcMgr::C_System)
204  OS.write(" 3", 2);
205  else if (FileType == SrcMgr::C_ExternCSystem)
206  OS.write(" 3 4", 4);
207  }
208  OS << '\n';
209 }
210 
211 /// MoveToLine - Move the output to the source line specified by the location
212 /// object. We can do this by emitting some number of \n's, or be emitting a
213 /// #line directive. This returns false if already at the specified line, true
214 /// if some newlines were emitted.
215 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
216  // If this line is "close enough" to the original line, just print newlines,
217  // otherwise print a #line directive.
218  if (LineNo-CurLine <= 8) {
219  if (LineNo-CurLine == 1)
220  OS << '\n';
221  else if (LineNo == CurLine)
222  return false; // Spelling line moved, but expansion line didn't.
223  else {
224  const char *NewLines = "\n\n\n\n\n\n\n\n";
225  OS.write(NewLines, LineNo-CurLine);
226  }
227  } else if (!DisableLineMarkers) {
228  // Emit a #line or line marker.
229  WriteLineInfo(LineNo, nullptr, 0);
230  } else {
231  // Okay, we're in -P mode, which turns off line markers. However, we still
232  // need to emit a newline between tokens on different lines.
233  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
234  }
235 
236  CurLine = LineNo;
237  return true;
238 }
239 
240 bool
241 PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
242  if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
243  OS << '\n';
244  EmittedTokensOnThisLine = false;
245  EmittedDirectiveOnThisLine = false;
246  if (ShouldUpdateCurrentLine)
247  ++CurLine;
248  return true;
249  }
250 
251  return false;
252 }
253 
254 /// FileChanged - Whenever the preprocessor enters or exits a #include file
255 /// it invokes this handler. Update our conception of the current source
256 /// position.
257 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
258  FileChangeReason Reason,
259  SrcMgr::CharacteristicKind NewFileType,
260  FileID PrevFID) {
261  // Unless we are exiting a #include, make sure to skip ahead to the line the
262  // #include directive was at.
263  SourceManager &SourceMgr = SM;
264 
265  PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
266  if (UserLoc.isInvalid())
267  return;
268 
269  unsigned NewLine = UserLoc.getLine();
270 
271  if (Reason == PPCallbacks::EnterFile) {
272  SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
273  if (IncludeLoc.isValid())
274  MoveToLine(IncludeLoc);
275  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
276  // GCC emits the # directive for this directive on the line AFTER the
277  // directive and emits a bunch of spaces that aren't needed. This is because
278  // otherwise we will emit a line marker for THIS line, which requires an
279  // extra blank line after the directive to avoid making all following lines
280  // off by one. We can do better by simply incrementing NewLine here.
281  NewLine += 1;
282  }
283 
284  CurLine = NewLine;
285 
286  CurFilename.clear();
287  CurFilename += UserLoc.getFilename();
288  FileType = NewFileType;
289 
290  if (DisableLineMarkers) {
291  startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
292  return;
293  }
294 
295  if (!Initialized) {
296  WriteLineInfo(CurLine);
297  Initialized = true;
298  }
299 
300  // Do not emit an enter marker for the main file (which we expect is the first
301  // entered file). This matches gcc, and improves compatibility with some tools
302  // which track the # line markers as a way to determine when the preprocessed
303  // output is in the context of the main file.
304  if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
305  IsFirstFileEntered = true;
306  return;
307  }
308 
309  switch (Reason) {
311  WriteLineInfo(CurLine, " 1", 2);
312  break;
314  WriteLineInfo(CurLine, " 2", 2);
315  break;
318  WriteLineInfo(CurLine);
319  break;
320  }
321 }
322 
323 void PrintPPOutputPPCallbacks::InclusionDirective(
324  SourceLocation HashLoc,
325  const Token &IncludeTok,
326  StringRef FileName,
327  bool IsAngled,
328  CharSourceRange FilenameRange,
329  const FileEntry *File,
330  StringRef SearchPath,
331  StringRef RelativePath,
332  const Module *Imported,
333  SrcMgr::CharacteristicKind FileType) {
334  // In -dI mode, dump #include directives prior to dumping their content or
335  // interpretation.
336  if (DumpIncludeDirectives) {
337  startNewLineIfNeeded();
338  MoveToLine(HashLoc);
339  const std::string TokenText = PP.getSpelling(IncludeTok);
340  assert(!TokenText.empty());
341  OS << "#" << TokenText << " "
342  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
343  << " /* clang -E -dI */";
344  setEmittedDirectiveOnThisLine();
345  startNewLineIfNeeded();
346  }
347 
348  // When preprocessing, turn implicit imports into module import pragmas.
349  if (Imported) {
350  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
351  case tok::pp_include:
352  case tok::pp_import:
353  case tok::pp_include_next:
354  startNewLineIfNeeded();
355  MoveToLine(HashLoc);
356  OS << "#pragma clang module import " << Imported->getFullModuleName(true)
357  << " /* clang -E: implicit import for "
358  << "#" << PP.getSpelling(IncludeTok) << " "
359  << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
360  << " */";
361  // Since we want a newline after the pragma, but not a #<line>, start a
362  // new line immediately.
363  EmittedTokensOnThisLine = true;
364  startNewLineIfNeeded();
365  break;
366 
367  case tok::pp___include_macros:
368  // #__include_macros has no effect on a user of a preprocessed source
369  // file; the only effect is on preprocessing.
370  //
371  // FIXME: That's not *quite* true: it causes the module in question to
372  // be loaded, which can affect downstream diagnostics.
373  break;
374 
375  default:
376  llvm_unreachable("unknown include directive kind");
377  break;
378  }
379  }
380 }
381 
382 /// Handle entering the scope of a module during a module compilation.
383 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
384  startNewLineIfNeeded();
385  OS << "#pragma clang module begin " << M->getFullModuleName(true);
386  setEmittedDirectiveOnThisLine();
387 }
388 
389 /// Handle leaving the scope of a module during a module compilation.
390 void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
391  startNewLineIfNeeded();
392  OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
393  setEmittedDirectiveOnThisLine();
394 }
395 
396 /// Ident - Handle #ident directives when read by the preprocessor.
397 ///
398 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
399  MoveToLine(Loc);
400 
401  OS.write("#ident ", strlen("#ident "));
402  OS.write(S.begin(), S.size());
403  EmittedTokensOnThisLine = true;
404 }
405 
406 /// MacroDefined - This hook is called whenever a macro definition is seen.
407 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
408  const MacroDirective *MD) {
409  const MacroInfo *MI = MD->getMacroInfo();
410  // Only print out macro definitions in -dD mode.
411  if (!DumpDefines ||
412  // Ignore __FILE__ etc.
413  MI->isBuiltinMacro()) return;
414 
415  MoveToLine(MI->getDefinitionLoc());
416  PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
417  setEmittedDirectiveOnThisLine();
418 }
419 
420 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
421  const MacroDefinition &MD,
422  const MacroDirective *Undef) {
423  // Only print out macro definitions in -dD mode.
424  if (!DumpDefines) return;
425 
426  MoveToLine(MacroNameTok.getLocation());
427  OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
428  setEmittedDirectiveOnThisLine();
429 }
430 
431 static void outputPrintable(raw_ostream &OS, StringRef Str) {
432  for (unsigned char Char : Str) {
433  if (isPrintable(Char) && Char != '\\' && Char != '"')
434  OS << (char)Char;
435  else // Output anything hard as an octal escape.
436  OS << '\\'
437  << (char)('0' + ((Char >> 6) & 7))
438  << (char)('0' + ((Char >> 3) & 7))
439  << (char)('0' + ((Char >> 0) & 7));
440  }
441 }
442 
443 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
444  StringRef Namespace,
445  PragmaMessageKind Kind,
446  StringRef Str) {
447  startNewLineIfNeeded();
448  MoveToLine(Loc);
449  OS << "#pragma ";
450  if (!Namespace.empty())
451  OS << Namespace << ' ';
452  switch (Kind) {
453  case PMK_Message:
454  OS << "message(\"";
455  break;
456  case PMK_Warning:
457  OS << "warning \"";
458  break;
459  case PMK_Error:
460  OS << "error \"";
461  break;
462  }
463 
464  outputPrintable(OS, Str);
465  OS << '"';
466  if (Kind == PMK_Message)
467  OS << ')';
468  setEmittedDirectiveOnThisLine();
469 }
470 
471 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
472  StringRef DebugType) {
473  startNewLineIfNeeded();
474  MoveToLine(Loc);
475 
476  OS << "#pragma clang __debug ";
477  OS << DebugType;
478 
479  setEmittedDirectiveOnThisLine();
480 }
481 
482 void PrintPPOutputPPCallbacks::
483 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
484  startNewLineIfNeeded();
485  MoveToLine(Loc);
486  OS << "#pragma " << Namespace << " diagnostic push";
487  setEmittedDirectiveOnThisLine();
488 }
489 
490 void PrintPPOutputPPCallbacks::
491 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
492  startNewLineIfNeeded();
493  MoveToLine(Loc);
494  OS << "#pragma " << Namespace << " diagnostic pop";
495  setEmittedDirectiveOnThisLine();
496 }
497 
498 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
499  StringRef Namespace,
500  diag::Severity Map,
501  StringRef Str) {
502  startNewLineIfNeeded();
503  MoveToLine(Loc);
504  OS << "#pragma " << Namespace << " diagnostic ";
505  switch (Map) {
507  OS << "remark";
508  break;
510  OS << "warning";
511  break;
513  OS << "error";
514  break;
516  OS << "ignored";
517  break;
519  OS << "fatal";
520  break;
521  }
522  OS << " \"" << Str << '"';
523  setEmittedDirectiveOnThisLine();
524 }
525 
526 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
527  StringRef WarningSpec,
528  ArrayRef<int> Ids) {
529  startNewLineIfNeeded();
530  MoveToLine(Loc);
531  OS << "#pragma warning(" << WarningSpec << ':';
532  for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
533  OS << ' ' << *I;
534  OS << ')';
535  setEmittedDirectiveOnThisLine();
536 }
537 
538 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
539  int Level) {
540  startNewLineIfNeeded();
541  MoveToLine(Loc);
542  OS << "#pragma warning(push";
543  if (Level >= 0)
544  OS << ", " << Level;
545  OS << ')';
546  setEmittedDirectiveOnThisLine();
547 }
548 
549 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
550  startNewLineIfNeeded();
551  MoveToLine(Loc);
552  OS << "#pragma warning(pop)";
553  setEmittedDirectiveOnThisLine();
554 }
555 
556 void PrintPPOutputPPCallbacks::
557 PragmaAssumeNonNullBegin(SourceLocation Loc) {
558  startNewLineIfNeeded();
559  MoveToLine(Loc);
560  OS << "#pragma clang assume_nonnull begin";
561  setEmittedDirectiveOnThisLine();
562 }
563 
564 void PrintPPOutputPPCallbacks::
565 PragmaAssumeNonNullEnd(SourceLocation Loc) {
566  startNewLineIfNeeded();
567  MoveToLine(Loc);
568  OS << "#pragma clang assume_nonnull end";
569  setEmittedDirectiveOnThisLine();
570 }
571 
572 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
573 /// is called for the first token on each new line. If this really is the start
574 /// of a new logical line, handle it and return true, otherwise return false.
575 /// This may not be the start of a logical line because the "start of line"
576 /// marker is set for spelling lines, not expansion ones.
577 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
578  // Figure out what line we went to and insert the appropriate number of
579  // newline characters.
580  if (!MoveToLine(Tok.getLocation()))
581  return false;
582 
583  // Print out space characters so that the first token on a line is
584  // indented for easy reading.
585  unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
586 
587  // The first token on a line can have a column number of 1, yet still expect
588  // leading white space, if a macro expansion in column 1 starts with an empty
589  // macro argument, or an empty nested macro expansion. In this case, move the
590  // token to column 2.
591  if (ColNo == 1 && Tok.hasLeadingSpace())
592  ColNo = 2;
593 
594  // This hack prevents stuff like:
595  // #define HASH #
596  // HASH define foo bar
597  // From having the # character end up at column 1, which makes it so it
598  // is not handled as a #define next time through the preprocessor if in
599  // -fpreprocessed mode.
600  if (ColNo <= 1 && Tok.is(tok::hash))
601  OS << ' ';
602 
603  // Otherwise, indent the appropriate number of spaces.
604  for (; ColNo > 1; --ColNo)
605  OS << ' ';
606 
607  return true;
608 }
609 
610 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
611  unsigned Len) {
612  unsigned NumNewlines = 0;
613  for (; Len; --Len, ++TokStr) {
614  if (*TokStr != '\n' &&
615  *TokStr != '\r')
616  continue;
617 
618  ++NumNewlines;
619 
620  // If we have \n\r or \r\n, skip both and count as one line.
621  if (Len != 1 &&
622  (TokStr[1] == '\n' || TokStr[1] == '\r') &&
623  TokStr[0] != TokStr[1]) {
624  ++TokStr;
625  --Len;
626  }
627  }
628 
629  if (NumNewlines == 0) return;
630 
631  CurLine += NumNewlines;
632 }
633 
634 
635 namespace {
636 struct UnknownPragmaHandler : public PragmaHandler {
637  const char *Prefix;
638  PrintPPOutputPPCallbacks *Callbacks;
639 
640  // Set to true if tokens should be expanded
641  bool ShouldExpandTokens;
642 
643  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
644  bool RequireTokenExpansion)
645  : Prefix(prefix), Callbacks(callbacks),
646  ShouldExpandTokens(RequireTokenExpansion) {}
647  void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
648  Token &PragmaTok) override {
649  // Figure out what line we went to and insert the appropriate number of
650  // newline characters.
651  Callbacks->startNewLineIfNeeded();
652  Callbacks->MoveToLine(PragmaTok.getLocation());
653  Callbacks->OS.write(Prefix, strlen(Prefix));
654 
655  if (ShouldExpandTokens) {
656  // The first token does not have expanded macros. Expand them, if
657  // required.
658  auto Toks = llvm::make_unique<Token[]>(1);
659  Toks[0] = PragmaTok;
660  PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
661  /*DisableMacroExpansion=*/false);
662  PP.Lex(PragmaTok);
663  }
664  Token PrevToken;
665  Token PrevPrevToken;
666  PrevToken.startToken();
667  PrevPrevToken.startToken();
668 
669  // Read and print all of the pragma tokens.
670  while (PragmaTok.isNot(tok::eod)) {
671  if (PragmaTok.hasLeadingSpace() ||
672  Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
673  Callbacks->OS << ' ';
674  std::string TokSpell = PP.getSpelling(PragmaTok);
675  Callbacks->OS.write(&TokSpell[0], TokSpell.size());
676 
677  PrevPrevToken = PrevToken;
678  PrevToken = PragmaTok;
679 
680  if (ShouldExpandTokens)
681  PP.Lex(PragmaTok);
682  else
683  PP.LexUnexpandedToken(PragmaTok);
684  }
685  Callbacks->setEmittedDirectiveOnThisLine();
686  }
687 };
688 } // end anonymous namespace
689 
690 
692  PrintPPOutputPPCallbacks *Callbacks,
693  raw_ostream &OS) {
694  bool DropComments = PP.getLangOpts().TraditionalCPP &&
696 
697  char Buffer[256];
698  Token PrevPrevTok, PrevTok;
699  PrevPrevTok.startToken();
700  PrevTok.startToken();
701  while (1) {
702  if (Callbacks->hasEmittedDirectiveOnThisLine()) {
703  Callbacks->startNewLineIfNeeded();
704  Callbacks->MoveToLine(Tok.getLocation());
705  }
706 
707  // If this token is at the start of a line, emit newlines if needed.
708  if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
709  // done.
710  } else if (Tok.hasLeadingSpace() ||
711  // If we haven't emitted a token on this line yet, PrevTok isn't
712  // useful to look at and no concatenation could happen anyway.
713  (Callbacks->hasEmittedTokensOnThisLine() &&
714  // Don't print "-" next to "-", it would form "--".
715  Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
716  OS << ' ';
717  }
718 
719  if (DropComments && Tok.is(tok::comment)) {
720  // Skip comments. Normally the preprocessor does not generate
721  // tok::comment nodes at all when not keeping comments, but under
722  // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
723  SourceLocation StartLoc = Tok.getLocation();
724  Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
725  } else if (Tok.is(tok::eod)) {
726  // Don't print end of directive tokens, since they are typically newlines
727  // that mess up our line tracking. These come from unknown pre-processor
728  // directives or hash-prefixed comments in standalone assembly files.
729  PP.Lex(Tok);
730  continue;
731  } else if (Tok.is(tok::annot_module_include)) {
732  // PrintPPOutputPPCallbacks::InclusionDirective handles producing
733  // appropriate output here. Ignore this token entirely.
734  PP.Lex(Tok);
735  continue;
736  } else if (Tok.is(tok::annot_module_begin)) {
737  // FIXME: We retrieve this token after the FileChanged callback, and
738  // retrieve the module_end token before the FileChanged callback, so
739  // we render this within the file and render the module end outside the
740  // file, but this is backwards from the token locations: the module_begin
741  // token is at the include location (outside the file) and the module_end
742  // token is at the EOF location (within the file).
743  Callbacks->BeginModule(
744  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
745  PP.Lex(Tok);
746  continue;
747  } else if (Tok.is(tok::annot_module_end)) {
748  Callbacks->EndModule(
749  reinterpret_cast<Module *>(Tok.getAnnotationValue()));
750  PP.Lex(Tok);
751  continue;
752  } else if (Tok.isAnnotation()) {
753  // Ignore annotation tokens created by pragmas - the pragmas themselves
754  // will be reproduced in the preprocessed output.
755  PP.Lex(Tok);
756  continue;
757  } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
758  OS << II->getName();
759  } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
760  Tok.getLiteralData()) {
761  OS.write(Tok.getLiteralData(), Tok.getLength());
762  } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
763  const char *TokPtr = Buffer;
764  unsigned Len = PP.getSpelling(Tok, TokPtr);
765  OS.write(TokPtr, Len);
766 
767  // Tokens that can contain embedded newlines need to adjust our current
768  // line number.
769  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
770  Callbacks->HandleNewlinesInToken(TokPtr, Len);
771  } else {
772  std::string S = PP.getSpelling(Tok);
773  OS.write(&S[0], S.size());
774 
775  // Tokens that can contain embedded newlines need to adjust our current
776  // line number.
777  if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
778  Callbacks->HandleNewlinesInToken(&S[0], S.size());
779  }
780  Callbacks->setEmittedTokensOnThisLine();
781 
782  if (Tok.is(tok::eof)) break;
783 
784  PrevPrevTok = PrevTok;
785  PrevTok = Tok;
786  PP.Lex(Tok);
787  }
788 }
789 
790 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
791 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
792  return LHS->first->getName().compare(RHS->first->getName());
793 }
794 
795 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
796  // Ignore unknown pragmas.
797  PP.IgnorePragmas();
798 
799  // -dM mode just scans and ignores all tokens in the files, then dumps out
800  // the macro table at the end.
801  PP.EnterMainSourceFile();
802 
803  Token Tok;
804  do PP.Lex(Tok);
805  while (Tok.isNot(tok::eof));
806 
808  for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
809  I != E; ++I) {
810  auto *MD = I->second.getLatest();
811  if (MD && MD->isDefined())
812  MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
813  }
814  llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
815 
816  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
817  MacroInfo &MI = *MacrosByID[i].second;
818  // Ignore computed macros like __LINE__ and friends.
819  if (MI.isBuiltinMacro()) continue;
820 
821  PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
822  *OS << '\n';
823  }
824 }
825 
826 /// DoPrintPreprocessedInput - This implements -E mode.
827 ///
829  const PreprocessorOutputOptions &Opts) {
830  // Show macros with no output is handled specially.
831  if (!Opts.ShowCPP) {
832  assert(Opts.ShowMacros && "Not yet implemented!");
833  DoPrintMacros(PP, OS);
834  return;
835  }
836 
837  // Inform the preprocessor whether we want it to retain comments or not, due
838  // to -C or -CC.
840 
841  PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
842  PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
844 
845  // Expand macros in pragmas with -fms-extensions. The assumption is that
846  // the majority of pragmas in such a file will be Microsoft pragmas.
847  // Remember the handlers we will add so that we can remove them later.
848  std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
849  new UnknownPragmaHandler(
850  "#pragma", Callbacks,
851  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
852 
853  std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
854  "#pragma GCC", Callbacks,
855  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
856 
857  std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
858  "#pragma clang", Callbacks,
859  /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
860 
861  PP.AddPragmaHandler(MicrosoftExtHandler.get());
862  PP.AddPragmaHandler("GCC", GCCHandler.get());
863  PP.AddPragmaHandler("clang", ClangHandler.get());
864 
865  // The tokens after pragma omp need to be expanded.
866  //
867  // OpenMP [2.1, Directive format]
868  // Preprocessing tokens following the #pragma omp are subject to macro
869  // replacement.
870  std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
871  new UnknownPragmaHandler("#pragma omp", Callbacks,
872  /*RequireTokenExpansion=*/true));
873  PP.AddPragmaHandler("omp", OpenMPHandler.get());
874 
875  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
876 
877  // After we have configured the preprocessor, enter the main file.
878  PP.EnterMainSourceFile();
879 
880  // Consume all of the tokens that come from the predefines buffer. Those
881  // should not be emitted into the output and are guaranteed to be at the
882  // start.
883  const SourceManager &SourceMgr = PP.getSourceManager();
884  Token Tok;
885  do {
886  PP.Lex(Tok);
887  if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
888  break;
889 
890  PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
891  if (PLoc.isInvalid())
892  break;
893 
894  if (strcmp(PLoc.getFilename(), "<built-in>"))
895  break;
896  } while (true);
897 
898  // Read all the preprocessed tokens, printing them out to the stream.
899  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
900  *OS << '\n';
901 
902  // Remove the handlers we just added to leave the preprocessor in a sane state
903  // so that it can be reused (for example by a clang::Parser instance).
904  PP.RemovePragmaHandler(MicrosoftExtHandler.get());
905  PP.RemovePragmaHandler("GCC", GCCHandler.get());
906  PP.RemovePragmaHandler("clang", ClangHandler.get());
907  PP.RemovePragmaHandler("omp", OpenMPHandler.get());
908 }
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, Preprocessor &PP, raw_ostream &OS)
PrintMacroDefinition - Print a macro definition in a form that will be properly accepted back as a de...
param_iterator param_begin() const
Definition: MacroInfo.h:180
std::pair< const IdentifierInfo *, MacroInfo * > id_macro_pair
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition: Pragma.cpp:902
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:94
Defines the SourceManager interface.
TokenConcatenation class, which answers the question of "Is it safe to emit two tokens without a whit...
Defines the clang::MacroInfo and clang::MacroDirective classes.
A description of the current definition of a macro.
Definition: MacroInfo.h:564
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition: Pragma.cpp:1840
Severity
Enum values that allow the client to map NOTEs, WARNINGs, and EXTENSIONs to either Ignore (nothing)...
Definition: DiagnosticIDs.h:79
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:77
This interface provides a way to observe the actions of the preprocessor as it does its thing...
Definition: PPCallbacks.h:35
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:117
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:242
tok::TokenKind getKind() const
Definition: Token.h:89
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:112
One of these records is kept for each identifier that is lexed.
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
Definition: Preprocessor.h:852
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:817
Describes a module or submodule.
Definition: Module.h:64
param_iterator param_end() const
Definition: MacroInfo.h:181
std::string getFullModuleName(bool AllowStringLiterals=false) const
Retrieve the full name of this module, including the path from its top-level module.
Definition: Module.cpp:213
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
const FormatToken & Tok
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
Defines the Diagnostic-related interfaces.
Present this diagnostic as an error.
PragmaIntroducerKind
Describes how the pragma was introduced, e.g., with #pragma, _Pragma, or __pragma.
Definition: Pragma.h:31
macro_iterator macro_end(bool IncludeExternalMacros=true) const
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
IdentifierInfo *const * param_iterator
Parameters - The list of parameters for a function-like macro.
Definition: MacroInfo.h:178
return Out str()
PreprocessorOutputOptions - Options for controlling the C preprocessor output (e.g., -E).
void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, const PreprocessorOutputOptions &Opts)
DoPrintPreprocessedInput - Implement -E mode.
static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS)
Represents a character-granular source range.
bool isInvalid() const
Return true if this object is invalid or uninitialized.
static void outputPrintable(raw_ostream &OS, StringRef Str)
unsigned getLine() const
Return the presumed line number of this location.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:123
Defines the clang::Preprocessor interface.
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition: Pragma.cpp:933
bool getCommentRetentionState() const
Definition: Preprocessor.h:857
unsigned ShowMacros
Print macro definitions.
Record the location of an inclusion directive, such as an #include or #import statement.
Represents an unpacked "presumed" location which can be presented to the user.
const SourceManager & SM
Definition: Format.cpp:1489
SourceManager & getSourceManager() const
Definition: Preprocessor.h:821
const char * getFilename() const
Return the presumed filename of this location.
unsigned ShowIncludeDirectives
Print includes, imports etc. within preprocessed output.
Encapsulates changes to the "macros namespace" (the location where the macro name became active...
Definition: MacroInfo.h:290
Kind
static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS)
Encodes a location in the source.
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:176
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:59
void Lex(Token &Result)
Lex the next token for this preprocessor.
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:265
unsigned UseLineDirectives
Use #line instead of GCC-style # N.
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:139
const MacroInfo * getMacroInfo() const
Definition: MacroInfo.h:390
bool param_empty() const
Definition: MacroInfo.h:179
StringRef getName() const
Return the actual identifier string.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isNot(tok::TokenKind K) const
Definition: Token.h:95
unsigned getExpansionColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool isFunctionLike() const
Definition: MacroInfo.h:199
Present this diagnostic as a remark.
PragmaHandler - Instances of this interface are defined to handle the various pragmas that the language f...
Definition: Pragma.h:58
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
unsigned getLength() const
Definition: Token.h:126
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:214
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition: MacroInfo.h:123
bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const
AvoidConcat - If printing PrevTok immediately followed by Tok would cause the two individual tokens t...
bool tokens_empty() const
Definition: MacroInfo.h:244
Defines the PPCallbacks interface.
unsigned ShowMacroComments
Show comments, even in macros.
ArrayRef< Token > tokens() const
Definition: MacroInfo.h:245
Do not present this diagnostic, ignore it.
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition: MacroInfo.h:215
unsigned ShowLineMarkers
Show #line markers.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:282
bool isGNUVarargs() const
Definition: MacroInfo.h:206
MacroMap::const_iterator macro_iterator
unsigned ShowCPP
Print normal preprocessed output.
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition: MacroInfo.h:580
Present this diagnostic as a fatal error.
Present this diagnostic as a warning.
SourceLocation getIncludeLoc() const
Return the presumed include location of this location.
void * getAnnotationValue() const
Definition: Token.h:223
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:269
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:910
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:168
static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PrintPPOutputPPCallbacks *Callbacks, raw_ostream &OS)
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:124