clang  10.0.0svn
DependencyDirectivesSourceMinimizer.cpp
Go to the documentation of this file.
1 //===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the implementation for minimizing header and source files to the
11 /// minimum necessary preprocessor directives for evaluating includes. It
12 /// reduces the source down to #define, #include, #import, @import, and any
13 /// conditional preprocessor logic that contains one of those.
14 ///
15 //===----------------------------------------------------------------------===//
16 
18 #include "clang/Basic/CharInfo.h"
19 #include "clang/Basic/Diagnostic.h"
21 #include "llvm/ADT/StringSwitch.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 
24 using namespace llvm;
25 using namespace clang;
27 
28 namespace {
29 
30 struct Minimizer {
31  /// Minimized output.
33  /// The known tokens encountered during the minimization.
34  SmallVectorImpl<Token> &Tokens;
35 
36  Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
37  StringRef Input, DiagnosticsEngine *Diags,
38  SourceLocation InputSourceLoc)
39  : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
40  InputSourceLoc(InputSourceLoc) {}
41 
42  /// Lex the provided source and emit the minimized output.
43  ///
44  /// \returns True on error.
45  bool minimize();
46 
47 private:
48  struct IdInfo {
49  const char *Last;
50  StringRef Name;
51  };
52 
53  /// Lex an identifier.
54  ///
55  /// \pre First points at a valid identifier head.
56  LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
57  LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
58  const char *const End);
59  LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
60  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
61  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
62  LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
63  LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
64  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
65  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
66  LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
67  const char *&First, const char *const End);
68  Token &makeToken(TokenKind K) {
69  Tokens.emplace_back(K, Out.size());
70  return Tokens.back();
71  }
72  void popToken() {
73  Out.resize(Tokens.back().Offset);
74  Tokens.pop_back();
75  }
76  TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
77 
78  Minimizer &put(char Byte) {
79  Out.push_back(Byte);
80  return *this;
81  }
82  Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
83  Minimizer &append(const char *First, const char *Last) {
84  Out.append(First, Last);
85  return *this;
86  }
87 
88  void printToNewline(const char *&First, const char *const End);
89  void printAdjacentModuleNameParts(const char *&First, const char *const End);
90  LLVM_NODISCARD bool printAtImportBody(const char *&First,
91  const char *const End);
92  void printDirectiveBody(const char *&First, const char *const End);
93  void printAdjacentMacroArgs(const char *&First, const char *const End);
94  LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
95 
96  /// Reports a diagnostic if the diagnostic engine is provided. Always returns
97  /// true at the end.
98  bool reportError(const char *CurPtr, unsigned Err);
99 
100  StringMap<char> SplitIds;
101  StringRef Input;
102  DiagnosticsEngine *Diags;
103  SourceLocation InputSourceLoc;
104 };
105 
106 } // end anonymous namespace
107 
108 bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
109  if (!Diags)
110  return true;
111  assert(CurPtr >= Input.data() && "invalid buffer ptr");
112  Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
113  return true;
114 }
115 
116 static void skipOverSpaces(const char *&First, const char *const End) {
117  while (First != End && isHorizontalWhitespace(*First))
118  ++First;
119 }
120 
121 LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
122  const char *Current) {
123  assert(First <= Current);
124 
125  // Check if we can even back up.
126  if (*Current != '"' || First == Current)
127  return false;
128 
129  // Check for an "R".
130  --Current;
131  if (*Current != 'R')
132  return false;
133  if (First == Current || !isIdentifierBody(*--Current))
134  return true;
135 
136  // Check for a prefix of "u", "U", or "L".
137  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
138  return First == Current || !isIdentifierBody(*--Current);
139 
140  // Check for a prefix of "u8".
141  if (*Current != '8' || First == Current || *Current-- != 'u')
142  return false;
143  return First == Current || !isIdentifierBody(*--Current);
144 }
145 
146 static void skipRawString(const char *&First, const char *const End) {
147  assert(First[0] == '"');
148  assert(First[-1] == 'R');
149 
150  const char *Last = ++First;
151  while (Last != End && *Last != '(')
152  ++Last;
153  if (Last == End) {
154  First = Last; // Hit the end... just give up.
155  return;
156  }
157 
158  StringRef Terminator(First, Last - First);
159  for (;;) {
160  // Move First to just past the next ")".
161  First = Last;
162  while (First != End && *First != ')')
163  ++First;
164  if (First == End)
165  return;
166  ++First;
167 
168  // Look ahead for the terminator sequence.
169  Last = First;
170  while (Last != End && size_t(Last - First) < Terminator.size() &&
171  Terminator[Last - First] == *Last)
172  ++Last;
173 
174  // Check if we hit it (or the end of the file).
175  if (Last == End) {
176  First = Last;
177  return;
178  }
179  if (size_t(Last - First) < Terminator.size())
180  continue;
181  if (*Last != '"')
182  continue;
183  First = Last + 1;
184  return;
185  }
186 }
187 
188 // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
189 static unsigned isEOL(const char *First, const char *const End) {
190  if (First == End)
191  return 0;
192  if (End - First > 1 && isVerticalWhitespace(First[0]) &&
193  isVerticalWhitespace(First[1]) && First[0] != First[1])
194  return 2;
195  return !!isVerticalWhitespace(First[0]);
196 }
197 
198 static void skipString(const char *&First, const char *const End) {
199  assert(*First == '\'' || *First == '"' || *First == '<');
200  const char Terminator = *First == '<' ? '>' : *First;
201  for (++First; First != End && *First != Terminator; ++First) {
202  // String and character literals don't extend past the end of the line.
203  if (isVerticalWhitespace(*First))
204  return;
205  if (*First != '\\')
206  continue;
207  // Skip past backslash to the next character. This ensures that the
208  // character right after it is skipped as well, which matters if it's
209  // the terminator.
210  if (++First == End)
211  return;
212  if (!isWhitespace(*First))
213  continue;
214  // Whitespace after the backslash might indicate a line continuation.
215  const char *FirstAfterBackslashPastSpace = First;
216  skipOverSpaces(FirstAfterBackslashPastSpace, End);
217  if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
218  // Advance the character pointer to the next line for the next
219  // iteration.
220  First = FirstAfterBackslashPastSpace + NLSize - 1;
221  }
222  }
223  if (First != End)
224  ++First; // Finish off the string.
225 }
226 
227 // Returns the length of the skipped newline
228 static unsigned skipNewline(const char *&First, const char *End) {
229  if (First == End)
230  return 0;
231  assert(isVerticalWhitespace(*First));
232  unsigned Len = isEOL(First, End);
233  assert(Len && "expected newline");
234  First += Len;
235  return Len;
236 }
237 
/// \returns True if the character immediately before the end-of-line sequence
/// of length \p EOLLen that ends at \p First is a backslash.
static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  const char *const BeforeEOL = First - static_cast<int>(EOLLen) - 1;
  return *BeforeEOL == '\\';
}
241 
242 static void skipToNewlineRaw(const char *&First, const char *const End) {
243  for (;;) {
244  if (First == End)
245  return;
246 
247  unsigned Len = isEOL(First, End);
248  if (Len)
249  return;
250 
251  do {
252  if (++First == End)
253  return;
254  Len = isEOL(First, End);
255  } while (!Len);
256 
257  if (First[-1] != '\\')
258  return;
259 
260  First += Len;
261  // Keep skipping lines...
262  }
263 }
264 
265 static const char *findLastNonSpace(const char *First, const char *Last) {
266  assert(First <= Last);
267  while (First != Last && isHorizontalWhitespace(Last[-1]))
268  --Last;
269  return Last;
270 }
271 
272 static const char *findFirstTrailingSpace(const char *First,
273  const char *Last) {
274  const char *LastNonSpace = findLastNonSpace(First, Last);
275  if (Last == LastNonSpace)
276  return Last;
277  assert(isHorizontalWhitespace(LastNonSpace[0]));
278  return LastNonSpace + 1;
279 }
280 
281 static void skipLineComment(const char *&First, const char *const End) {
282  assert(First[0] == '/' && First[1] == '/');
283  First += 2;
284  skipToNewlineRaw(First, End);
285 }
286 
/// Advances \p First past a block comment.
///
/// \pre \p First points at the opening "/*".
///
/// \p First ends just past the closing "*/", or at \p End when the comment is
/// unterminated.
static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');
  // Fewer than four characters cannot hold both the opener and the closer.
  if (End - First < 4) {
    First = End;
    return;
  }
  // Start at the earliest position where the closing '/' can sit, so that the
  // '*' of the opener is never reused as the '*' of the closer.
  const char *Cursor = First + 3;
  while (Cursor != End) {
    if (Cursor[-1] == '*' && Cursor[0] == '/') {
      First = Cursor + 1;
      return;
    }
    ++Cursor;
  }
  First = End;
}
299 
300 /// \returns True if the current single quotation mark character is a C++ 14
301 /// digit separator.
302 static bool isQuoteCppDigitSeparator(const char *const Start,
303  const char *const Cur,
304  const char *const End) {
305  assert(*Cur == '\'' && "expected quotation character");
306  // skipLine called in places where we don't expect a valid number
307  // body before `start` on the same line, so always return false at the start.
308  if (Start == Cur)
309  return false;
310  // The previous character must be a valid PP number character.
311  // Make sure that the L, u, U, u8 prefixes don't get marked as a
312  // separator though.
313  char Prev = *(Cur - 1);
314  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
315  return false;
316  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
317  return false;
318  if (!isPreprocessingNumberBody(Prev))
319  return false;
320  // The next character should be a valid identifier body character.
321  return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
322 }
323 
324 static void skipLine(const char *&First, const char *const End) {
325  for (;;) {
326  assert(First <= End);
327  if (First == End)
328  return;
329 
330  if (isVerticalWhitespace(*First)) {
331  skipNewline(First, End);
332  return;
333  }
334  const char *Start = First;
335  while (First != End && !isVerticalWhitespace(*First)) {
336  // Iterate over strings correctly to avoid comments and newlines.
337  if (*First == '"' ||
338  (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
339  if (isRawStringLiteral(Start, First))
340  skipRawString(First, End);
341  else
342  skipString(First, End);
343  continue;
344  }
345 
346  // Iterate over comments correctly.
347  if (*First != '/' || End - First < 2) {
348  ++First;
349  continue;
350  }
351 
352  if (First[1] == '/') {
353  // "//...".
354  skipLineComment(First, End);
355  continue;
356  }
357 
358  if (First[1] != '*') {
359  ++First;
360  continue;
361  }
362 
363  // "/*...*/".
364  skipBlockComment(First, End);
365  }
366  if (First == End)
367  return;
368 
369  // Skip over the newline.
370  unsigned Len = skipNewline(First, End);
371  if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
372  break;
373  }
374 }
375 
376 static void skipDirective(StringRef Name, const char *&First,
377  const char *const End) {
378  if (llvm::StringSwitch<bool>(Name)
379  .Case("warning", true)
380  .Case("error", true)
381  .Default(false))
382  // Do not process quotes or comments.
383  skipToNewlineRaw(First, End);
384  else
385  skipLine(First, End);
386 }
387 
388 void Minimizer::printToNewline(const char *&First, const char *const End) {
389  while (First != End && !isVerticalWhitespace(*First)) {
390  const char *Last = First;
391  do {
392  // Iterate over strings correctly to avoid comments and newlines.
393  if (*Last == '"' || *Last == '\'' ||
394  (*Last == '<' && top() == pp_include)) {
395  if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
396  skipRawString(Last, End);
397  else
398  skipString(Last, End);
399  continue;
400  }
401  if (*Last != '/' || End - Last < 2) {
402  ++Last;
403  continue; // Gather the rest up to print verbatim.
404  }
405 
406  if (Last[1] != '/' && Last[1] != '*') {
407  ++Last;
408  continue;
409  }
410 
411  // Deal with "//..." and "/*...*/".
412  append(First, findFirstTrailingSpace(First, Last));
413  First = Last;
414 
415  if (Last[1] == '/') {
416  skipLineComment(First, End);
417  return;
418  }
419 
420  put(' ');
421  skipBlockComment(First, End);
422  skipOverSpaces(First, End);
423  Last = First;
424  } while (Last != End && !isVerticalWhitespace(*Last));
425 
426  // Print out the string.
427  const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
428  if (Last == End || LastBeforeTrailingSpace == First ||
429  LastBeforeTrailingSpace[-1] != '\\') {
430  append(First, LastBeforeTrailingSpace);
431  First = Last;
432  skipNewline(First, End);
433  return;
434  }
435 
436  // Print up to the backslash, backing up over spaces. Preserve at least one
437  // space, as the space matters when tokens are separated by a line
438  // continuation.
439  append(First, findFirstTrailingSpace(
440  First, LastBeforeTrailingSpace - 1));
441 
442  First = Last;
443  skipNewline(First, End);
444  skipOverSpaces(First, End);
445  }
446 }
447 
448 static void skipWhitespace(const char *&First, const char *const End) {
449  for (;;) {
450  assert(First <= End);
451  skipOverSpaces(First, End);
452 
453  if (End - First < 2)
454  return;
455 
456  if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
457  skipNewline(++First, End);
458  continue;
459  }
460 
461  // Check for a non-comment character.
462  if (First[0] != '/')
463  return;
464 
465  // "// ...".
466  if (First[1] == '/') {
467  skipLineComment(First, End);
468  return;
469  }
470 
471  // Cannot be a comment.
472  if (First[1] != '*')
473  return;
474 
475  // "/*...*/".
476  skipBlockComment(First, End);
477  }
478 }
479 
480 void Minimizer::printAdjacentModuleNameParts(const char *&First,
481  const char *const End) {
482  // Skip over parts of the body.
483  const char *Last = First;
484  do
485  ++Last;
486  while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
487  append(First, Last);
488  First = Last;
489 }
490 
491 bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
492  for (;;) {
493  skipWhitespace(First, End);
494  if (First == End)
495  return true;
496 
497  if (isVerticalWhitespace(*First)) {
498  skipNewline(First, End);
499  continue;
500  }
501 
502  // Found a semicolon.
503  if (*First == ';') {
504  put(*First++).put('\n');
505  return false;
506  }
507 
508  // Don't handle macro expansions inside @import for now.
509  if (!isIdentifierBody(*First) && *First != '.')
510  return true;
511 
512  printAdjacentModuleNameParts(First, End);
513  }
514 }
515 
516 void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
517  skipWhitespace(First, End); // Skip initial whitespace.
518  printToNewline(First, End);
519  while (Out.back() == ' ')
520  Out.pop_back();
521  put('\n');
522 }
523 
524 LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
525  const char *const End) {
526  assert(isIdentifierBody(*First) && "invalid identifer");
527  const char *Last = First + 1;
528  while (Last != End && isIdentifierBody(*Last))
529  ++Last;
530  return Last;
531 }
532 
533 LLVM_NODISCARD static const char *
534 getIdentifierContinuation(const char *First, const char *const End) {
535  if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
536  return nullptr;
537 
538  ++First;
539  skipNewline(First, End);
540  if (First == End)
541  return nullptr;
542  return isIdentifierBody(First[0]) ? First : nullptr;
543 }
544 
545 Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
546  const char *const End) {
547  const char *Last = lexRawIdentifier(First, End);
548  const char *Next = getIdentifierContinuation(Last, End);
549  if (LLVM_LIKELY(!Next))
550  return IdInfo{Last, StringRef(First, Last - First)};
551 
552  // Slow path, where identifiers are split over lines.
553  SmallVector<char, 64> Id(First, Last);
554  while (Next) {
555  Last = lexRawIdentifier(Next, End);
556  Id.append(Next, Last);
557  Next = getIdentifierContinuation(Last, End);
558  }
559  return IdInfo{
560  Last,
561  SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
562 }
563 
564 void Minimizer::printAdjacentMacroArgs(const char *&First,
565  const char *const End) {
566  // Skip over parts of the body.
567  const char *Last = First;
568  do
569  ++Last;
570  while (Last != End &&
571  (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
572  append(First, Last);
573  First = Last;
574 }
575 
576 bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
577  assert(*First == '(');
578  put(*First++);
579  for (;;) {
580  skipWhitespace(First, End);
581  if (First == End)
582  return true;
583 
584  if (*First == ')') {
585  put(*First++);
586  return false;
587  }
588 
589  // This is intentionally fairly liberal.
590  if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
591  return true;
592 
593  printAdjacentMacroArgs(First, End);
594  }
595 }
596 
597 /// Looks for an identifier starting from Last.
598 ///
599 /// Updates "First" to just past the next identifier, if any. Returns true iff
600 /// the identifier matches "Id".
601 bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
602  const char *const End) {
603  skipWhitespace(First, End);
604  if (First == End || !isIdentifierHead(*First))
605  return false;
606 
607  IdInfo FoundId = lexIdentifier(First, End);
608  First = FoundId.Last;
609  return FoundId.Name == Id;
610 }
611 
612 bool Minimizer::lexAt(const char *&First, const char *const End) {
613  // Handle "@import".
614  const char *ImportLoc = First++;
615  if (!isNextIdentifier("import", First, End)) {
616  skipLine(First, End);
617  return false;
618  }
619  makeToken(decl_at_import);
620  append("@import ");
621  if (printAtImportBody(First, End))
622  return reportError(
623  ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
624  skipWhitespace(First, End);
625  if (First == End)
626  return false;
627  if (!isVerticalWhitespace(*First))
628  return reportError(
629  ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
630  skipNewline(First, End);
631  return false;
632 }
633 
634 bool Minimizer::lexModule(const char *&First, const char *const End) {
635  IdInfo Id = lexIdentifier(First, End);
636  First = Id.Last;
637  bool Export = false;
638  if (Id.Name == "export") {
639  Export = true;
640  skipWhitespace(First, End);
641  if (!isIdentifierBody(*First)) {
642  skipLine(First, End);
643  return false;
644  }
645  Id = lexIdentifier(First, End);
646  First = Id.Last;
647  }
648 
649  if (Id.Name != "module" && Id.Name != "import") {
650  skipLine(First, End);
651  return false;
652  }
653 
654  skipWhitespace(First, End);
655 
656  // Ignore this as a module directive if the next character can't be part of
657  // an import.
658 
659  switch (*First) {
660  case ':':
661  case '<':
662  case '"':
663  break;
664  default:
665  if (!isIdentifierBody(*First)) {
666  skipLine(First, End);
667  return false;
668  }
669  }
670 
671  if (Export) {
672  makeToken(cxx_export_decl);
673  append("export ");
674  }
675 
676  if (Id.Name == "module")
677  makeToken(cxx_module_decl);
678  else
679  makeToken(cxx_import_decl);
680  append(Id.Name);
681  append(" ");
682  printToNewline(First, End);
683  append("\n");
684  return false;
685 }
686 
687 bool Minimizer::lexDefine(const char *&First, const char *const End) {
688  makeToken(pp_define);
689  append("#define ");
690  skipWhitespace(First, End);
691 
692  if (!isIdentifierHead(*First))
693  return reportError(First, diag::err_pp_macro_not_identifier);
694 
695  IdInfo Id = lexIdentifier(First, End);
696  const char *Last = Id.Last;
697  append(Id.Name);
698  if (Last == End)
699  return false;
700  if (*Last == '(') {
701  size_t Size = Out.size();
702  if (printMacroArgs(Last, End)) {
703  // Be robust to bad macro arguments, since they can show up in disabled
704  // code.
705  Out.resize(Size);
706  append("(/* invalid */\n");
707  skipLine(Last, End);
708  return false;
709  }
710  }
711  skipWhitespace(Last, End);
712  if (Last == End)
713  return false;
714  if (!isVerticalWhitespace(*Last))
715  put(' ');
716  printDirectiveBody(Last, End);
717  First = Last;
718  return false;
719 }
720 
721 bool Minimizer::lexPragma(const char *&First, const char *const End) {
722  // #pragma.
723  skipWhitespace(First, End);
724  if (First == End || !isIdentifierHead(*First))
725  return false;
726 
727  IdInfo FoundId = lexIdentifier(First, End);
728  First = FoundId.Last;
729  if (FoundId.Name == "once") {
730  // #pragma once
731  skipLine(First, End);
732  makeToken(pp_pragma_once);
733  append("#pragma once\n");
734  return false;
735  }
736 
737  if (FoundId.Name != "clang") {
738  skipLine(First, End);
739  return false;
740  }
741 
742  // #pragma clang.
743  if (!isNextIdentifier("module", First, End)) {
744  skipLine(First, End);
745  return false;
746  }
747 
748  // #pragma clang module.
749  if (!isNextIdentifier("import", First, End)) {
750  skipLine(First, End);
751  return false;
752  }
753 
754  // #pragma clang module import.
755  makeToken(pp_pragma_import);
756  append("#pragma clang module import ");
757  printDirectiveBody(First, End);
758  return false;
759 }
760 
761 bool Minimizer::lexEndif(const char *&First, const char *const End) {
762  // Strip out "#else" if it's empty.
763  if (top() == pp_else)
764  popToken();
765 
766  // Strip out "#elif" if they're empty.
767  while (top() == pp_elif)
768  popToken();
769 
770  // If "#if" is empty, strip it and skip the "#endif".
771  if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
772  popToken();
773  skipLine(First, End);
774  return false;
775  }
776 
777  return lexDefault(pp_endif, "endif", First, End);
778 }
779 
780 bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
781  const char *&First, const char *const End) {
782  makeToken(Kind);
783  put('#').append(Directive).put(' ');
784  printDirectiveBody(First, End);
785  return false;
786 }
787 
/// \returns True if \p First is one of the characters '#', '@', 'i', 'e' or
/// 'm', the only first characters of a line that lexPPLine examines further.
static bool isStartOfRelevantLine(char First) {
  return First == '#' || First == '@' || First == 'i' || First == 'e' ||
         First == 'm';
}
799 
800 bool Minimizer::lexPPLine(const char *&First, const char *const End) {
801  assert(First != End);
802 
803  skipWhitespace(First, End);
804  assert(First <= End);
805  if (First == End)
806  return false;
807 
808  if (!isStartOfRelevantLine(*First)) {
809  skipLine(First, End);
810  assert(First <= End);
811  return false;
812  }
813 
814  // Handle "@import".
815  if (*First == '@')
816  return lexAt(First, End);
817 
818  if (*First == 'i' || *First == 'e' || *First == 'm')
819  return lexModule(First, End);
820 
821  // Handle preprocessing directives.
822  ++First; // Skip over '#'.
823  skipWhitespace(First, End);
824 
825  if (First == End)
826  return reportError(First, diag::err_pp_expected_eol);
827 
828  if (!isIdentifierHead(*First)) {
829  skipLine(First, End);
830  return false;
831  }
832 
833  // Figure out the token.
834  IdInfo Id = lexIdentifier(First, End);
835  First = Id.Last;
836  auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
837  .Case("include", pp_include)
838  .Case("__include_macros", pp___include_macros)
839  .Case("define", pp_define)
840  .Case("undef", pp_undef)
841  .Case("import", pp_import)
842  .Case("include_next", pp_include_next)
843  .Case("if", pp_if)
844  .Case("ifdef", pp_ifdef)
845  .Case("ifndef", pp_ifndef)
846  .Case("elif", pp_elif)
847  .Case("else", pp_else)
848  .Case("endif", pp_endif)
849  .Case("pragma", pp_pragma_import)
850  .Default(pp_none);
851  if (Kind == pp_none) {
852  skipDirective(Id.Name, First, End);
853  return false;
854  }
855 
856  if (Kind == pp_endif)
857  return lexEndif(First, End);
858 
859  if (Kind == pp_define)
860  return lexDefine(First, End);
861 
862  if (Kind == pp_pragma_import)
863  return lexPragma(First, End);
864 
865  // Everything else.
866  return lexDefault(Kind, Id.Name, First, End);
867 }
868 
/// Advances \p First past a leading UTF-8 byte order mark (EF BB BF), if the
/// input starts with one.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  if ((End - First) < 3)
    return;
  const bool HasBOM =
      First[0] == '\xef' && First[1] == '\xbb' && First[2] == '\xbf';
  if (HasBOM)
    First += 3;
}
874 
875 bool Minimizer::minimizeImpl(const char *First, const char *const End) {
876  skipUTF8ByteOrderMark(First, End);
877  while (First != End)
878  if (lexPPLine(First, End))
879  return true;
880  return false;
881 }
882 
883 bool Minimizer::minimize() {
884  bool Error = minimizeImpl(Input.begin(), Input.end());
885 
886  if (!Error) {
887  // Add a trailing newline and an EOF on success.
888  if (!Out.empty() && Out.back() != '\n')
889  Out.push_back('\n');
890  makeToken(pp_eof);
891  }
892 
893  // Null-terminate the output. This way the memory buffer that's passed to
894  // Clang will not have to worry about the terminating '\0'.
895  Out.push_back(0);
896  Out.pop_back();
897  return Error;
898 }
899 
902  struct Directive {
903  enum DirectiveKind {
904  If, // if/ifdef/ifndef
905  Else // elif,else
906  };
907  int Offset;
908  DirectiveKind Kind;
909  };
911  for (const Token &T : Input) {
912  switch (T.K) {
913  case pp_if:
914  case pp_ifdef:
915  case pp_ifndef:
916  Offsets.push_back({T.Offset, Directive::If});
917  break;
918 
919  case pp_elif:
920  case pp_else: {
921  if (Offsets.empty())
922  return true;
923  int PreviousOffset = Offsets.back().Offset;
924  Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
925  Offsets.push_back({T.Offset, Directive::Else});
926  break;
927  }
928 
929  case pp_endif: {
930  if (Offsets.empty())
931  return true;
932  int PreviousOffset = Offsets.back().Offset;
933  Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
934  do {
935  Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
936  if (Kind == Directive::If)
937  break;
938  } while (!Offsets.empty());
939  break;
940  }
941  default:
942  break;
943  }
944  }
945  return false;
946 }
947 
949  StringRef Input, SmallVectorImpl<char> &Output,
951  SourceLocation InputSourceLoc) {
952  Output.clear();
953  Tokens.clear();
954  return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
955 }
bool minimizeSourceToDependencyDirectives(llvm::StringRef Input, llvm::SmallVectorImpl< char > &Output, llvm::SmallVectorImpl< minimize_source_to_dependency_directives::Token > &Tokens, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Minimize the input down to the preprocessor directives that might have an effect on the dependencies ...
static bool isQuoteCppDigitSeparator(const char *const Start, const char *const Cur, const char *const End)
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
Definition: Dominators.h:30
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:70
Represents a simplified token that's lexed as part of the source minimization.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
Definition: CharInfo.h:87
static void skipUTF8ByteOrderMark(const char *&First, const char *const End)
static void skipRawString(const char *&First, const char *const End)
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
static const char * findFirstTrailingSpace(const char *First, const char *Last)
llvm::Error Error
Defines the Diagnostic-related interfaces.
static unsigned isEOL(const char *First, const char *const End)
static unsigned skipNewline(const char *&First, const char *End)
static const char * findLastNonSpace(const char *First, const char *Last)
static LLVM_NODISCARD const char * getIdentifierContinuation(const char *First, const char *const End)
static LLVM_NODISCARD bool isRawStringLiteral(const char *First, const char *Current)
unsigned Offset
Definition: Format.cpp:1809
LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
Definition: CharInfo.h:48
SourceLocation End
TokenKind
Represents the kind of preprocessor directive or a module declaration that is tracked by the source m...
int Id
Definition: ASTDiff.cpp:190
static bool isStartOfRelevantLine(char First)
static void skipWhitespace(const char *&First, const char *const End)
static void skipLineComment(const char *&First, const char *const End)
Kind
static bool wasLineContinuation(const char *First, unsigned EOLLen)
Encodes a location in the source.
static void skipBlockComment(const char *&First, const char *const End)
static void skipDirective(StringRef Name, const char *&First, const char *const End)
Dataflow Directional Tag Classes.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: &#39;\n&#39;, &#39;\r&#39;.
Definition: CharInfo.h:78
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
Definition: CharInfo.h:58
static LLVM_NODISCARD const char * lexRawIdentifier(const char *First, const char *const End)
static void skipOverSpaces(const char *&First, const char *const End)
static void skipToNewlineRaw(const char *&First, const char *const End)
static void skipLine(const char *&First, const char *const End)
This is the interface for minimizing header and source files to the minimum necessary preprocessor di...
Directive - Abstract class representing a parsed verify directive.
static void skipString(const char *&First, const char *const End)
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
Definition: CharInfo.h:147
bool computeSkippedRanges(ArrayRef< Token > Input, llvm::SmallVectorImpl< SkippedRange > &Range)
Computes the potential source ranges that can be skipped by the preprocessor when skipping a directiv...