clang  10.0.0svn
SortJavaScriptImports.cpp
Go to the documentation of this file.
1 //===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a sort operation for JavaScript ES6 imports.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "SortJavaScriptImports.h"
15 #include "TokenAnalyzer.h"
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/Diagnostic.h"
19 #include "clang/Basic/LLVM.h"
22 #include "clang/Format/Format.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Support/Debug.h"
26 #include <algorithm>
27 #include <string>
28 
29 #define DEBUG_TYPE "format-formatter"
30 
31 namespace clang {
32 namespace format {
33 
34 class FormatTokenLexer;
35 
37 
38 // An imported symbol in a JavaScript ES6 import/export, possibly aliased.
40  StringRef Symbol;
41  StringRef Alias;
43 
44  bool operator==(const JsImportedSymbol &RHS) const {
45  // Ignore Range for comparison, it is only used to stitch code together,
46  // but imports at different code locations are still conceptually the same.
47  return Symbol == RHS.Symbol && Alias == RHS.Alias;
48  }
49 };
50 
51 // An ES6 module reference.
52 //
53 // ES6 implements a module system, where individual modules (~= source files)
54 // can reference other modules, either importing symbols from them, or exporting
55 // symbols from them:
56 // import {foo} from 'foo';
57 // export {foo};
58 // export {bar} from 'bar';
59 //
60 // `export`s with URLs are syntactic sugar for an import of the symbol from the
61 // URL, followed by an export of the symbol, allowing this code to treat both
62 // statements more or less identically, with the exception being that `export`s
63 // are sorted last.
64 //
65 // imports and exports support individual symbols, but also a wildcard syntax:
66 // import * as prefix from 'foo';
67 // export * from 'bar';
68 //
69 // This struct represents both exports and imports to build up the information
70 // required for sorting module references.
72  bool IsExport = false;
73  // Module references are sorted into these categories, in order.
75  SIDE_EFFECT, // "import 'something';"
76  ABSOLUTE, // from 'something'
77  RELATIVE_PARENT, // from '../*'
78  RELATIVE, // from './*'
79  };
80  ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT;
81  // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`.
82  StringRef URL;
83  // Prefix from "import * as prefix". Empty for symbol imports and `export *`.
84  // Implies an empty names list.
85  StringRef Prefix;
86  // Symbols from `import {SymbolA, SymbolB, ...} from ...;`.
88  // Textual position of the import/export, including preceding and trailing
89  // comments.
91 };
92 
93 bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) {
94  if (LHS.IsExport != RHS.IsExport)
95  return LHS.IsExport < RHS.IsExport;
96  if (LHS.Category != RHS.Category)
97  return LHS.Category < RHS.Category;
98  if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT)
99  // Side effect imports might be ordering sensitive. Consider them equal so
100  // that they maintain their relative order in the stable sort below.
101  // This retains transitivity because LHS.Category == RHS.Category here.
102  return false;
103  // Empty URLs sort *last* (for export {...};).
104  if (LHS.URL.empty() != RHS.URL.empty())
105  return LHS.URL.empty() < RHS.URL.empty();
106  if (int Res = LHS.URL.compare_lower(RHS.URL))
107  return Res < 0;
108  // '*' imports (with prefix) sort before {a, b, ...} imports.
109  if (LHS.Prefix.empty() != RHS.Prefix.empty())
110  return LHS.Prefix.empty() < RHS.Prefix.empty();
111  if (LHS.Prefix != RHS.Prefix)
112  return LHS.Prefix > RHS.Prefix;
113  return false;
114 }
115 
116 // JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is
117 // implemented as a TokenAnalyzer because ES6 imports have substantial syntactic
118 // structure, making it messy to sort them using regular expressions.
120 public:
122  : TokenAnalyzer(Env, Style),
123  FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
124 
125  std::pair<tooling::Replacements, unsigned>
127  SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
128  FormatTokenLexer &Tokens) override {
129  tooling::Replacements Result;
130  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
131 
132  const AdditionalKeywords &Keywords = Tokens.getKeywords();
134  AnnotatedLine *FirstNonImportLine;
135  std::tie(References, FirstNonImportLine) =
136  parseModuleReferences(Keywords, AnnotatedLines);
137 
138  if (References.empty())
139  return {Result, 0};
140 
142  for (unsigned i = 0, e = References.size(); i != e; ++i)
143  Indices.push_back(i);
144  llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) {
145  return References[LHSI] < References[RHSI];
146  });
147  bool ReferencesInOrder = std::is_sorted(Indices.begin(), Indices.end());
148 
149  std::string ReferencesText;
150  bool SymbolsInOrder = true;
151  for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
152  JsModuleReference Reference = References[Indices[i]];
153  if (appendReference(ReferencesText, Reference))
154  SymbolsInOrder = false;
155  if (i + 1 < e) {
156  // Insert breaks between imports and exports.
157  ReferencesText += "\n";
158  // Separate imports groups with two line breaks, but keep all exports
159  // in a single group.
160  if (!Reference.IsExport &&
161  (Reference.IsExport != References[Indices[i + 1]].IsExport ||
162  Reference.Category != References[Indices[i + 1]].Category))
163  ReferencesText += "\n";
164  }
165  }
166 
167  if (ReferencesInOrder && SymbolsInOrder)
168  return {Result, 0};
169 
170  SourceRange InsertionPoint = References[0].Range;
171  InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
172 
173  // The loop above might collapse previously existing line breaks between
174  // import blocks, and thus shrink the file. SortIncludes must not shrink
175  // overall source length as there is currently no re-calculation of ranges
176  // after applying source sorting.
177  // This loop just backfills trailing spaces after the imports, which are
178  // harmless and will be stripped by the subsequent formatting pass.
179  // FIXME: A better long term fix is to re-calculate Ranges after sorting.
180  unsigned PreviousSize = getSourceText(InsertionPoint).size();
181  while (ReferencesText.size() < PreviousSize) {
182  ReferencesText += " ";
183  }
184 
185  // Separate references from the main code body of the file.
186  if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2)
187  ReferencesText += "\n";
188 
189  LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n"
190  << getSourceText(InsertionPoint) << "\nwith:\n"
191  << ReferencesText << "\n");
192  auto Err = Result.add(tooling::Replacement(
193  Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint),
194  ReferencesText));
195  // FIXME: better error handling. For now, just print error message and skip
196  // the replacement for the release version.
197  if (Err) {
198  llvm::errs() << llvm::toString(std::move(Err)) << "\n";
199  assert(false);
200  }
201 
202  return {Result, 0};
203  }
204 
205 private:
206  FormatToken *Current;
207  FormatToken *LineEnd;
208 
209  FormatToken invalidToken;
210 
211  StringRef FileContents;
212 
213  void skipComments() { Current = skipComments(Current); }
214 
215  FormatToken *skipComments(FormatToken *Tok) {
216  while (Tok && Tok->is(tok::comment))
217  Tok = Tok->Next;
218  return Tok;
219  }
220 
221  void nextToken() {
222  Current = Current->Next;
223  skipComments();
224  if (!Current || Current == LineEnd->Next) {
225  // Set the current token to an invalid token, so that further parsing on
226  // this line fails.
227  invalidToken.Tok.setKind(tok::unknown);
228  Current = &invalidToken;
229  }
230  }
231 
232  StringRef getSourceText(SourceRange Range) {
233  return getSourceText(Range.getBegin(), Range.getEnd());
234  }
235 
236  StringRef getSourceText(SourceLocation Begin, SourceLocation End) {
237  const SourceManager &SM = Env.getSourceManager();
238  return FileContents.substr(SM.getFileOffset(Begin),
239  SM.getFileOffset(End) - SM.getFileOffset(Begin));
240  }
241 
242  // Appends ``Reference`` to ``Buffer``, returning true if text within the
243  // ``Reference`` changed (e.g. symbol order).
244  bool appendReference(std::string &Buffer, JsModuleReference &Reference) {
245  // Sort the individual symbols within the import.
246  // E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';`
247  SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols;
248  llvm::stable_sort(
249  Symbols, [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) {
250  return LHS.Symbol.compare_lower(RHS.Symbol) < 0;
251  });
252  if (Symbols == Reference.Symbols) {
253  // No change in symbol order.
254  StringRef ReferenceStmt = getSourceText(Reference.Range);
255  Buffer += ReferenceStmt;
256  return false;
257  }
258  // Stitch together the module reference start...
259  SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin();
260  SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd();
261  Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart);
262  // ... then the references in order ...
263  for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) {
264  if (I != Symbols.begin())
265  Buffer += ",";
266  Buffer += getSourceText(I->Range);
267  }
268  // ... followed by the module reference end.
269  Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd());
270  return true;
271  }
272 
273  // Parses module references in the given lines. Returns the module references,
274  // and a pointer to the first "main code" line if that is adjacent to the
275  // affected lines of module references, nullptr otherwise.
276  std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *>
277  parseModuleReferences(const AdditionalKeywords &Keywords,
278  SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
280  SourceLocation Start;
281  AnnotatedLine *FirstNonImportLine = nullptr;
282  bool AnyImportAffected = false;
283  for (auto Line : AnnotatedLines) {
284  Current = Line->First;
285  LineEnd = Line->Last;
286  skipComments();
287  if (Start.isInvalid() || References.empty())
288  // After the first file level comment, consider line comments to be part
289  // of the import that immediately follows them by using the previously
290  // set Start.
291  Start = Line->First->Tok.getLocation();
292  if (!Current) {
293  // Only comments on this line. Could be the first non-import line.
294  FirstNonImportLine = Line;
295  continue;
296  }
297  JsModuleReference Reference;
298  Reference.Range.setBegin(Start);
299  if (!parseModuleReference(Keywords, Reference)) {
300  if (!FirstNonImportLine)
301  FirstNonImportLine = Line; // if no comment before.
302  break;
303  }
304  FirstNonImportLine = nullptr;
305  AnyImportAffected = AnyImportAffected || Line->Affected;
306  Reference.Range.setEnd(LineEnd->Tok.getEndLoc());
307  LLVM_DEBUG({
308  llvm::dbgs() << "JsModuleReference: {"
309  << "is_export: " << Reference.IsExport
310  << ", cat: " << Reference.Category
311  << ", url: " << Reference.URL
312  << ", prefix: " << Reference.Prefix;
313  for (size_t i = 0; i < Reference.Symbols.size(); ++i)
314  llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as "
315  << Reference.Symbols[i].Alias;
316  llvm::dbgs() << ", text: " << getSourceText(Reference.Range);
317  llvm::dbgs() << "}\n";
318  });
319  References.push_back(Reference);
320  Start = SourceLocation();
321  }
322  // Sort imports if any import line was affected.
323  if (!AnyImportAffected)
324  References.clear();
325  return std::make_pair(References, FirstNonImportLine);
326  }
327 
328  // Parses a JavaScript/ECMAScript 6 module reference.
329  // See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules
330  // for grammar EBNF (production ModuleItem).
331  bool parseModuleReference(const AdditionalKeywords &Keywords,
332  JsModuleReference &Reference) {
333  if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export))
334  return false;
335  Reference.IsExport = Current->is(tok::kw_export);
336 
337  nextToken();
338  if (Current->isStringLiteral() && !Reference.IsExport) {
339  // "import 'side-effect';"
340  Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT;
341  Reference.URL =
342  Current->TokenText.substr(1, Current->TokenText.size() - 2);
343  return true;
344  }
345 
346  if (!parseModuleBindings(Keywords, Reference))
347  return false;
348 
349  if (Current->is(Keywords.kw_from)) {
350  // imports have a 'from' clause, exports might not.
351  nextToken();
352  if (!Current->isStringLiteral())
353  return false;
354  // URL = TokenText without the quotes.
355  Reference.URL =
356  Current->TokenText.substr(1, Current->TokenText.size() - 2);
357  if (Reference.URL.startswith(".."))
358  Reference.Category =
359  JsModuleReference::ReferenceCategory::RELATIVE_PARENT;
360  else if (Reference.URL.startswith("."))
361  Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
362  else
363  Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE;
364  } else {
365  // w/o URL groups with "empty".
366  Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
367  }
368  return true;
369  }
370 
371  bool parseModuleBindings(const AdditionalKeywords &Keywords,
372  JsModuleReference &Reference) {
373  if (parseStarBinding(Keywords, Reference))
374  return true;
375  return parseNamedBindings(Keywords, Reference);
376  }
377 
378  bool parseStarBinding(const AdditionalKeywords &Keywords,
379  JsModuleReference &Reference) {
380  // * as prefix from '...';
381  if (Current->isNot(tok::star))
382  return false;
383  nextToken();
384  if (Current->isNot(Keywords.kw_as))
385  return false;
386  nextToken();
387  if (Current->isNot(tok::identifier))
388  return false;
389  Reference.Prefix = Current->TokenText;
390  nextToken();
391  return true;
392  }
393 
394  bool parseNamedBindings(const AdditionalKeywords &Keywords,
395  JsModuleReference &Reference) {
396  if (Current->is(tok::identifier)) {
397  nextToken();
398  if (Current->is(Keywords.kw_from))
399  return true;
400  if (Current->isNot(tok::comma))
401  return false;
402  nextToken(); // eat comma.
403  }
404  if (Current->isNot(tok::l_brace))
405  return false;
406 
407  // {sym as alias, sym2 as ...} from '...';
408  while (Current->isNot(tok::r_brace)) {
409  nextToken();
410  if (Current->is(tok::r_brace))
411  break;
412  if (!Current->isOneOf(tok::identifier, tok::kw_default))
413  return false;
414 
416  Symbol.Symbol = Current->TokenText;
417  // Make sure to include any preceding comments.
418  Symbol.Range.setBegin(
420  nextToken();
421 
422  if (Current->is(Keywords.kw_as)) {
423  nextToken();
424  if (!Current->isOneOf(tok::identifier, tok::kw_default))
425  return false;
426  Symbol.Alias = Current->TokenText;
427  nextToken();
428  }
429  Symbol.Range.setEnd(Current->Tok.getLocation());
430  Reference.Symbols.push_back(Symbol);
431 
432  if (!Current->isOneOf(tok::r_brace, tok::comma))
433  return false;
434  }
435  nextToken(); // consume r_brace
436  return true;
437  }
438 };
439 
441  StringRef Code,
443  StringRef FileName) {
444  // FIXME: Cursor support.
445  return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style)
446  .process()
447  .first;
448 }
449 
450 } // end namespace format
451 } // end namespace clang
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
Token Tok
The Token.
Definition: FormatToken.h:133
Defines the SourceManager interface.
llvm::Error add(const Replacement &R)
Adds a new replacement R to the current set of replacements.
const AdditionalKeywords & getKeywords()
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:209
void setBegin(SourceLocation b)
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:139
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:298
JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style)
bool operator==(const JsImportedSymbol &RHS) const
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
This file implements a token annotator, i.e.
int Category
Definition: Format.cpp:1729
void setKind(tok::TokenKind K)
Definition: Token.h:93
bool isNot(T Kind) const
Definition: FormatToken.h:328
const FormatToken & Tok
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
Defines the Diagnostic-related interfaces.
std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:487
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:321
A text replacement.
Definition: Replacement.h:83
Determines extra information about the tokens comprising an UnwrappedLine.
SourceLocation End
const AnnotatedLine * Line
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:126
SourceLocation Begin
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:129
SourceLocation getEnd() const
const SourceManager & SM
Definition: Format.cpp:1586
static CharSourceRange getCharRange(SourceRange R)
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
Encodes a location in the source.
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:312
Various functions to configurably format source code.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang&#39;s l...
Definition: FormatToken.h:683
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:146
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:177
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:49
Dataflow Directional Tag Classes.
std::pair< tooling::Replacements, unsigned > process()
This file implements a sorter for JavaScript ES6 imports.
bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS)
SmallVector< JsImportedSymbol, 1 > Symbols
This file declares an abstract TokenAnalyzer, and associated helper classes.
Defines the clang::SourceLocation class and associated facilities.
void setEnd(SourceLocation e)
A trivial tuple used to represent a source range.
tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, StringRef Code, ArrayRef< tooling::Range > Ranges, StringRef FileName)
bool isStringLiteral() const
Definition: FormatToken.h:361
SourceLocation getBegin() const
This class handles loading and caching of source files into memory.
const FormatStyle & Style
SourceLocation getEndLoc() const
Definition: Token.h:153