clang-tools  10.0.0svn
SemanticHighlighting.cpp
Go to the documentation of this file.
1 //===--- SemanticHighlighting.cpp - ------------------------- ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SemanticHighlighting.h"
10 #include "Logger.h"
11 #include "ParsedAST.h"
12 #include "Protocol.h"
13 #include "SourceCode.h"
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/AST/DeclarationName.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/Type.h"
20 #include "clang/AST/TypeLoc.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include <algorithm>
23 
24 namespace clang {
25 namespace clangd {
26 namespace {
27 
28 /// Some names are not written in the source code and cannot be highlighted,
29 /// e.g. anonymous classes. This function detects those cases.
30 bool canHighlightName(DeclarationName Name) {
31  if (Name.getNameKind() == DeclarationName::CXXConstructorName ||
32  Name.getNameKind() == DeclarationName::CXXUsingDirective)
33  return true;
34  auto *II = Name.getAsIdentifierInfo();
35  return II && !II->getName().empty();
36 }
37 
38 llvm::Optional<HighlightingKind> kindForType(const Type *TP);
39 llvm::Optional<HighlightingKind> kindForDecl(const NamedDecl *D) {
40  if (auto *TD = dyn_cast<TemplateDecl>(D)) {
41  if (auto *Templated = TD->getTemplatedDecl())
42  D = Templated;
43  }
44  if (auto *TD = dyn_cast<TypedefNameDecl>(D)) {
45  // We try to highlight typedefs as their underlying type.
46  if (auto K = kindForType(TD->getUnderlyingType().getTypePtrOrNull()))
47  return K;
48  // And fallback to a generic kind if this fails.
50  }
51  // We highlight class decls, constructor decls and destructor decls as
52  // `Class` type. The destructor decls are handled in `VisitTypeLoc` (we
53  // will visit a TypeLoc where the underlying Type is a CXXRecordDecl).
54  if (auto *RD = llvm::dyn_cast<RecordDecl>(D)) {
55  // We don't want to highlight lambdas like classes.
56  if (RD->isLambda())
57  return llvm::None;
59  }
60  if (isa<ClassTemplateDecl>(D) || isa<RecordDecl>(D) ||
61  isa<CXXConstructorDecl>(D))
63  if (auto *MD = dyn_cast<CXXMethodDecl>(D))
64  return MD->isStatic() ? HighlightingKind::StaticMethod
66  if (isa<FieldDecl>(D))
68  if (isa<EnumDecl>(D))
70  if (isa<EnumConstantDecl>(D))
72  if (isa<ParmVarDecl>(D))
74  if (auto *VD = dyn_cast<VarDecl>(D))
75  return VD->isStaticDataMember()
77  : VD->isLocalVarDecl() ? HighlightingKind::LocalVariable
79  if (isa<BindingDecl>(D))
81  if (isa<FunctionDecl>(D))
83  if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D) ||
84  isa<UsingDirectiveDecl>(D))
86  if (isa<TemplateTemplateParmDecl>(D) || isa<TemplateTypeParmDecl>(D) ||
87  isa<NonTypeTemplateParmDecl>(D))
89  return llvm::None;
90 }
91 llvm::Optional<HighlightingKind> kindForType(const Type *TP) {
92  if (!TP)
93  return llvm::None;
94  if (TP->isBuiltinType()) // Builtins are special, they do not have decls.
96  if (auto *TD = dyn_cast<TemplateTypeParmType>(TP))
97  return kindForDecl(TD->getDecl());
98  if (auto *TD = TP->getAsTagDecl())
99  return kindForDecl(TD);
100  return llvm::None;
101 }
102 // Given a set of candidate declarations for an unresolved name,
103 // if the declarations all have the same highlighting kind, return
104 // that highlighting kind, otherwise return None.
105 llvm::Optional<HighlightingKind>
106 kindForCandidateDecls(llvm::iterator_range<UnresolvedSetIterator> Decls) {
107  llvm::Optional<HighlightingKind> Result;
108  for (NamedDecl *Decl : Decls) {
109  auto Kind = kindForDecl(Decl);
110  if (!Kind || (Result && Kind != Result))
111  return llvm::None;
112  Result = Kind;
113  }
114  return Result;
115 }
116 
117 // Collects all semantic tokens in an ASTContext.
118 class HighlightingTokenCollector
119  : public RecursiveASTVisitor<HighlightingTokenCollector> {
120  std::vector<HighlightingToken> Tokens;
121  ParsedAST &AST;
122 
123 public:
124  HighlightingTokenCollector(ParsedAST &AST) : AST(AST) {}
125 
126  std::vector<HighlightingToken> collectTokens() {
127  Tokens.clear();
128  TraverseAST(AST.getASTContext());
129  // Add highlightings for macro expansions as they are not traversed by the
130  // visitor.
131  for (const auto &M : AST.getMacros().Ranges)
132  Tokens.push_back({HighlightingKind::Macro, M});
133  // Initializer lists can give duplicates of tokens, therefore all tokens
134  // must be deduplicated.
135  llvm::sort(Tokens);
136  auto Last = std::unique(Tokens.begin(), Tokens.end());
137  Tokens.erase(Last, Tokens.end());
138  // Macros can give tokens that have the same source range but conflicting
139  // kinds. In this case all tokens sharing this source range should be
140  // removed.
141  std::vector<HighlightingToken> NonConflicting;
142  NonConflicting.reserve(Tokens.size());
143  for (ArrayRef<HighlightingToken> TokRef = Tokens; !TokRef.empty();) {
144  ArrayRef<HighlightingToken> Conflicting =
145  TokRef.take_while([&](const HighlightingToken &T) {
146  // TokRef is guaranteed at least one element here because otherwise
147  // this predicate would never fire.
148  return T.R == TokRef.front().R;
149  });
150  // If there is exactly one token with this range it's non conflicting and
151  // should be in the highlightings.
152  if (Conflicting.size() == 1)
153  NonConflicting.push_back(TokRef.front());
154  // TokRef[Conflicting.size()] is the next token with a different range (or
155  // the end of the Tokens).
156  TokRef = TokRef.drop_front(Conflicting.size());
157  }
158  return NonConflicting;
159  }
160 
161  bool VisitNamespaceAliasDecl(NamespaceAliasDecl *NAD) {
162  // The target namespace of an alias can not be found in any other way.
163  addToken(NAD->getTargetNameLoc(), NAD->getAliasedNamespace());
164  return true;
165  }
166 
167  bool VisitMemberExpr(MemberExpr *ME) {
168  if (canHighlightName(ME->getMemberNameInfo().getName()))
169  addToken(ME->getMemberLoc(), ME->getMemberDecl());
170  return true;
171  }
172 
173  bool VisitOverloadExpr(OverloadExpr *E) {
174  if (canHighlightName(E->getName()))
175  addToken(E->getNameLoc(),
176  kindForCandidateDecls(E->decls())
177  .getValueOr(HighlightingKind::DependentName));
178  return true;
179  }
180 
181  bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) {
182  if (canHighlightName(E->getDeclName()))
183  addToken(E->getLocation(), HighlightingKind::DependentName);
184  return true;
185  }
186 
187  bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
188  if (canHighlightName(E->getMember()))
189  addToken(E->getMemberLoc(), HighlightingKind::DependentName);
190  return true;
191  }
192 
193  bool VisitNamedDecl(NamedDecl *ND) {
194  if (canHighlightName(ND->getDeclName()))
195  addToken(ND->getLocation(), ND);
196  return true;
197  }
198 
199  bool VisitDeclRefExpr(DeclRefExpr *Ref) {
200  if (canHighlightName(Ref->getNameInfo().getName()))
201  addToken(Ref->getLocation(), Ref->getDecl());
202  return true;
203  }
204 
205  bool VisitTypedefTypeLoc(TypedefTypeLoc TL) {
206  addToken(TL.getBeginLoc(), TL.getTypedefNameDecl());
207  return true;
208  }
209 
210  bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc TL) {
211  if (const TemplateDecl *TD =
212  TL.getTypePtr()->getTemplateName().getAsTemplateDecl())
213  addToken(TL.getBeginLoc(), TD);
214  return true;
215  }
216 
217  bool WalkUpFromTagTypeLoc(TagTypeLoc L) {
218  if (L.isDefinition())
219  return true; // Definition will be highligthed by VisitNamedDecl.
220  return RecursiveASTVisitor::WalkUpFromTagTypeLoc(L);
221  }
222 
223  bool WalkUpFromElaboratedTypeLoc(ElaboratedTypeLoc L) {
224  // Avoid highlighting 'struct' or 'enum' keywords.
225  return true;
226  }
227 
228  bool WalkUpFromDependentNameTypeLoc(DependentNameTypeLoc L) {
229  addToken(L.getNameLoc(), HighlightingKind::DependentType);
230  return true;
231  }
232 
233  bool VisitTypeLoc(TypeLoc TL) {
234  if (auto K = kindForType(TL.getTypePtr()))
235  addToken(TL.getBeginLoc(), *K);
236  return true;
237  }
238 
239  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNSLoc) {
240  if (auto *NNS = NNSLoc.getNestedNameSpecifier()) {
241  if (NNS->getKind() == NestedNameSpecifier::Namespace ||
242  NNS->getKind() == NestedNameSpecifier::NamespaceAlias)
243  addToken(NNSLoc.getLocalBeginLoc(), HighlightingKind::Namespace);
244  }
245  return RecursiveASTVisitor<
246  HighlightingTokenCollector>::TraverseNestedNameSpecifierLoc(NNSLoc);
247  }
248 
249  bool TraverseConstructorInitializer(CXXCtorInitializer *CI) {
250  if (const FieldDecl *FD = CI->getMember())
251  addToken(CI->getSourceLocation(), FD);
252  return RecursiveASTVisitor<
253  HighlightingTokenCollector>::TraverseConstructorInitializer(CI);
254  }
255 
256  bool VisitDeclaratorDecl(DeclaratorDecl *D) {
257  // Highlight 'auto' with its underlying type.
258  auto *AT = D->getType()->getContainedAutoType();
259  if (!AT)
260  return true;
261  auto K = kindForType(AT->getDeducedType().getTypePtrOrNull());
262  if (!K)
263  return true;
264  addToken(D->getTypeSpecStartLoc(), *K);
265  return true;
266  }
267 
268 private:
269  void addToken(SourceLocation Loc, HighlightingKind Kind) {
270  if (Loc.isInvalid())
271  return;
272  const auto &SM = AST.getSourceManager();
273  if (Loc.isMacroID()) {
274  // Only intereseted in highlighting arguments in macros (DEF_X(arg)).
275  if (!SM.isMacroArgExpansion(Loc))
276  return;
277  Loc = SM.getSpellingLoc(Loc);
278  }
279 
280  // Non top level decls that are included from a header are not filtered by
281  // topLevelDecls. (example: method declarations being included from
282  // another file for a class from another file).
283  // There are also cases with macros where the spelling loc will not be in
284  // the main file and the highlighting would be incorrect.
285  if (!isInsideMainFile(Loc, SM))
286  return;
287 
288  auto R = getTokenRange(SM, AST.getASTContext().getLangOpts(), Loc);
289  if (!R) {
290  // R should always have a value, if it doesn't something is very wrong.
291  elog("Tried to add semantic token with an invalid range");
292  return;
293  }
294 
295  Tokens.push_back({Kind, R.getValue()});
296  }
297 
298  void addToken(SourceLocation Loc, const NamedDecl *D) {
299  if (auto K = kindForDecl(D))
300  addToken(Loc, *K);
301  }
302 };
303 
304 // Encode binary data into base64.
305 // This was copied from compiler-rt/lib/fuzzer/FuzzerUtil.cpp.
306 // FIXME: Factor this out into llvm/Support?
307 std::string encodeBase64(const llvm::SmallVectorImpl<char> &Bytes) {
308  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
309  "abcdefghijklmnopqrstuvwxyz"
310  "0123456789+/";
311  std::string Res;
312  size_t I;
313  for (I = 0; I + 2 < Bytes.size(); I += 3) {
314  uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8) + Bytes[I + 2];
315  Res += Table[(X >> 18) & 63];
316  Res += Table[(X >> 12) & 63];
317  Res += Table[(X >> 6) & 63];
318  Res += Table[X & 63];
319  }
320  if (I + 1 == Bytes.size()) {
321  uint32_t X = (Bytes[I] << 16);
322  Res += Table[(X >> 18) & 63];
323  Res += Table[(X >> 12) & 63];
324  Res += "==";
325  } else if (I + 2 == Bytes.size()) {
326  uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8);
327  Res += Table[(X >> 18) & 63];
328  Res += Table[(X >> 12) & 63];
329  Res += Table[(X >> 6) & 63];
330  Res += "=";
331  }
332  return Res;
333 }
334 
335 void write32be(uint32_t I, llvm::raw_ostream &OS) {
336  std::array<char, 4> Buf;
337  llvm::support::endian::write32be(Buf.data(), I);
338  OS.write(Buf.data(), Buf.size());
339 }
340 
341 void write16be(uint16_t I, llvm::raw_ostream &OS) {
342  std::array<char, 2> Buf;
343  llvm::support::endian::write16be(Buf.data(), I);
344  OS.write(Buf.data(), Buf.size());
345 }
346 
347 // Get the highlightings on \c Line where the first entry of line is at \c
348 // StartLineIt. If it is not at \c StartLineIt an empty vector is returned.
349 ArrayRef<HighlightingToken>
350 takeLine(ArrayRef<HighlightingToken> AllTokens,
351  ArrayRef<HighlightingToken>::iterator StartLineIt, int Line) {
352  return ArrayRef<HighlightingToken>(StartLineIt, AllTokens.end())
353  .take_while([Line](const HighlightingToken &Token) {
354  return Token.R.start.line == Line;
355  });
356 }
357 } // namespace
358 
359 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, HighlightingKind K) {
360  switch (K) {
362  return OS << "Variable";
364  return OS << "LocalVariable";
366  return OS << "Parameter";
368  return OS << "Function";
370  return OS << "Method";
372  return OS << "StaticMethod";
374  return OS << "Field";
376  return OS << "StaticField";
378  return OS << "Class";
380  return OS << "Enum";
382  return OS << "EnumConstant";
384  return OS << "Typedef";
386  return OS << "DependentType";
388  return OS << "DependentName";
390  return OS << "Namespace";
392  return OS << "TemplateParameter";
394  return OS << "Primitive";
396  return OS << "Macro";
397  }
398  llvm_unreachable("invalid HighlightingKind");
399 }
400 
401 std::vector<LineHighlightings>
402 diffHighlightings(ArrayRef<HighlightingToken> New,
403  ArrayRef<HighlightingToken> Old) {
404  assert(std::is_sorted(New.begin(), New.end()) &&
405  "New must be a sorted vector");
406  assert(std::is_sorted(Old.begin(), Old.end()) &&
407  "Old must be a sorted vector");
408 
409  // FIXME: There's an edge case when tokens span multiple lines. If the first
410  // token on the line started on a line above the current one and the rest of
411  // the line is the equal to the previous one than we will remove all
412  // highlights but the ones for the token spanning multiple lines. This means
413  // that when we get into the LSP layer the only highlights that will be
414  // visible are the ones for the token spanning multiple lines.
415  // Example:
416  // EndOfMultilineToken Token Token Token
417  // If "Token Token Token" don't differ from previously the line is
418  // incorrectly removed. Suggestion to fix is to separate any multiline tokens
419  // into one token for every line it covers. This requires reading from the
420  // file buffer to figure out the length of each line though.
421  std::vector<LineHighlightings> DiffedLines;
422  // ArrayRefs to the current line in the highlightings.
423  ArrayRef<HighlightingToken> NewLine(New.begin(),
424  /*length*/ static_cast<size_t>(0));
425  ArrayRef<HighlightingToken> OldLine(Old.begin(),
426  /*length*/ static_cast<size_t>(0));
427  auto NewEnd = New.end();
428  auto OldEnd = Old.end();
429  auto NextLineNumber = [&]() {
430  int NextNew = NewLine.end() != NewEnd ? NewLine.end()->R.start.line
431  : std::numeric_limits<int>::max();
432  int NextOld = OldLine.end() != OldEnd ? OldLine.end()->R.start.line
433  : std::numeric_limits<int>::max();
434  return std::min(NextNew, NextOld);
435  };
436 
437  for (int LineNumber = 0; NewLine.end() < NewEnd || OldLine.end() < OldEnd;
438  LineNumber = NextLineNumber()) {
439  NewLine = takeLine(New, NewLine.end(), LineNumber);
440  OldLine = takeLine(Old, OldLine.end(), LineNumber);
441  if (NewLine != OldLine)
442  DiffedLines.push_back({LineNumber, NewLine});
443  }
444 
445  return DiffedLines;
446 }
447 
449  return std::tie(L.R, L.Kind) == std::tie(R.R, R.Kind);
450 }
452  return std::tie(L.R, L.Kind) < std::tie(R.R, R.Kind);
453 }
455  return std::tie(L.Line, L.Tokens) == std::tie(R.Line, R.Tokens);
456 }
457 
458 std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST) {
459  return HighlightingTokenCollector(AST).collectTokens();
460 }
461 
462 std::vector<SemanticHighlightingInformation>
463 toSemanticHighlightingInformation(llvm::ArrayRef<LineHighlightings> Tokens) {
464  if (Tokens.size() == 0)
465  return {};
466 
467  // FIXME: Tokens might be multiple lines long (block comments) in this case
468  // this needs to add multiple lines for those tokens.
469  std::vector<SemanticHighlightingInformation> Lines;
470  Lines.reserve(Tokens.size());
471  for (const auto &Line : Tokens) {
472  llvm::SmallVector<char, 128> LineByteTokens;
473  llvm::raw_svector_ostream OS(LineByteTokens);
474  for (const auto &Token : Line.Tokens) {
475  // Writes the token to LineByteTokens in the byte format specified by the
476  // LSP proposal. Described below.
477  // |<---- 4 bytes ---->|<-- 2 bytes -->|<--- 2 bytes -->|
478  // | character | length | index |
479 
480  write32be(Token.R.start.character, OS);
481  write16be(Token.R.end.character - Token.R.start.character, OS);
482  write16be(static_cast<int>(Token.Kind), OS);
483  }
484 
485  Lines.push_back({Line.Line, encodeBase64(LineByteTokens)});
486  }
487 
488  return Lines;
489 }
490 
491 llvm::StringRef toTextMateScope(HighlightingKind Kind) {
492  // FIXME: Add scopes for C and Objective C.
493  switch (Kind) {
495  return "entity.name.function.cpp";
497  return "entity.name.function.method.cpp";
499  return "entity.name.function.method.static.cpp";
501  return "variable.other.cpp";
503  return "variable.other.local.cpp";
505  return "variable.parameter.cpp";
507  return "variable.other.field.cpp";
509  return "variable.other.field.static.cpp";
511  return "entity.name.type.class.cpp";
513  return "entity.name.type.enum.cpp";
515  return "variable.other.enummember.cpp";
517  return "entity.name.type.typedef.cpp";
519  return "entity.name.type.dependent.cpp";
521  return "entity.name.other.dependent.cpp";
523  return "entity.name.namespace.cpp";
525  return "entity.name.type.template.cpp";
527  return "storage.type.primitive.cpp";
529  return "entity.name.function.preprocessor.cpp";
530  }
531  llvm_unreachable("unhandled HighlightingKind");
532 }
533 
534 } // namespace clangd
535 } // namespace clang
SourceLocation Loc
'#' location in the include directive
std::vector< HighlightingToken > Tokens
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM)
Returns true iff Loc is inside the main file.
Definition: SourceCode.cpp:537
llvm::StringRef toTextMateScope(HighlightingKind Kind)
Converts a HighlightingKind to a corresponding TextMate scope (https://manual.macromates.com/en/language_grammars).
Documents should not be synced at all.
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:56
BindArgumentKind Kind
std::vector< SemanticHighlightingInformation > toSemanticHighlightingInformation(llvm::ArrayRef< LineHighlightings > Tokens)
Convert to LSP's semantic highlighting information.
bool operator<(const Ref &L, const Ref &R)
Definition: Ref.h:58
bool operator==(const Ref &L, const Ref &R)
Definition: Ref.h:61
StringRef Tokens
static constexpr llvm::StringLiteral Name
std::vector< LineHighlightings > diffHighlightings(ArrayRef< HighlightingToken > New, ArrayRef< HighlightingToken > Old)
Return a line-by-line diff between two highlightings.
const Decl * D
Definition: XRefs.cpp:849
llvm::Optional< Range > getTokenRange(const SourceManager &SM, const LangOptions &LangOpts, SourceLocation TokLoc)
Returns the token range at TokLoc.
Definition: SourceCode.cpp:229
Stores and provides access to parsed AST.
Definition: ParsedAST.h:46
llvm::SmallDenseMap< const Decl *, RelSet > Decls
Definition: FindTarget.cpp:88
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static URISchemeRegistry::Add< TestScheme > X(TestScheme::Scheme, "Test schema")
Contains all information about highlightings on a single line.
static GeneratorRegistry::Add< MDGenerator > MD(MDGenerator::Format, "Generator for MD output.")
std::vector< HighlightingToken > getSemanticHighlightings(ParsedAST &AST)
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
NodeType Type
unsigned Lines