clang-tools  15.0.0git
Token.h
Go to the documentation of this file.
1 //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Token objects represent a characteristic of a symbol, which can be used to
11 /// perform efficient search. Tokens are the keys of an inverted index, which
12 /// maps them to the corresponding posting lists.
13 ///
14 /// The symbol std::cout might have the tokens:
15 /// * Scope "std::"
16 /// * Trigram "cou"
17 /// * Trigram "out"
18 /// * Type "std::ostream"
19 ///
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
23 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
24 
25 #include "llvm/Support/raw_ostream.h"
26 #include <string>
27 #include <vector>
28 
29 namespace clang {
30 namespace clangd {
31 namespace dex {
32 
33 /// A Token represents an attribute of a symbol, such as a particular trigram
34 /// present in the name (used for fuzzy search).
35 ///
36 /// Tokens can be used to perform more sophisticated search queries by
37 /// constructing complex iterator trees.
38 class Token {
39 public:
40  /// Kind specifies Token type which defines semantics for the internal
41  /// representation. Each Kind has different representation stored in Data
42  /// field.
43  // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
44  // strings. For example, PathURI store URIs of each directory and its parents,
45  // which induces a lot of overhead because these paths tend to be long and
46  // each parent directory is a prefix.
47  enum class Kind {
48  /// Represents trigram used for fuzzy search of unqualified symbol names.
49  ///
50  /// Data contains 3 bytes with trigram contents.
51  Trigram,
52  /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
53  ///
54  /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
55  /// scope).
56  Scope,
57  /// Path Proximity URI to symbol declaration.
58  ///
59  /// Data stores path URI of symbol declaration file or its parent.
60  ///
61  /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
62  /// and some amount of its parents.
64  /// Type of symbol (see `Symbol::Type`).
65  Type,
66  /// Internal Token type for invalid/special tokens, e.g. empty tokens for
67  /// llvm::DenseMap.
68  Sentinel,
69  };
70 
71  Token(Kind TokenKind, llvm::StringRef Data)
72  : Data(Data), TokenKind(TokenKind) {}
73 
74  bool operator==(const Token &Other) const {
75  return TokenKind == Other.TokenKind && Data == Other.Data;
76  }
77 
78  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
79  switch (T.TokenKind) {
80  case Kind::Trigram:
81  OS << "T=";
82  break;
83  case Kind::Scope:
84  OS << "S=";
85  break;
86  case Kind::ProximityURI:
87  OS << "U=";
88  break;
89  case Kind::Type:
90  OS << "Ty=";
91  break;
92  case Kind::Sentinel:
93  OS << "?=";
94  break;
95  }
96  return OS << T.Data;
97  }
98 
99 private:
100  /// Representation which is unique among Token with the same Kind.
101  std::string Data;
102  Kind TokenKind;
103 
104  friend llvm::hash_code hash_value(const Token &Token) {
105  return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data);
106  }
107 };
108 
109 } // namespace dex
110 } // namespace clangd
111 } // namespace clang
112 
113 namespace llvm {
114 
115 // Support Tokens as DenseMap keys.
116 template <> struct DenseMapInfo<clang::clangd::dex::Token> {
118  return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey"};
119  }
120 
122  return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"};
123  }
124 
125  static unsigned getHashValue(const clang::clangd::dex::Token &Tag) {
126  return hash_value(Tag);
127  }
128 
129  static bool isEqual(const clang::clangd::dex::Token &LHS,
130  const clang::clangd::dex::Token &RHS) {
131  return LHS == RHS;
132  }
133 };
134 
135 } // namespace llvm
136 
137 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
llvm
Some operations such as code completion produce a set of candidates.
Definition: YAMLGenerator.cpp:28
llvm::DenseMapInfo< clang::clangd::dex::Token >::isEqual
static bool isEqual(const clang::clangd::dex::Token &LHS, const clang::clangd::dex::Token &RHS)
Definition: Token.h:129
clang::clangd::dex::Token::operator==
bool operator==(const Token &Other) const
Definition: Token.h:74
clang::clangd::hash_value
llvm::hash_code hash_value(const SymbolID &ID)
Definition: SymbolID.h:62
clang::clangd::dex::Token::Token
Token(Kind TokenKind, llvm::StringRef Data)
Definition: Token.h:71
llvm::DenseMapInfo< clang::clangd::dex::Token >::getHashValue
static unsigned getHashValue(const clang::clangd::dex::Token &Tag)
Definition: Token.h:125
llvm::DenseMapInfo< clang::clangd::dex::Token >::getEmptyKey
static clang::clangd::dex::Token getEmptyKey()
Definition: Token.h:117
clang::clangd::dex::Token::Kind::Scope
@ Scope
Scope primitives, e.g.
clang::clangd::dex::Token::Kind::ProximityURI
@ ProximityURI
Path Proximity URI to symbol declaration.
clang::clangd::dex::Token::Kind::Type
@ Type
Type of symbol (see Symbol::Type).
clang::clangd::dex::Token
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
Definition: Token.h:38
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:160
clang::clangd::dex::Token::Kind::Sentinel
@ Sentinel
Internal Token type for invalid/special tokens, e.g.
clang::clangd::dex::Token::Kind
Kind
Kind specifies Token type which defines semantics for the internal representation.
Definition: Token.h:47
clang::clangd::dex::Token::hash_value
friend llvm::hash_code hash_value(const Token &Token)
Definition: Token.h:104
Tag
HTMLTag Tag
Definition: HTMLGenerator.cpp:90
clang::clangd::dex::Token::Kind::Trigram
@ Trigram
Represents trigram used for fuzzy search of unqualified symbol names.
llvm::DenseMapInfo< clang::clangd::dex::Token >::getTombstoneKey
static clang::clangd::dex::Token getTombstoneKey()
Definition: Token.h:121
clang::clangd::dex::Token::operator<<
friend llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const Token &T)
Definition: Token.h:78