clang-tools  10.0.0svn
Token.h
Go to the documentation of this file.
1 //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Token objects represent a characteristic of a symbol, which can be used to
11 /// perform efficient search. Tokens are the keys of the inverted index; each
12 /// one is mapped to its corresponding posting list.
13 ///
14 /// The symbol std::cout might have the tokens:
15 /// * Scope "std::"
16 /// * Trigram "cou"
17 /// * Trigram "out"
18 /// * Type "std::ostream"
19 ///
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
23 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
24 
25 #include "index/Index.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <string>
29 #include <vector>
30 
31 namespace clang {
32 namespace clangd {
33 namespace dex {
34 
35 /// A Token represents an attribute of a symbol, such as a particular trigram
36 /// present in the name (used for fuzzy search).
37 ///
38 /// Tokens can be used to perform more sophisticated search queries by
39 /// constructing complex iterator trees.
40 class Token {
41 public:
42  /// Kind specifies Token type which defines semantics for the internal
43  /// representation. Each Kind has different representation stored in Data
44  /// field.
45  // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
46  // strings. For example, PathURI store URIs of each directory and its parents,
47  // which induces a lot of overhead because these paths tend to be long and
48  // each parent directory is a prefix.
49  enum class Kind {
50  /// Represents trigram used for fuzzy search of unqualified symbol names.
51  ///
52  /// Data contains 3 bytes with trigram contents.
53  Trigram,
54  /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
55  ///
56  /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
57  /// scope).
58  Scope,
59  /// Path Proximity URI to symbol declaration.
60  ///
61  /// Data stores path URI of symbol declaration file or its parent.
62  ///
63  /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
64  /// and some amount of its parents.
66  /// Type of symbol (see `Symbol::Type`).
67  Type,
68  /// Internal Token type for invalid/special tokens, e.g. empty tokens for
69  /// llvm::DenseMap.
70  Sentinel,
71  };
72 
73  Token(Kind TokenKind, llvm::StringRef Data)
74  : Data(Data), TokenKind(TokenKind) {}
75 
76  bool operator==(const Token &Other) const {
77  return TokenKind == Other.TokenKind && Data == Other.Data;
78  }
79 
80  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
81  switch (T.TokenKind) {
82  case Kind::Trigram:
83  OS << "T=";
84  break;
85  case Kind::Scope:
86  OS << "S=";
87  break;
88  case Kind::ProximityURI:
89  OS << "U=";
90  break;
91  case Kind::Type:
92  OS << "Ty=";
93  break;
94  case Kind::Sentinel:
95  OS << "?=";
96  break;
97  }
98  return OS << T.Data;
99  }
100 
101 private:
102  /// Representation which is unique among Token with the same Kind.
103  std::string Data;
104  Kind TokenKind;
105 
106  friend llvm::hash_code hash_value(const Token &Token) {
107  return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data);
108  }
109 };
110 
111 } // namespace dex
112 } // namespace clangd
113 } // namespace clang
114 
115 namespace llvm {
116 
117 // Support Tokens as DenseMap keys.
118 template <> struct DenseMapInfo<clang::clangd::dex::Token> {
120  return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey"};
121  }
122 
124  return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"};
125  }
126 
127  static unsigned getHashValue(const clang::clangd::dex::Token &Tag) {
128  return hash_value(Tag);
129  }
130 
131  static bool isEqual(const clang::clangd::dex::Token &LHS,
132  const clang::clangd::dex::Token &RHS) {
133  return LHS == RHS;
134  }
135 };
136 
137 } // namespace llvm
138 
139 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
Some operations such as code completion produce a set of candidates.
static unsigned getHashValue(const clang::clangd::dex::Token &Tag)
Definition: Token.h:127
HTMLTag Tag
Path Proximity URI to symbol declaration.
Represents trigram used for fuzzy search of unqualified symbol names.
friend llvm::hash_code hash_value(const Token &Token)
Definition: Token.h:106
friend llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const Token &T)
Definition: Token.h:80
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
bool operator==(const Token &Other) const
Definition: Token.h:76
static bool isEqual(const clang::clangd::dex::Token &LHS, const clang::clangd::dex::Token &RHS)
Definition: Token.h:131
static clang::clangd::dex::Token getEmptyKey()
Definition: Token.h:119
Kind
Kind specifies Token type which defines semantics for the internal representation.
Definition: Token.h:49
Internal Token type for invalid/special tokens, e.g. empty tokens for llvm::DenseMap.
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used for fuzzy search).
Definition: Token.h:40
Type of symbol (see Symbol::Type).
Token(Kind TokenKind, llvm::StringRef Data)
Definition: Token.h:73
static clang::clangd::dex::Token getTombstoneKey()
Definition: Token.h:123