clang-tools 20.0.0git
index/dex/Token.h
Go to the documentation of this file.
1//===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Token objects represent a characteristic of a symbol, which can be used to
11/// perform efficient search. Tokens are keys for inverted index which are
12/// mapped to the corresponding posting lists.
13///
14/// The symbol std::cout might have the tokens:
15/// * Scope "std::"
16/// * Trigram "cou"
17/// * Trigram "out"
18/// * Type "std::ostream"
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
23#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
24
25#include "llvm/ADT/Hashing.h"
26#include "llvm/Support/raw_ostream.h"
27#include <string>
28#include <vector>
29
30namespace clang {
31namespace clangd {
32namespace dex {
33
34/// A Token represents an attribute of a symbol, such as a particular trigram
35/// present in the name (used for fuzzy search).
36///
37/// Tokens can be used to perform more sophisticated search queries by
38/// constructing complex iterator trees.
39class Token {
40public:
41 /// Kind specifies Token type which defines semantics for the internal
42 /// representation. Each Kind has different representation stored in Data
43 /// field.
44 // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
45 // strings. For example, PathURI store URIs of each directory and its parents,
46 // which induces a lot of overhead because these paths tend to be long and
47 // each parent directory is a prefix.
48 enum class Kind {
49 /// Represents trigram used for fuzzy search of unqualified symbol names.
50 ///
51 /// Data contains 3 bytes with trigram contents.
52 Trigram,
53 /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
54 ///
55 /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
56 /// scope).
57 Scope,
58 /// Path Proximity URI to symbol declaration.
59 ///
60 /// Data stores path URI of symbol declaration file or its parent.
61 ///
62 /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
63 /// and some amount of its parents.
65 /// Type of symbol (see `Symbol::Type`).
66 Type,
67 /// Internal Token type for invalid/special tokens, e.g. empty tokens for
68 /// llvm::DenseMap.
70 };
71
72 Token(Kind TokenKind, llvm::StringRef Data)
73 : Data(Data), TokenKind(TokenKind) {}
74
75 bool operator==(const Token &Other) const {
76 return TokenKind == Other.TokenKind && Data == Other.Data;
77 }
78
79 friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
80 switch (T.TokenKind) {
81 case Kind::Trigram:
82 OS << "T=";
83 break;
84 case Kind::Scope:
85 OS << "S=";
86 break;
88 OS << "U=";
89 break;
90 case Kind::Type:
91 OS << "Ty=";
92 break;
93 case Kind::Sentinel:
94 OS << "?=";
95 break;
96 }
97 return OS << T.Data;
98 }
99
100private:
101 /// Representation which is unique among Token with the same Kind.
102 std::string Data;
103 Kind TokenKind;
104
105 friend llvm::hash_code hash_value(const Token &Token) {
106 return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data);
107 }
108};
109
110} // namespace dex
111} // namespace clangd
112} // namespace clang
113
114namespace llvm {
115
116// Support Tokens as DenseMap keys.
117template <> struct DenseMapInfo<clang::clangd::dex::Token> {
120 }
121
123 return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"};
124 }
125
127 return hash_value(Tag);
128 }
129
130 static bool isEqual(const clang::clangd::dex::Token &LHS,
131 const clang::clangd::dex::Token &RHS) {
132 return LHS == RHS;
133 }
134};
135
136} // namespace llvm
137
138#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
llvm::raw_ostream & OS
HTMLTag Tag
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
Token(Kind TokenKind, llvm::StringRef Data)
bool operator==(const Token &Other) const
Kind
Kind specifies Token type which defines semantics for the internal representation.
@ ProximityURI
Path Proximity URI to symbol declaration.
@ Scope
Scope primitives, e.g.
@ Sentinel
Internal Token type for invalid/special tokens, e.g.
@ Trigram
Represents trigram used for fuzzy search of unqualified symbol names.
@ Type
Type of symbol (see Symbol::Type).
friend llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const Token &T)
friend llvm::hash_code hash_value(const Token &Token)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Some operations such as code completion produce a set of candidates.
Definition: Generators.h:58
static clang::clangd::dex::Token getTombstoneKey()
static clang::clangd::dex::Token getEmptyKey()
static unsigned getHashValue(const clang::clangd::dex::Token &Tag)
static bool isEqual(const clang::clangd::dex::Token &LHS, const clang::clangd::dex::Token &RHS)