clang  15.0.0git
SourceLocationEncoding.h
Go to the documentation of this file.
1 //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Source locations are stored pervasively in the AST, making up a third of
10 // the size of typical serialized files. Storing them efficiently is important.
11 //
12 // We use integers optimized by VBR-encoding, because:
13 // - when abbrevations cannot be used, VBR6 encoding is our only choice
14 // - in the worst case a SourceLocation can be ~any 32-bit number, but in
15 // practice they are highly predictable
16 //
17 // We encode the integer so that likely values encode as small numbers that
18 // turn into few VBR chunks:
19 // - the invalid sentinel location is a very common value: it encodes as 0
20 // - the "macro or not" bit is stored at the bottom of the integer
21 // (rather than at the top, as in memory), so macro locations can have
22 // small representations.
23 // - related locations (e.g. of a left and right paren pair) are usually
24 // similar, so when encoding a sequence of locations we store only
25 // differences between successive elements.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include <climits>
31 
32 #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
33 #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
34 
35 namespace clang {
36 class SourceLocationSequence;
37 
38 /// Serialized encoding of SourceLocations without context.
39 /// Optimized to have small unsigned values (=> small after VBR encoding).
40 ///
41 // Macro locations have the top bit set, we rotate by one so it is the low bit.
43  using UIntTy = SourceLocation::UIntTy;
44  constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
45 
46  static UIntTy encodeRaw(UIntTy Raw) {
47  return (Raw << 1) | (Raw >> (UIntBits - 1));
48  }
49  static UIntTy decodeRaw(UIntTy Raw) {
50  return (Raw >> 1) | (Raw << (UIntBits - 1));
51  }
53 
54 public:
55  static uint64_t encode(SourceLocation Loc,
56  SourceLocationSequence * = nullptr);
58 };
59 
60 /// Serialized encoding of a sequence of SourceLocations.
61 ///
62 /// Optimized to produce small values when locations with the sequence are
63 /// similar. Each element can be delta-encoded against the last nonzero element.
64 ///
65 /// Sequences should be started by creating a SourceLocationSequence::State,
66 /// and then passed around as SourceLocationSequence*. Example:
67 ///
68 /// // establishes a sequence
69 /// void EmitTopLevelThing() {
70 /// SourceLocationSequence::State Seq;
71 /// EmitContainedThing(Seq);
72 /// EmitRecursiveThing(Seq);
73 /// }
74 ///
75 /// // optionally part of a sequence
76 /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
77 /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
78 /// }
79 ///
80 /// // establishes a sequence if there isn't one already
81 /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
82 /// SourceLocationSequence::State Seq(ParentSeq);
83 /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
84 /// EmitRecursiveThing(Seq);
85 /// }
86 ///
88  using UIntTy = SourceLocation::UIntTy;
89  using EncodedTy = uint64_t;
90  constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
91  static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
92 
93  // Prev stores the rotated last nonzero location.
94  UIntTy &Prev;
95 
96  // Zig-zag encoding turns small signed integers into small unsigned integers.
97  // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
98  static UIntTy zigZag(UIntTy V) {
99  UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
100  return Sign ^ (V << 1);
101  }
102  static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
103 
104  SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
105 
106  EncodedTy encodeRaw(UIntTy Raw) {
107  if (Raw == 0)
108  return 0;
109  UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
110  if (Prev == 0)
111  return Prev = Rotated;
112  UIntTy Delta = Rotated - Prev;
113  Prev = Rotated;
114  // Exactly one 33 bit value is possible! (1 << 32).
115  // This is because we have two representations of zero: trivial & relative.
116  return 1 + EncodedTy{zigZag(Delta)};
117  }
118  UIntTy decodeRaw(EncodedTy Encoded) {
119  if (Encoded == 0)
120  return 0;
121  if (Prev == 0)
122  return SourceLocationEncoding::decodeRaw(Prev = Encoded);
123  return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
124  }
125 
126 public:
127  SourceLocation decode(EncodedTy Encoded) {
128  return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
129  }
130  EncodedTy encode(SourceLocation Loc) {
131  return encodeRaw(Loc.getRawEncoding());
132  }
133 
134  class State;
135 };
136 
137 /// This object establishes a SourceLocationSequence.
139  UIntTy Prev = 0;
141 
142 public:
143  // If Parent is provided and non-null, then this root becomes part of that
144  // enclosing sequence instead of establishing a new one.
146  : Seq(Parent ? Parent->Prev : Prev) {}
147 
148  // Implicit conversion for uniform use of roots vs propagated sequences.
149  operator SourceLocationSequence *() { return &Seq; }
150 };
151 
153  SourceLocationSequence *Seq) {
154  return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
155 }
156 inline SourceLocation
158  return Seq ? Seq->decode(Encoded)
159  : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
160 }
161 
162 } // namespace clang
163 #endif
clang::SourceLocationEncoding::encode
static uint64_t encode(SourceLocation Loc, SourceLocationSequence *=nullptr)
Definition: SourceLocationEncoding.h:152
clang::SourceLocationSequence::encode
EncodedTy encode(SourceLocation Loc)
Definition: SourceLocationEncoding.h:130
clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:86
clang::SourceLocationSequence::State
This object establishes a SourceLocationSequence.
Definition: SourceLocationEncoding.h:138
CHAR_BIT
#define CHAR_BIT
Definition: limits.h:63
uint64_t
unsigned long uint64_t
Definition: hlsl_basic_types.h:24
clang::SourceLocationSequence::decode
SourceLocation decode(EncodedTy Encoded)
Definition: SourceLocationEncoding.h:127
V
#define V(N, I)
Definition: ASTContext.h:3176
clang::SourceLocationEncoding
Serialized encoding of SourceLocations without context.
Definition: SourceLocationEncoding.h:42
SourceLocation.h
clang::SourceLocationEncoding::decode
static SourceLocation decode(uint64_t, SourceLocationSequence *=nullptr)
Definition: SourceLocationEncoding.h:157
clang::SourceLocationSequence
Serialized encoding of a sequence of SourceLocations.
Definition: SourceLocationEncoding.h:87
State
LineState State
Definition: UnwrappedLineFormatter.cpp:1126
clang::SourceLocation::getFromRawEncoding
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
Definition: SourceLocation.h:152
clang
Definition: CalledOnceCheck.h:17
clang::SourceLocationSequence::State::State
State(SourceLocationSequence *Parent=nullptr)
Definition: SourceLocationEncoding.h:145
Parent
NodeId Parent
Definition: ASTDiff.cpp:192
clang::SourceLocation::UIntTy
uint32_t UIntTy
Definition: SourceLocation.h:93
clang::SourceLocation::getRawEncoding
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
Definition: SourceLocation.h:146