clang 19.0.0git
SourceLocationEncoding.h
Go to the documentation of this file.
1//===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Source locations are stored pervasively in the AST, making up a third of
10// the size of typical serialized files. Storing them efficiently is important.
11//
12// We use integers optimized by VBR-encoding, because:
13// - when abbreviations cannot be used, VBR6 encoding is our only choice
14// - in the worst case a SourceLocation can be ~any 32-bit number, but in
15// practice they are highly predictable
16//
17// We encode the integer so that likely values encode as small numbers that
18// turn into few VBR chunks:
19// - the invalid sentinel location is a very common value: it encodes as 0
20// - the "macro or not" bit is stored at the bottom of the integer
21// (rather than at the top, as in memory), so macro locations can have
22// small representations.
23// - related locations (e.g. of a left and right paren pair) are usually
24// similar, so when encoding a sequence of locations we store only
25// differences between successive elements.
26//
27//===----------------------------------------------------------------------===//
28
29#include <climits>
31
32#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
33#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
34
35namespace clang {
36class SourceLocationSequence;
37
38/// Serialized encoding of SourceLocations without context.
39/// Optimized to have small unsigned values (=> small after VBR encoding).
40///
41// Macro locations have the top bit set, we rotate by one so it is the low bit.
43 using UIntTy = SourceLocation::UIntTy;
44 constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
45
46 static UIntTy encodeRaw(UIntTy Raw) {
47 return (Raw << 1) | (Raw >> (UIntBits - 1));
48 }
49 static UIntTy decodeRaw(UIntTy Raw) {
50 return (Raw >> 1) | (Raw << (UIntBits - 1));
51 }
53
54public:
55 static uint64_t encode(SourceLocation Loc,
56 SourceLocationSequence * = nullptr);
57 static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
58};
59
60/// Serialized encoding of a sequence of SourceLocations.
61///
62/// Optimized to produce small values when locations with the sequence are
63/// similar. Each element can be delta-encoded against the last nonzero element.
64///
65/// Sequences should be started by creating a SourceLocationSequence::State,
66/// and then passed around as SourceLocationSequence*. Example:
67///
68/// // establishes a sequence
69/// void EmitTopLevelThing() {
70/// SourceLocationSequence::State Seq;
71/// EmitContainedThing(Seq);
72/// EmitRecursiveThing(Seq);
73/// }
74///
75/// // optionally part of a sequence
76/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
77/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
78/// }
79///
80/// // establishes a sequence if there isn't one already
81/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
82/// SourceLocationSequence::State Seq(ParentSeq);
83/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
84/// EmitRecursiveThing(Seq);
85/// }
86///
88 using UIntTy = SourceLocation::UIntTy;
89 using EncodedTy = uint64_t;
90 constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
91 static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
92
93 // Prev stores the rotated last nonzero location.
94 UIntTy &Prev;
95
96 // Zig-zag encoding turns small signed integers into small unsigned integers.
97 // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
98 static UIntTy zigZag(UIntTy V) {
99 UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
100 return Sign ^ (V << 1);
101 }
102 static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
103
104 SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
105
106 EncodedTy encodeRaw(UIntTy Raw) {
107 if (Raw == 0)
108 return 0;
109 UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
110 if (Prev == 0)
111 return Prev = Rotated;
112 UIntTy Delta = Rotated - Prev;
113 Prev = Rotated;
114 // Exactly one 33 bit value is possible! (1 << 32).
115 // This is because we have two representations of zero: trivial & relative.
116 return 1 + EncodedTy{zigZag(Delta)};
117 }
118 UIntTy decodeRaw(EncodedTy Encoded) {
119 if (Encoded == 0)
120 return 0;
121 if (Prev == 0)
122 return SourceLocationEncoding::decodeRaw(Prev = Encoded);
123 return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
124 }
125
126public:
127 SourceLocation decode(EncodedTy Encoded) {
128 return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
129 }
130 EncodedTy encode(SourceLocation Loc) {
131 return encodeRaw(Loc.getRawEncoding());
132 }
133
134 class State;
135};
136
137/// This object establishes a SourceLocationSequence.
139 UIntTy Prev = 0;
141
142public:
143 // If Parent is provided and non-null, then this root becomes part of that
144 // enclosing sequence instead of establishing a new one.
146 : Seq(Parent ? Parent->Prev : Prev) {}
147
148 // Implicit conversion for uniform use of roots vs propagated sequences.
149 operator SourceLocationSequence *() { return &Seq; }
150};
151
154 return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
155}
156inline SourceLocation
158 return Seq ? Seq->decode(Encoded)
159 : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
160}
161
162} // namespace clang
163#endif
#define V(N, I)
Definition: ASTContext.h:3255
NodeId Parent
Definition: ASTDiff.cpp:191
Defines the clang::SourceLocation class and associated facilities.
Serialized encoding of SourceLocations without context.
static SourceLocation decode(uint64_t, SourceLocationSequence *=nullptr)
static uint64_t encode(SourceLocation Loc, SourceLocationSequence *=nullptr)
This object establishes a SourceLocationSequence.
State(SourceLocationSequence *Parent=nullptr)
Serialized encoding of a sequence of SourceLocations.
EncodedTy encode(SourceLocation Loc)
SourceLocation decode(EncodedTy Encoded)
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
#define CHAR_BIT
Definition: limits.h:67
The JSON file list parser is used to communicate input to InstallAPI.
@ Seq
'seq' clause, allowed on 'loop' and 'routine' directives.