clang 20.0.0git
SourceLocationEncoding.h
Go to the documentation of this file.
1//===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// We wish to encode the SourceLocation from other module file not dependent
10// on the other module file. So that the source location changes from other
11// module file may not affect the contents of the current module file. Then the
12// users don't need to recompile the whole project due to a new line in a module
13// unit in the root of the dependency graph.
14//
15// To achieve this, we need to encode the index of the module file into the
16// encoding of the source location. The encoding of the source location may be:
17//
18// |-----------------------|-----------------------|
19// | A | B | C |
20//
21// * A: 32 bit. The index of the module file in the module manager + 1. The +1
22// here is necessary since we wish 0 stands for the current module file.
23// * B: 31 bit. The offset of the source location to the module file containing
24// it.
25// * C: The macro bit. We rotate it to the lowest bit so that we can save some
26// space in case the index of the module file is 0.
27//
28// Specially, if the index of the module file is 0, we allow to encode a
29// sequence of locations we store only differences between successive elements.
30//
31//===----------------------------------------------------------------------===//
32
34#include "llvm/Support/MathExtras.h"
35#include <climits>
36
37#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
38#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
39
40namespace clang {
41class SourceLocationSequence;
42
43/// Serialized encoding of SourceLocations without context.
44/// Optimized to have small unsigned values (=> small after VBR encoding).
45///
46// Macro locations have the top bit set, we rotate by one so it is the low bit.
48 using UIntTy = SourceLocation::UIntTy;
49 constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
50
51 static UIntTy encodeRaw(UIntTy Raw) {
52 return (Raw << 1) | (Raw >> (UIntBits - 1));
53 }
54 static UIntTy decodeRaw(UIntTy Raw) {
55 return (Raw >> 1) | (Raw << (UIntBits - 1));
56 }
58
59public:
60 using RawLocEncoding = uint64_t;
61
62 static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
63 unsigned BaseModuleFileIndex,
64 SourceLocationSequence * = nullptr);
65 static std::pair<SourceLocation, unsigned>
67};
68
69/// Serialized encoding of a sequence of SourceLocations.
70///
71/// Optimized to produce small values when locations with the sequence are
72/// similar. Each element can be delta-encoded against the last nonzero element.
73///
74/// Sequences should be started by creating a SourceLocationSequence::State,
75/// and then passed around as SourceLocationSequence*. Example:
76///
77/// // establishes a sequence
78/// void EmitTopLevelThing() {
79/// SourceLocationSequence::State Seq;
80/// EmitContainedThing(Seq);
81/// EmitRecursiveThing(Seq);
82/// }
83///
84/// // optionally part of a sequence
85/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
86/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
87/// }
88///
89/// // establishes a sequence if there isn't one already
90/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
91/// SourceLocationSequence::State Seq(ParentSeq);
92/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
93/// EmitRecursiveThing(Seq);
94/// }
95///
97 using UIntTy = SourceLocation::UIntTy;
98 using EncodedTy = uint64_t;
99 constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
100 static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
101
102 // Prev stores the rotated last nonzero location.
103 UIntTy &Prev;
104
105 // Zig-zag encoding turns small signed integers into small unsigned integers.
106 // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
107 static UIntTy zigZag(UIntTy V) {
108 UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
109 return Sign ^ (V << 1);
110 }
111 static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
112
113 SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
114
115 EncodedTy encodeRaw(UIntTy Raw) {
116 if (Raw == 0)
117 return 0;
118 UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
119 if (Prev == 0)
120 return Prev = Rotated;
121 UIntTy Delta = Rotated - Prev;
122 Prev = Rotated;
123 // Exactly one 33 bit value is possible! (1 << 32).
124 // This is because we have two representations of zero: trivial & relative.
125 return 1 + EncodedTy{zigZag(Delta)};
126 }
127 UIntTy decodeRaw(EncodedTy Encoded) {
128 if (Encoded == 0)
129 return 0;
130 if (Prev == 0)
131 return SourceLocationEncoding::decodeRaw(Prev = Encoded);
132 return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
133 }
134
135public:
136 SourceLocation decode(EncodedTy Encoded) {
137 return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
138 }
140 return encodeRaw(Loc.getRawEncoding());
141 }
142
143 class State;
144};
145
146/// This object establishes a SourceLocationSequence.
148 UIntTy Prev = 0;
150
151public:
152 // If Parent is provided and non-null, then this root becomes part of that
153 // enclosing sequence instead of establishing a new one.
155 : Seq(Parent ? Parent->Prev : Prev) {}
156
157 // Implicit conversion for uniform use of roots vs propagated sequences.
158 operator SourceLocationSequence *() { return &Seq; }
159};
160
163 unsigned BaseModuleFileIndex,
165 // If the source location is a local source location, we can try to optimize
166 // the similar sequences to only record the differences.
167 if (!BaseOffset)
168 return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
169
170 if (Loc.isInvalid())
171 return 0;
172
173 // Otherwise, the higher bits are used to store the module file index,
174 // so it is meaningless to optimize the source locations into small
175 // integers. Let's try to always use the raw encodings.
176 assert(Loc.getOffset() >= BaseOffset);
177 Loc = Loc.getLocWithOffset(-BaseOffset);
178 RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
179
180 // 16 bits should be sufficient to store the module file index.
181 assert(BaseModuleFileIndex < (1 << 16));
182 Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
183 return Encoded;
184}
185inline std::pair<SourceLocation, unsigned>
188 unsigned ModuleFileIndex = Encoded >> 32;
189
190 if (!ModuleFileIndex)
191 return {Seq ? Seq->decode(Encoded)
192 : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
193 ModuleFileIndex};
194
195 Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
197
198 return {Loc, ModuleFileIndex};
199}
200
201} // namespace clang
202#endif
#define V(N, I)
Definition: ASTContext.h:3338
NodeId Parent
Definition: ASTDiff.cpp:191
SourceLocation Loc
Definition: SemaObjC.cpp:758
Defines the clang::SourceLocation class and associated facilities.
Serialized encoding of SourceLocations without context.
static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset, unsigned BaseModuleFileIndex, SourceLocationSequence *=nullptr)
static std::pair< SourceLocation, unsigned > decode(RawLocEncoding, SourceLocationSequence *=nullptr)
This object establishes a SourceLocationSequence.
State(SourceLocationSequence *Parent=nullptr)
Serialized encoding of a sequence of SourceLocations.
EncodedTy encode(SourceLocation Loc)
SourceLocation decode(EncodedTy Encoded)
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
#define CHAR_BIT
Definition: limits.h:71
The JSON file list parser is used to communicate input to InstallAPI.
@ Seq
'seq' clause, allowed on 'loop' and 'routine' directives.