LLVM  14.0.0git
StringTableBuilder.cpp
Go to the documentation of this file.
1 //===- StringTableBuilder.cpp - String table building utility -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/BinaryFormat/COFF.h"
15 #include "llvm/Support/Endian.h"
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 #include <utility>
23 #include <vector>
24 
25 using namespace llvm;
26 
28 
29 void StringTableBuilder::initSize() {
30  // Account for leading bytes in table so that offsets returned from add are
31  // correct.
32  switch (K) {
33  case RAW:
34  case DWARF:
35  Size = 0;
36  break;
37  case MachOLinked:
38  case MachO64Linked:
39  Size = 2;
40  break;
41  case MachO:
42  case MachO64:
43  case ELF:
44  // Start the table with a NUL byte.
45  Size = 1;
46  break;
47  case XCOFF:
48  case WinCOFF:
49  // Make room to write the table size later.
50  Size = 4;
51  break;
52  }
53 }
54 
56  : K(K), Alignment(Alignment) {
57  initSize();
58 }
59 
61  assert(isFinalized());
63  Data.resize(getSize());
64  write((uint8_t *)Data.data());
65  OS << Data;
66 }
67 
68 using StringPair = std::pair<CachedHashStringRef, size_t>;
69 
70 void StringTableBuilder::write(uint8_t *Buf) const {
71  assert(isFinalized());
72  for (const StringPair &P : StringIndexMap) {
73  StringRef Data = P.first.val();
74  if (!Data.empty())
75  memcpy(Buf + P.second, Data.data(), Data.size());
76  }
77  // The COFF formats store the size of the string table in the first 4 bytes.
78  // For Windows, the format is little-endian; for AIX, it is big-endian.
79  if (K == WinCOFF)
80  support::endian::write32le(Buf, Size);
81  else if (K == XCOFF)
82  support::endian::write32be(Buf, Size);
83 }
84 
85 // Returns the character at Pos from end of a string.
86 static int charTailAt(StringPair *P, size_t Pos) {
87  StringRef S = P->first.val();
88  if (Pos >= S.size())
89  return -1;
90  return (unsigned char)S[S.size() - Pos - 1];
91 }
92 
93 // Three-way radix quicksort. This is much faster than std::sort with strcmp
94 // because it does not compare characters that we already know the same.
95 static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) {
96 tailcall:
97  if (Vec.size() <= 1)
98  return;
99 
100  // Partition items so that items in [0, I) are greater than the pivot,
101  // [I, J) are the same as the pivot, and [J, Vec.size()) are less than
102  // the pivot.
103  int Pivot = charTailAt(Vec[0], Pos);
104  size_t I = 0;
105  size_t J = Vec.size();
106  for (size_t K = 1; K < J;) {
107  int C = charTailAt(Vec[K], Pos);
108  if (C > Pivot)
109  std::swap(Vec[I++], Vec[K++]);
110  else if (C < Pivot)
111  std::swap(Vec[--J], Vec[K]);
112  else
113  K++;
114  }
115 
116  multikeySort(Vec.slice(0, I), Pos);
117  multikeySort(Vec.slice(J), Pos);
118 
119  // multikeySort(Vec.slice(I, J - I), Pos + 1), but with
120  // tail call optimization.
121  if (Pivot != -1) {
122  Vec = Vec.slice(I, J - I);
123  ++Pos;
124  goto tailcall;
125  }
126 }
127 
129  assert(K != DWARF);
130  finalizeStringTable(/*Optimize=*/true);
131 }
132 
134  finalizeStringTable(/*Optimize=*/false);
135 }
136 
137 void StringTableBuilder::finalizeStringTable(bool Optimize) {
138  Finalized = true;
139 
140  if (Optimize) {
141  std::vector<StringPair *> Strings;
142  Strings.reserve(StringIndexMap.size());
143  for (StringPair &P : StringIndexMap)
144  Strings.push_back(&P);
145 
146  multikeySort(Strings, 0);
147  initSize();
148 
149  StringRef Previous;
150  for (StringPair *P : Strings) {
151  StringRef S = P->first.val();
152  if (Previous.endswith(S)) {
153  size_t Pos = Size - S.size() - (K != RAW);
154  if (!(Pos & (Alignment - 1))) {
155  P->second = Pos;
156  continue;
157  }
158  }
159 
160  Size = alignTo(Size, Alignment);
161  P->second = Size;
162 
163  Size += S.size();
164  if (K != RAW)
165  ++Size;
166  Previous = S;
167  }
168  }
169 
170  if (K == MachO || K == MachOLinked)
171  Size = alignTo(Size, 4); // Pad to multiple of 4.
172  if (K == MachO64 || K == MachO64Linked)
173  Size = alignTo(Size, 8); // Pad to multiple of 8.
174 
175  // According to ld64 the string table of a final linked Mach-O binary starts
176  // with " ", i.e. the first byte is ' ' and the second byte is zero. In
177  // 'initSize()' we reserved the first two bytes for holding this string.
178  if (K == MachOLinked || K == MachO64Linked)
179  StringIndexMap[CachedHashStringRef(" ")] = 0;
180 
181  // The first byte in an ELF string table must be null, according to the ELF
182  // specification. In 'initSize()' we reserved the first byte to hold null for
183  // this purpose and here we actually add the string to allow 'getOffset()' to
184  // be called on an empty string.
185  if (K == ELF)
186  StringIndexMap[CachedHashStringRef("")] = 0;
187 }
188 
190  Finalized = false;
191  StringIndexMap.clear();
192 }
193 
195  assert(isFinalized());
196  auto I = StringIndexMap.find(S);
197  assert(I != StringIndexMap.end() && "String is not in table!");
198  return I->second;
199 }
200 
202  if (K == WinCOFF)
203  assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
204 
205  assert(!isFinalized());
206  auto P = StringIndexMap.insert(std::make_pair(S, 0));
207  if (P.second) {
208  size_t Start = alignTo(Size, Alignment);
209  P.first->second = Start;
210  Size = Start + S.size() + (K != RAW);
211  }
212  return P.first->second;
213 }
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::StringTableBuilder::XCOFF
@ XCOFF
Definition: StringTableBuilder.h:34
MathExtras.h
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::support::endian::write32be
void write32be(void *P, uint32_t V)
Definition: Endian.h:419
llvm::StringRef::endswith
LLVM_NODISCARD bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:297
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::support::endian::write32le
void write32le(void *P, uint32_t V)
Definition: Endian.h:416
llvm::StringTableBuilder::finalizeInOrder
void finalizeInOrder()
Finalize the string table without reording it.
Definition: StringTableBuilder.cpp:133
COFF.h
llvm::COFF::NameSize
@ NameSize
Definition: COFF.h:58
charTailAt
static int charTailAt(StringPair *P, size_t Pos)
Definition: StringTableBuilder.cpp:86
llvm::StringTableBuilder::finalize
void finalize()
Analyze the strings and build the final table.
Definition: StringTableBuilder.cpp:128
llvm::StringTableBuilder::getOffset
size_t getOffset(CachedHashStringRef S) const
Get the offest of a string in the string table.
Definition: StringTableBuilder.cpp:194
llvm::Data
@ Data
Definition: SIMachineScheduler.h:56
StringTableBuilder.h
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
CachedHashString.h
SmallString.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::StringTableBuilder::MachO64
@ MachO64
Definition: StringTableBuilder.h:29
llvm::StringTableBuilder::WinCOFF
@ WinCOFF
Definition: StringTableBuilder.h:27
llvm::StringTableBuilder::getSize
size_t getSize() const
Definition: StringTableBuilder.h:82
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::StringTableBuilder::MachO64Linked
@ MachO64Linked
Definition: StringTableBuilder.h:31
llvm::StringTableBuilder::RAW
@ RAW
Definition: StringTableBuilder.h:32
llvm::SmallString< 0 >
llvm::CachedHashStringRef
A container which contains a StringRef plus a precomputed hash.
Definition: CachedHashString.h:28
llvm::StringTableBuilder::clear
void clear()
Definition: StringTableBuilder.cpp:189
llvm::MutableArrayRef::slice
MutableArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:376
llvm::StringTableBuilder::MachO
@ MachO
Definition: StringTableBuilder.h:28
I
#define I(x, y, z)
Definition: MD5.cpp:59
StringPair
std::pair< CachedHashStringRef, size_t > StringPair
Definition: StringTableBuilder.cpp:68
llvm::StringTableBuilder::~StringTableBuilder
~StringTableBuilder()
ArrayRef.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::StringTableBuilder::write
void write(raw_ostream &OS) const
Definition: StringTableBuilder.cpp:60
llvm::StringTableBuilder::MachOLinked
@ MachOLinked
Definition: StringTableBuilder.h:30
llvm::StringTableBuilder::DWARF
@ DWARF
Definition: StringTableBuilder.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
multikeySort
static void multikeySort(MutableArrayRef< StringPair * > Vec, int Pos)
Definition: StringTableBuilder.cpp:95
llvm::StringTableBuilder::add
size_t add(CachedHashStringRef S)
Add a string to the builder.
Definition: StringTableBuilder.cpp:201
llvm::StringTableBuilder::ELF
@ ELF
Definition: StringTableBuilder.h:26
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::StringTableBuilder::Kind
Kind
Definition: StringTableBuilder.h:25
llvm::StringTableBuilder::StringTableBuilder
StringTableBuilder(Kind K, unsigned Alignment=1)
Definition: StringTableBuilder.cpp:55
raw_ostream.h
Endian.h