LLVM 22.0.0git
StringToOffsetTable.cpp
Go to the documentation of this file.
//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Main.h"

using namespace llvm;
16
18 auto [II, Inserted] = StringOffset.insert({Str, size()});
19 if (Inserted) {
20 // Add the string to the aggregate if this is the first time found.
21 AggregateString.append(Str.begin(), Str.end());
22 if (AppendZero)
23 AggregateString += '\0';
24 }
25
26 return II->second;
27}
28
30 const Twine &Name) const {
31 // This generates a `llvm::StringTable` which expects that entries are null
32 // terminated. So fail with an error if `AppendZero` is false.
33 if (!AppendZero)
34 PrintFatalError("llvm::StringTable requires null terminated strings");
35
36 OS << formatv(R"(
37#ifdef __GNUC__
38#pragma GCC diagnostic push
39#pragma GCC diagnostic ignored "-Woverlength-strings"
40#endif
41{} constexpr char {}{}Storage[] =)",
42 ClassPrefix.empty() ? "static" : "", ClassPrefix, Name);
43
44 // MSVC silently miscompiles string literals longer than 64k in some
45 // circumstances. The build system sets EmitLongStrLiterals to false when it
46 // detects that it is targetting MSVC. When that option is false and the
47 // string table is longer than 64k, emit it as an array of character
48 // literals.
49 bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
50 OS << (UseChars ? "{\n" : "\n");
51
52 ListSeparator LineSep(UseChars ? ",\n" : "\n");
53 SmallVector<StringRef> Strings(split(AggregateString, '\0'));
54 // We should always have an empty string at the start, and because these are
55 // null terminators rather than separators, we'll have one at the end as
56 // well. Skip the end one.
57 assert(Strings.front().empty() && "Expected empty initial string!");
58 assert(Strings.back().empty() &&
59 "Expected empty string at the end due to terminators!");
60 Strings.pop_back();
61 for (StringRef Str : Strings) {
62 OS << LineSep << " ";
63 // If we can, just emit this as a string literal to be concatenated.
64 if (!UseChars) {
65 OS << "\"";
66 OS.write_escaped(Str);
67 OS << "\\0\"";
68 continue;
69 }
70
71 ListSeparator CharSep(", ");
72 for (char C : Str) {
73 OS << CharSep << "'";
74 OS.write_escaped(StringRef(&C, 1));
75 OS << "'";
76 }
77 OS << CharSep << "'\\0'";
78 }
79 OS << LineSep << (UseChars ? "};" : " ;");
80
81 OS << formatv(R"(
82#ifdef __GNUC__
83#pragma GCC diagnostic pop
84#endif
85
86{1} llvm::StringTable
87{2}{0} = {0}Storage;
88)",
89 Name, ClassPrefix.empty() ? "static constexpr" : "const",
90 ClassPrefix);
91}
92
94 // Escape the string.
95 SmallString<256> EscapedStr;
96 raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
97
98 O << " \"";
99 unsigned CharsPrinted = 0;
100 for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
101 if (CharsPrinted > 70) {
102 O << "\"\n \"";
103 CharsPrinted = 0;
104 }
105 O << EscapedStr[i];
106 ++CharsPrinted;
107
108 // Print escape sequences all together.
109 if (EscapedStr[i] != '\\')
110 continue;
111
112 assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
113 if (isDigit(EscapedStr[i + 1])) {
114 assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
115 "Expected 3 digit octal escape!");
116 O << EscapedStr[++i];
117 O << EscapedStr[++i];
118 O << EscapedStr[++i];
119 CharsPrinted += 3;
120 } else {
121 O << EscapedStr[++i];
122 ++CharsPrinted;
123 }
124 }
125 O << "\"";
126}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
uint64_t IntrinsicInst * II
A helper class to return the specified delimiter string after the first invocation of operator String...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
unsigned GetOrAddStringOffset(StringRef Str)
void EmitStringTableDef(raw_ostream &OS, const Twine &Name) const
void EmitString(raw_ostream &O) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\', '\t', ' ', '"', and anything that doesn't satisfy llvm::isPrint into an esca...
A raw_ostream that writes to an SmallVector or SmallString.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
void PrintFatalError(const Twine &Msg)
Definition Error.cpp:132
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
cl::opt< bool > EmitLongStrLiterals
Controls emitting large character arrays as strings or character arrays.
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...