LLVM  14.0.0git
StringExtras.h
Go to the documentation of this file.
1 //===- llvm/ADT/StringExtras.h - Useful string functions --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains some functions that are useful when dealing with strings.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_ADT_STRINGEXTRAS_H
14 #define LLVM_ADT_STRINGEXTRAS_H
15 
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include <cassert>
22 #include <cstddef>
23 #include <cstdint>
24 #include <cstdlib>
25 #include <cstring>
26 #include <iterator>
27 #include <string>
28 #include <utility>
29 
30 namespace llvm {
31 
32 template<typename T> class SmallVectorImpl;
33 class raw_ostream;
34 
/// hexdigit - Map the number \p X (expected to be less than 16) onto the
/// character that represents it in hexadecimal. Letters are emitted in upper
/// case unless \p LowerCase is set.
inline char hexdigit(unsigned X, bool LowerCase = false) {
  if (X < 10)
    return static_cast<char>('0' + X);

  // Values 10..15 are written as letters starting at 'A' or 'a'.
  const char LetterBase = LowerCase ? 'a' : 'A';
  return static_cast<char>(LetterBase + X - 10);
}
41 
42 /// Given an array of c-style strings terminated by a null pointer, construct
43 /// a vector of StringRefs representing the same strings without the terminating
44 /// null string.
45 inline std::vector<StringRef> toStringRefArray(const char *const *Strings) {
46  std::vector<StringRef> Result;
47  while (*Strings)
48  Result.push_back(*Strings++);
49  return Result;
50 }
51 
52 /// Construct a string ref from a boolean.
53 inline StringRef toStringRef(bool B) { return StringRef(B ? "true" : "false"); }
54 
55 /// Construct a string ref from an array ref of unsigned chars.
56 inline StringRef toStringRef(ArrayRef<uint8_t> Input) {
57  return StringRef(reinterpret_cast<const char *>(Input.begin()), Input.size());
58 }
59 
60 /// Construct a string ref from an array ref of unsigned chars.
61 inline ArrayRef<uint8_t> arrayRefFromStringRef(StringRef Input) {
62  return {Input.bytes_begin(), Input.bytes_end()};
63 }
64 
/// Interpret the given character \p C as a hexadecimal digit and return its
/// value.
///
/// If \p C is not a valid hex digit, -1U is returned.
inline unsigned hexDigitValue(char C) {
  struct HexTable {
    // One entry for every possible `unsigned char` value (0..255). The table
    // is indexed with the character converted to `unsigned char`, so it must
    // hold 256 entries; a 255-entry table would be read out of bounds for
    // the character value 0xFF.
    unsigned LUT[256] = {};
    constexpr HexTable() {
      // Default initialize everything to invalid.
      for (int i = 0; i < 256; ++i)
        LUT[i] = ~0U;
      // Initialize `0`-`9`.
      for (int i = 0; i < 10; ++i)
        LUT['0' + i] = i;
      // Initialize `A`-`F` and `a`-`f`.
      for (int i = 0; i < 6; ++i)
        LUT['A' + i] = LUT['a' + i] = 10 + i;
    }
  };
  constexpr HexTable Table;
  // Go through `unsigned char` so that negative `char` values index the upper
  // half of the table instead of producing a negative subscript.
  return Table.LUT[static_cast<unsigned char>(C)];
}
87 
/// Returns true when \p C is one of the ten decimal digits '0'..'9'.
inline bool isDigit(char C) {
  if (C < '0')
    return false;
  return C <= '9';
}
90 
91 /// Checks if character \p C is a hexadecimal numeric character.
92 inline bool isHexDigit(char C) { return hexDigitValue(C) != ~0U; }
93 
/// Returns true when \p C is a letter as classified by the "C" locale, i.e.
/// it lies in 'a'..'z' or 'A'..'Z'.
inline bool isAlpha(char C) {
  if ('a' <= C && C <= 'z')
    return true;
  return 'A' <= C && C <= 'Z';
}
98 
99 /// Checks whether character \p C is either a decimal digit or an uppercase or
100 /// lowercase letter as classified by "C" locale.
101 inline bool isAlnum(char C) { return isAlpha(C) || isDigit(C); }
102 
/// Returns true when \p C is valid ASCII, meaning its high bit is zero.
inline bool isASCII(char C) {
  return (static_cast<unsigned char>(C) & 0x80) == 0;
}
105 
106 /// Checks whether all characters in S are ASCII.
107 inline bool isASCII(llvm::StringRef S) {
108  for (char C : S)
109  if (LLVM_UNLIKELY(!isASCII(C)))
110  return false;
111  return true;
112 }
113 
/// Returns true when \p C is printable, i.e. in the range 0x20..0x7E.
///
/// This is a locale-independent replacement for the C standard library
/// isprint, whose results may differ between platforms.
inline bool isPrint(char C) {
  const unsigned char UC = static_cast<unsigned char>(C);
  if (UC < 0x20)
    return false;
  return UC <= 0x7E;
}
122 
/// Returns true when \p C is whitespace in the "C" locale: space, form feed,
/// newline, carriage return, horizontal tab or vertical tab.
///
/// This is a locale-independent replacement for the C standard library
/// isspace.
inline bool isSpace(char C) {
  switch (C) {
  case ' ':
  case '\f':
  case '\n':
  case '\r':
  case '\t':
  case '\v':
    return true;
  default:
    return false;
  }
}
130 
/// Returns the lowercase counterpart of \p x when it is in 'A'..'Z'; any other
/// character is returned unchanged.
inline char toLower(char x) {
  const bool IsUpper = 'A' <= x && x <= 'Z';
  return IsUpper ? static_cast<char>(x - 'A' + 'a') : x;
}
137 
/// Returns the uppercase counterpart of \p x when it is in 'a'..'z'; any other
/// character is returned unchanged.
inline char toUpper(char x) {
  const bool IsLower = 'a' <= x && x <= 'z';
  return IsLower ? static_cast<char>(x - 'a' + 'A') : x;
}
144 
145 inline std::string utohexstr(uint64_t X, bool LowerCase = false) {
146  char Buffer[17];
147  char *BufPtr = std::end(Buffer);
148 
149  if (X == 0) *--BufPtr = '0';
150 
151  while (X) {
152  unsigned char Mod = static_cast<unsigned char>(X) & 15;
153  *--BufPtr = hexdigit(Mod, LowerCase);
154  X >>= 4;
155  }
156 
157  return std::string(BufPtr, std::end(Buffer));
158 }
159 
160 /// Convert buffer \p Input to its hexadecimal representation.
161 /// The returned string is double the size of \p Input.
162 inline std::string toHex(StringRef Input, bool LowerCase = false) {
163  static const char *const LUT = "0123456789ABCDEF";
164  const uint8_t Offset = LowerCase ? 32 : 0;
165  size_t Length = Input.size();
166 
167  std::string Output;
168  Output.reserve(2 * Length);
169  for (size_t i = 0; i < Length; ++i) {
170  const unsigned char c = Input[i];
171  Output.push_back(LUT[c >> 4] | Offset);
172  Output.push_back(LUT[c & 15] | Offset);
173  }
174  return Output;
175 }
176 
177 inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
178  return toHex(toStringRef(Input), LowerCase);
179 }
180 
181 /// Store the binary representation of the two provided values, \p MSB and
182 /// \p LSB, that make up the nibbles of a hexadecimal digit. If \p MSB or \p LSB
183 /// do not correspond to proper nibbles of a hexadecimal digit, this method
184 /// returns false. Otherwise, returns true.
185 inline bool tryGetHexFromNibbles(char MSB, char LSB, uint8_t &Hex) {
186  unsigned U1 = hexDigitValue(MSB);
187  unsigned U2 = hexDigitValue(LSB);
188  if (U1 == ~0U || U2 == ~0U)
189  return false;
190 
191  Hex = static_cast<uint8_t>((U1 << 4) | U2);
192  return true;
193 }
194 
195 /// Return the binary representation of the two provided values, \p MSB and
196 /// \p LSB, that make up the nibbles of a hexadecimal digit.
197 inline uint8_t hexFromNibbles(char MSB, char LSB) {
198  uint8_t Hex = 0;
199  bool GotHex = tryGetHexFromNibbles(MSB, LSB, Hex);
200  (void)GotHex;
201  assert(GotHex && "MSB and/or LSB do not correspond to hex digits");
202  return Hex;
203 }
204 
205 /// Convert hexadecimal string \p Input to its binary representation and store
206 /// the result in \p Output. Returns true if the binary representation could be
207 /// converted from the hexadecimal string. Returns false if \p Input contains
208 /// non-hexadecimal digits. The output string is half the size of \p Input.
209 inline bool tryGetFromHex(StringRef Input, std::string &Output) {
210  if (Input.empty())
211  return true;
212 
213  Output.reserve((Input.size() + 1) / 2);
214  if (Input.size() % 2 == 1) {
215  uint8_t Hex = 0;
216  if (!tryGetHexFromNibbles('0', Input.front(), Hex))
217  return false;
218 
219  Output.push_back(Hex);
220  Input = Input.drop_front();
221  }
222 
223  assert(Input.size() % 2 == 0);
224  while (!Input.empty()) {
225  uint8_t Hex = 0;
226  if (!tryGetHexFromNibbles(Input[0], Input[1], Hex))
227  return false;
228 
229  Output.push_back(Hex);
230  Input = Input.drop_front(2);
231  }
232  return true;
233 }
234 
235 /// Convert hexadecimal string \p Input to its binary representation.
236 /// The return string is half the size of \p Input.
237 inline std::string fromHex(StringRef Input) {
238  std::string Hex;
239  bool GotHex = tryGetFromHex(Input, Hex);
240  (void)GotHex;
241  assert(GotHex && "Input contains non hex digits");
242  return Hex;
243 }
244 
245 /// Convert the string \p S to an integer of the specified type using
246 /// the radix \p Base. If \p Base is 0, auto-detects the radix.
247 /// Returns true if the number was successfully converted, false otherwise.
248 template <typename N> bool to_integer(StringRef S, N &Num, unsigned Base = 0) {
249  return !S.getAsInteger(Base, Num);
250 }
251 
252 namespace detail {
253 template <typename N>
254 inline bool to_float(const Twine &T, N &Num, N (*StrTo)(const char *, char **)) {
255  SmallString<32> Storage;
256  StringRef S = T.toNullTerminatedStringRef(Storage);
257  char *End;
258  N Temp = StrTo(S.data(), &End);
259  if (*End != '\0')
260  return false;
261  Num = Temp;
262  return true;
263 }
264 }
265 
266 inline bool to_float(const Twine &T, float &Num) {
267  return detail::to_float(T, Num, strtof);
268 }
269 
270 inline bool to_float(const Twine &T, double &Num) {
271  return detail::to_float(T, Num, strtod);
272 }
273 
274 inline bool to_float(const Twine &T, long double &Num) {
275  return detail::to_float(T, Num, strtold);
276 }
277 
/// Format \p X as a decimal string. When \p isNeg is set, a '-' sign is
/// placed in front of the digits.
inline std::string utostr(uint64_t X, bool isNeg = false) {
  // 20 digits for the largest 64-bit value plus one slot for the sign.
  char Digits[21];
  char *const DigitsEnd = std::end(Digits);
  char *Cursor = DigitsEnd;

  // Emit digits from least to most significant, filling the buffer backwards.
  // The do/while form guarantees one digit is written even when X is zero.
  do {
    *--Cursor = '0' + char(X % 10);
    X /= 10;
  } while (X != 0);

  if (isNeg)
    *--Cursor = '-';
  return std::string(Cursor, DigitsEnd);
}
292 
293 inline std::string itostr(int64_t X) {
294  if (X < 0)
295  return utostr(static_cast<uint64_t>(1) + ~static_cast<uint64_t>(X), true);
296  else
297  return utostr(static_cast<uint64_t>(X));
298 }
299 
300 inline std::string toString(const APInt &I, unsigned Radix, bool Signed,
301  bool formatAsCLiteral = false) {
302  SmallString<40> S;
303  I.toString(S, Radix, Signed, formatAsCLiteral);
304  return std::string(S.str());
305 }
306 
307 inline std::string toString(const APSInt &I, unsigned Radix) {
308  return toString(I, Radix, I.isSigned());
309 }
310 
311 /// StrInStrNoCase - Portable version of strcasestr. Locates the first
312 /// occurrence of string 's2' in string 's1', ignoring case. Returns
313 /// the offset of s2 in s1 or npos if s2 cannot be found.
314 StringRef::size_type StrInStrNoCase(StringRef s1, StringRef s2);
315 
316 /// getToken - This function extracts one token from source, ignoring any
317 /// leading characters that appear in the Delimiters string, and ending the
318 /// token at any of the characters that appear in the Delimiters string. If
319 /// there are no tokens in the source string, an empty string is returned.
320 /// The function returns a pair containing the extracted token and the
321 /// remaining tail string.
322 std::pair<StringRef, StringRef> getToken(StringRef Source,
323  StringRef Delimiters = " \t\n\v\f\r");
324 
325 /// SplitString - Split up the specified string according to the specified
326 /// delimiters, appending the result fragments to the output list.
327 void SplitString(StringRef Source,
328  SmallVectorImpl<StringRef> &OutFragments,
329  StringRef Delimiters = " \t\n\v\f\r");
330 
331 /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
332 inline StringRef getOrdinalSuffix(unsigned Val) {
333  // It is critically important that we do this perfectly for
334  // user-written sequences with over 100 elements.
335  switch (Val % 100) {
336  case 11:
337  case 12:
338  case 13:
339  return "th";
340  default:
341  switch (Val % 10) {
342  case 1: return "st";
343  case 2: return "nd";
344  case 3: return "rd";
345  default: return "th";
346  }
347  }
348 }
349 
350 /// Print each character of the specified string, escaping it if it is not
351 /// printable or if it is an escape char.
352 void printEscapedString(StringRef Name, raw_ostream &Out);
353 
354 /// Print each character of the specified string, escaping HTML special
355 /// characters.
356 void printHTMLEscaped(StringRef String, raw_ostream &Out);
357 
358 /// printLowerCase - Print each character as lowercase if it is uppercase.
359 void printLowerCase(StringRef String, raw_ostream &Out);
360 
361 /// Converts a string from camel-case to snake-case by replacing all uppercase
362 /// letters with '_' followed by the letter in lowercase, except if the
363 /// uppercase letter is the first character of the string.
364 std::string convertToSnakeFromCamelCase(StringRef input);
365 
366 /// Converts a string from snake-case to camel-case by replacing all occurrences
367 /// of '_' followed by a lowercase letter with the letter in uppercase.
368 /// Optionally allow capitalization of the first letter (if it is a lowercase
369 /// letter).
370 std::string convertToCamelFromSnakeCase(StringRef input,
371  bool capitalizeFirst = false);
372 
373 namespace detail {
374 
375 template <typename IteratorT>
376 inline std::string join_impl(IteratorT Begin, IteratorT End,
377  StringRef Separator, std::input_iterator_tag) {
378  std::string S;
379  if (Begin == End)
380  return S;
381 
382  S += (*Begin);
383  while (++Begin != End) {
384  S += Separator;
385  S += (*Begin);
386  }
387  return S;
388 }
389 
390 template <typename IteratorT>
391 inline std::string join_impl(IteratorT Begin, IteratorT End,
392  StringRef Separator, std::forward_iterator_tag) {
393  std::string S;
394  if (Begin == End)
395  return S;
396 
397  size_t Len = (std::distance(Begin, End) - 1) * Separator.size();
398  for (IteratorT I = Begin; I != End; ++I)
399  Len += (*I).size();
400  S.reserve(Len);
401  size_t PrevCapacity = S.capacity();
402  (void)PrevCapacity;
403  S += (*Begin);
404  while (++Begin != End) {
405  S += Separator;
406  S += (*Begin);
407  }
408  assert(PrevCapacity == S.capacity() && "String grew during building");
409  return S;
410 }
411 
/// Recursion end for an empty item list: nothing is appended.
template <typename Sep>
inline void join_items_impl(std::string &Result, Sep Separator) {}

/// Recursion end for the last item: it is appended without a trailing
/// separator.
template <typename Sep, typename Arg>
inline void join_items_impl(std::string &Result, Sep Separator,
                            const Arg &Item) {
  Result += Item;
}

/// General case: append the first item followed by the separator, then
/// recurse on the remaining items.
template <typename Sep, typename Arg1, typename... Args>
inline void join_items_impl(std::string &Result, Sep Separator, const Arg1 &A1,
                            Args &&... Items) {
  Result += A1;
  Result += Separator;
  join_items_impl(Result, Separator, std::forward<Args>(Items)...);
}
428 
/// Size contribution of a single character.
inline size_t join_one_item_size(char) { return 1; }

/// Size contribution of a c-style string; a null pointer counts as empty.
inline size_t join_one_item_size(const char *S) {
  if (!S)
    return 0;
  return ::strlen(S);
}

/// Size contribution of any string-like object exposing size().
template <typename T> inline size_t join_one_item_size(const T &Str) {
  return Str.size();
}
435 
/// Total size of an empty item list.
inline size_t join_items_size() { return 0; }

/// Total size of a single item.
template <typename A1> inline size_t join_items_size(const A1 &A) {
  return join_one_item_size(A);
}

/// Total size of several items: the size of the first one plus the size of
/// the rest, computed recursively. Separators are not counted here.
template <typename A1, typename... Args>
inline size_t join_items_size(const A1 &A, Args &&... Items) {
  const size_t FirstSize = join_one_item_size(A);
  return FirstSize + join_items_size(std::forward<Args>(Items)...);
}
445 
446 } // end namespace detail
447 
448 /// Joins the strings in the range [Begin, End), adding Separator between
449 /// the elements.
450 template <typename IteratorT>
451 inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) {
452  using tag = typename std::iterator_traits<IteratorT>::iterator_category;
453  return detail::join_impl(Begin, End, Separator, tag());
454 }
455 
456 /// Joins the strings in the range [R.begin(), R.end()), adding Separator
457 /// between the elements.
458 template <typename Range>
459 inline std::string join(Range &&R, StringRef Separator) {
460  return join(R.begin(), R.end(), Separator);
461 }
462 
463 /// Joins the strings in the parameter pack \p Items, adding \p Separator
464 /// between the elements. All arguments must be implicitly convertible to
465 /// std::string, or there should be an overload of std::string::operator+=()
466 /// that accepts the argument explicitly.
467 template <typename Sep, typename... Args>
468 inline std::string join_items(Sep Separator, Args &&... Items) {
469  std::string Result;
470  if (sizeof...(Items) == 0)
471  return Result;
472 
473  size_t NS = detail::join_one_item_size(Separator);
474  size_t NI = detail::join_items_size(std::forward<Args>(Items)...);
475  Result.reserve(NI + (sizeof...(Items) - 1) * NS + 1);
476  detail::join_items_impl(Result, Separator, std::forward<Args>(Items)...);
477  return Result;
478 }
479 
480 /// A helper class to return the specified delimiter string after the first
481 /// invocation of operator StringRef(). Used to generate a comma-separated
482 /// list from a loop like so:
483 ///
484 /// \code
485 /// ListSeparator LS;
486 /// for (auto &I : C)
487 /// OS << LS << I.getName();
488 /// \end
489 class ListSeparator {
490  bool First = true;
491  StringRef Separator;
492 
493 public:
494  ListSeparator(StringRef Separator = ", ") : Separator(Separator) {}
495  operator StringRef() {
496  if (First) {
497  First = false;
498  return {};
499  }
500  return Separator;
501  }
502 };
503 
504 } // end namespace llvm
505 
506 #endif // LLVM_ADT_STRINGEXTRAS_H
i
i
Definition: README.txt:29
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4636
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::StringRef::size_type
size_t size_type
Definition: StringRef.h:64
StringRef.h
llvm::toString
std::string toString(Error E)
Write all error messages (if any) in E to a string.
Definition: Error.h:1020
llvm::lltok::APSInt
@ APSInt
Definition: LLToken.h:493
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
T
#define T
Definition: Mips16ISelLowering.cpp:341
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::sys::locale::isPrint
bool isPrint(int c)
Definition: Locale.cpp:13
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:193
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
s2
_bar mov r0 mov r1 fldd LCPI1_0 fmrrd d0 bl _foo fmdrr r5 fmsr s2
Definition: README.txt:159
SmallString.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::dwarf::toStringRef
StringRef toStringRef(const Optional< DWARFFormValue > &V, StringRef Default={})
Take an optional DWARFFormValue and try to extract a string value from it.
Definition: DWARFFormValue.h:186
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
input
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 input
Definition: README.txt:10
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
uint64_t
llvm::symbolize::toHex
static std::string toHex(uint64_t V)
Definition: DIPrinter.cpp:276
I
#define I(x, y, z)
Definition: MD5.cpp:59
ArrayRef.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
APSInt.h
isDigit
static bool isDigit(const char C)
Definition: RustDemangle.cpp:205
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
x
TODO unsigned x
Definition: README.txt:10
s1
int s1
Definition: README.txt:182
N
#define N
isHexDigit
static bool isHexDigit(const char C)
Definition: RustDemangle.cpp:207
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
LLVM_UNLIKELY
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:220
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58