LLVM  13.0.0git
StringExtras.h
Go to the documentation of this file.
1 //===- llvm/ADT/StringExtras.h - Useful string functions --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains some functions that are useful when dealing with strings.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_ADT_STRINGEXTRAS_H
14 #define LLVM_ADT_STRINGEXTRAS_H
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Twine.h"
20 #include <cassert>
21 #include <cstddef>
22 #include <cstdint>
23 #include <cstdlib>
24 #include <cstring>
25 #include <iterator>
26 #include <string>
27 #include <utility>
28 
29 namespace llvm {
30 
31 template<typename T> class SmallVectorImpl;
32 class raw_ostream;
33 
/// Map the nibble value \p X (expected to be below 16) to its hexadecimal
/// character. Letters are emitted in upper case unless \p LowerCase is set.
inline char hexdigit(unsigned X, bool LowerCase = false) {
  // Values 0-9 map directly onto the decimal digit characters.
  if (X < 10)
    return '0' + X;
  // Values 10 and above are offset from the first letter of the chosen case.
  const char LetterBase = LowerCase ? 'a' : 'A';
  return LetterBase + X - 10;
}
40 
41 /// Given an array of c-style strings terminated by a null pointer, construct
42 /// a vector of StringRefs representing the same strings without the terminating
43 /// null string.
44 inline std::vector<StringRef> toStringRefArray(const char *const *Strings) {
45  std::vector<StringRef> Result;
46  while (*Strings)
47  Result.push_back(*Strings++);
48  return Result;
49 }
50 
51 /// Construct a string ref from a boolean.
52 inline StringRef toStringRef(bool B) { return StringRef(B ? "true" : "false"); }
53 
54 /// Construct a string ref from an array ref of unsigned chars.
55 inline StringRef toStringRef(ArrayRef<uint8_t> Input) {
56  return StringRef(reinterpret_cast<const char *>(Input.begin()), Input.size());
57 }
58 
59 /// Construct a string ref from an array ref of unsigned chars.
60 inline ArrayRef<uint8_t> arrayRefFromStringRef(StringRef Input) {
61  return {Input.bytes_begin(), Input.bytes_end()};
62 }
63 
/// Interpret the given character \p C as a hexadecimal digit and return its
/// value.
///
/// If \p C is not a valid hex digit, -1U is returned.
inline unsigned hexDigitValue(char C) {
  struct HexTable {
    // One entry per possible unsigned char value (0..255). The table must
    // hold 256 entries: the lookup below indexes with the full unsigned char
    // range, so a 255-entry table would be read out of bounds for byte 0xFF.
    unsigned LUT[256] = {};
    constexpr HexTable() {
      // Default initialize everything to invalid.
      for (int i = 0; i < 256; ++i)
        LUT[i] = ~0U;
      // Initialize `0`-`9`.
      for (int i = 0; i < 10; ++i)
        LUT['0' + i] = i;
      // Initialize `A`-`F` and `a`-`f`.
      for (int i = 0; i < 6; ++i)
        LUT['A' + i] = LUT['a' + i] = 10 + i;
    }
  };
  constexpr HexTable Table;
  // Convert to unsigned char first so that negative char values index the
  // upper half of the table instead of producing a negative subscript.
  return Table.LUT[static_cast<unsigned char>(C)];
}
86 
/// Returns true if \p C is one of the decimal digits '0' through '9'.
inline bool isDigit(char C) {
  if (C < '0')
    return false;
  return C <= '9';
}
89 
90 /// Checks if character \p C is a hexadecimal numeric character.
91 inline bool isHexDigit(char C) { return hexDigitValue(C) != ~0U; }
92 
/// Returns true if \p C is a letter in the range 'a'-'z' or 'A'-'Z'.
inline bool isAlpha(char C) {
  const bool IsLower = C >= 'a' && C <= 'z';
  const bool IsUpper = C >= 'A' && C <= 'Z';
  return IsLower || IsUpper;
}
97 
98 /// Checks whether character \p C is either a decimal digit or an uppercase or
99 /// lowercase letter as classified by "C" locale.
100 inline bool isAlnum(char C) { return isAlpha(C) || isDigit(C); }
101 
/// Returns true if \p C is an ASCII character, i.e. its high bit is clear.
inline bool isASCII(char C) {
  return (static_cast<unsigned char>(C) & 0x80) == 0;
}
104 
105 /// Checks whether all characters in S are ASCII.
106 inline bool isASCII(llvm::StringRef S) {
107  for (char C : S)
108  if (LLVM_UNLIKELY(!isASCII(C)))
109  return false;
110  return true;
111 }
112 
/// Returns true if \p C is a printable character, i.e. its unsigned value
/// lies in the range 0x20 to 0x7E inclusive.
///
/// Unlike the C standard library isprint, the result does not depend on the
/// locale or platform.
inline bool isPrint(char C) {
  const unsigned char Byte = static_cast<unsigned char>(C);
  if (Byte < 0x20)
    return false;
  return Byte <= 0x7E;
}
121 
/// Returns true if \p C is one of the whitespace characters of the "C"
/// locale: space, form feed, newline, carriage return, horizontal tab or
/// vertical tab.
///
/// Locale-independent counterpart of the C standard library isspace.
inline bool isSpace(char C) {
  switch (C) {
  case ' ':
  case '\f':
  case '\n':
  case '\r':
  case '\t':
  case '\v':
    return true;
  default:
    return false;
  }
}
129 
/// Returns the lowercase counterpart of \p x if it is in 'A'-'Z'; any other
/// character is returned unchanged.
inline char toLower(char x) {
  const bool IsUpper = 'A' <= x && x <= 'Z';
  return IsUpper ? static_cast<char>(x - 'A' + 'a') : x;
}
136 
/// Returns the uppercase counterpart of \p x if it is in 'a'-'z'; any other
/// character is returned unchanged.
inline char toUpper(char x) {
  const bool IsLower = 'a' <= x && x <= 'z';
  return IsLower ? static_cast<char>(x - 'a' + 'A') : x;
}
143 
144 inline std::string utohexstr(uint64_t X, bool LowerCase = false) {
145  char Buffer[17];
146  char *BufPtr = std::end(Buffer);
147 
148  if (X == 0) *--BufPtr = '0';
149 
150  while (X) {
151  unsigned char Mod = static_cast<unsigned char>(X) & 15;
152  *--BufPtr = hexdigit(Mod, LowerCase);
153  X >>= 4;
154  }
155 
156  return std::string(BufPtr, std::end(Buffer));
157 }
158 
159 /// Convert buffer \p Input to its hexadecimal representation.
160 /// The returned string is double the size of \p Input.
161 inline std::string toHex(StringRef Input, bool LowerCase = false) {
162  static const char *const LUT = "0123456789ABCDEF";
163  const uint8_t Offset = LowerCase ? 32 : 0;
164  size_t Length = Input.size();
165 
166  std::string Output;
167  Output.reserve(2 * Length);
168  for (size_t i = 0; i < Length; ++i) {
169  const unsigned char c = Input[i];
170  Output.push_back(LUT[c >> 4] | Offset);
171  Output.push_back(LUT[c & 15] | Offset);
172  }
173  return Output;
174 }
175 
176 inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
177  return toHex(toStringRef(Input), LowerCase);
178 }
179 
180 /// Store the binary representation of the two provided values, \p MSB and
181 /// \p LSB, that make up the nibbles of a hexadecimal digit. If \p MSB or \p LSB
182 /// do not correspond to proper nibbles of a hexadecimal digit, this method
183 /// returns false. Otherwise, returns true.
184 inline bool tryGetHexFromNibbles(char MSB, char LSB, uint8_t &Hex) {
185  unsigned U1 = hexDigitValue(MSB);
186  unsigned U2 = hexDigitValue(LSB);
187  if (U1 == ~0U || U2 == ~0U)
188  return false;
189 
190  Hex = static_cast<uint8_t>((U1 << 4) | U2);
191  return true;
192 }
193 
194 /// Return the binary representation of the two provided values, \p MSB and
195 /// \p LSB, that make up the nibbles of a hexadecimal digit.
196 inline uint8_t hexFromNibbles(char MSB, char LSB) {
197  uint8_t Hex = 0;
198  bool GotHex = tryGetHexFromNibbles(MSB, LSB, Hex);
199  (void)GotHex;
200  assert(GotHex && "MSB and/or LSB do not correspond to hex digits");
201  return Hex;
202 }
203 
204 /// Convert hexadecimal string \p Input to its binary representation and store
205 /// the result in \p Output. Returns true if the binary representation could be
206 /// converted from the hexadecimal string. Returns false if \p Input contains
207 /// non-hexadecimal digits. The output string is half the size of \p Input.
208 inline bool tryGetFromHex(StringRef Input, std::string &Output) {
209  if (Input.empty())
210  return true;
211 
212  Output.reserve((Input.size() + 1) / 2);
213  if (Input.size() % 2 == 1) {
214  uint8_t Hex = 0;
215  if (!tryGetHexFromNibbles('0', Input.front(), Hex))
216  return false;
217 
218  Output.push_back(Hex);
219  Input = Input.drop_front();
220  }
221 
222  assert(Input.size() % 2 == 0);
223  while (!Input.empty()) {
224  uint8_t Hex = 0;
225  if (!tryGetHexFromNibbles(Input[0], Input[1], Hex))
226  return false;
227 
228  Output.push_back(Hex);
229  Input = Input.drop_front(2);
230  }
231  return true;
232 }
233 
234 /// Convert hexadecimal string \p Input to its binary representation.
235 /// The return string is half the size of \p Input.
236 inline std::string fromHex(StringRef Input) {
237  std::string Hex;
238  bool GotHex = tryGetFromHex(Input, Hex);
239  (void)GotHex;
240  assert(GotHex && "Input contains non hex digits");
241  return Hex;
242 }
243 
244 /// Convert the string \p S to an integer of the specified type using
245 /// the radix \p Base. If \p Base is 0, auto-detects the radix.
246 /// Returns true if the number was successfully converted, false otherwise.
247 template <typename N> bool to_integer(StringRef S, N &Num, unsigned Base = 0) {
248  return !S.getAsInteger(Base, Num);
249 }
250 
251 namespace detail {
252 template <typename N>
253 inline bool to_float(const Twine &T, N &Num, N (*StrTo)(const char *, char **)) {
254  SmallString<32> Storage;
255  StringRef S = T.toNullTerminatedStringRef(Storage);
256  char *End;
257  N Temp = StrTo(S.data(), &End);
258  if (*End != '\0')
259  return false;
260  Num = Temp;
261  return true;
262 }
263 }
264 
265 inline bool to_float(const Twine &T, float &Num) {
266  return detail::to_float(T, Num, strtof);
267 }
268 
269 inline bool to_float(const Twine &T, double &Num) {
270  return detail::to_float(T, Num, strtod);
271 }
272 
273 inline bool to_float(const Twine &T, long double &Num) {
274  return detail::to_float(T, Num, strtold);
275 }
276 
/// Format \p X as a decimal string. When \p isNeg is set a '-' sign is
/// prepended (also for zero, which then yields "-0").
inline std::string utostr(uint64_t X, bool isNeg = false) {
  // 20 digits cover the largest 64-bit value; one more slot for the sign.
  char Buffer[21];
  char *const BufEnd = std::end(Buffer);
  char *Cursor = BufEnd;

  // Emit digits from least to most significant, filling the buffer backwards.
  // The do-while guarantees a single '0' is produced for X == 0.
  do {
    *--Cursor = static_cast<char>('0' + X % 10);
    X /= 10;
  } while (X != 0);

  if (isNeg)
    *--Cursor = '-';
  return std::string(Cursor, BufEnd);
}
291 
292 inline std::string itostr(int64_t X) {
293  if (X < 0)
294  return utostr(static_cast<uint64_t>(1) + ~static_cast<uint64_t>(X), true);
295  else
296  return utostr(static_cast<uint64_t>(X));
297 }
298 
299 /// StrInStrNoCase - Portable version of strcasestr. Locates the first
300 /// occurrence of string 's2' in string 's1', ignoring case. Returns
301 /// the offset of s2 in s1 or npos if s2 cannot be found.
302 StringRef::size_type StrInStrNoCase(StringRef s1, StringRef s2);
303 
304 /// getToken - This function extracts one token from source, ignoring any
305 /// leading characters that appear in the Delimiters string, and ending the
306 /// token at any of the characters that appear in the Delimiters string. If
307 /// there are no tokens in the source string, an empty string is returned.
308 /// The function returns a pair containing the extracted token and the
309 /// remaining tail string.
310 std::pair<StringRef, StringRef> getToken(StringRef Source,
311  StringRef Delimiters = " \t\n\v\f\r");
312 
313 /// SplitString - Split up the specified string according to the specified
314 /// delimiters, appending the result fragments to the output list.
315 void SplitString(StringRef Source,
316  SmallVectorImpl<StringRef> &OutFragments,
317  StringRef Delimiters = " \t\n\v\f\r");
318 
319 /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
320 inline StringRef getOrdinalSuffix(unsigned Val) {
321  // It is critically important that we do this perfectly for
322  // user-written sequences with over 100 elements.
323  switch (Val % 100) {
324  case 11:
325  case 12:
326  case 13:
327  return "th";
328  default:
329  switch (Val % 10) {
330  case 1: return "st";
331  case 2: return "nd";
332  case 3: return "rd";
333  default: return "th";
334  }
335  }
336 }
337 
338 /// Print each character of the specified string, escaping it if it is not
339 /// printable or if it is an escape char.
340 void printEscapedString(StringRef Name, raw_ostream &Out);
341 
342 /// Print each character of the specified string, escaping HTML special
343 /// characters.
344 void printHTMLEscaped(StringRef String, raw_ostream &Out);
345 
346 /// printLowerCase - Print each character as lowercase if it is uppercase.
347 void printLowerCase(StringRef String, raw_ostream &Out);
348 
349 /// Converts a string from camel-case to snake-case by replacing all uppercase
350 /// letters with '_' followed by the letter in lowercase, except if the
351 /// uppercase letter is the first character of the string.
352 std::string convertToSnakeFromCamelCase(StringRef input);
353 
354 /// Converts a string from snake-case to camel-case by replacing all occurrences
355 /// of '_' followed by a lowercase letter with the letter in uppercase.
356 /// Optionally allow capitalization of the first letter (if it is a lowercase
357 /// letter)
358 std::string convertToCamelFromSnakeCase(StringRef input,
359  bool capitalizeFirst = false);
360 
361 namespace detail {
362 
363 template <typename IteratorT>
364 inline std::string join_impl(IteratorT Begin, IteratorT End,
365  StringRef Separator, std::input_iterator_tag) {
366  std::string S;
367  if (Begin == End)
368  return S;
369 
370  S += (*Begin);
371  while (++Begin != End) {
372  S += Separator;
373  S += (*Begin);
374  }
375  return S;
376 }
377 
378 template <typename IteratorT>
379 inline std::string join_impl(IteratorT Begin, IteratorT End,
380  StringRef Separator, std::forward_iterator_tag) {
381  std::string S;
382  if (Begin == End)
383  return S;
384 
385  size_t Len = (std::distance(Begin, End) - 1) * Separator.size();
386  for (IteratorT I = Begin; I != End; ++I)
387  Len += (*I).size();
388  S.reserve(Len);
389  size_t PrevCapacity = S.capacity();
390  (void)PrevCapacity;
391  S += (*Begin);
392  while (++Begin != End) {
393  S += Separator;
394  S += (*Begin);
395  }
396  assert(PrevCapacity == S.capacity() && "String grew during building");
397  return S;
398 }
399 
/// Recursion terminator for an empty item list: nothing is appended.
template <typename Sep>
inline void join_items_impl(std::string &Result, Sep Separator) {}

/// Recursion terminator for the last item: appends \p Item to \p Result
/// without a trailing separator.
template <typename Sep, typename Arg>
inline void join_items_impl(std::string &Result, Sep Separator,
                            const Arg &Item) {
  Result += Item;
}

/// Appends \p Head followed by \p Separator to \p Result, then recurses on
/// the remaining items \p Tail.
template <typename Sep, typename Head, typename... Tail>
inline void join_items_impl(std::string &Result, Sep Separator,
                            const Head &First, Tail &&... Rest) {
  Result += First;
  Result += Separator;
  join_items_impl(Result, Separator, std::forward<Tail>(Rest)...);
}
416 
/// Size contributed by a single character: always 1.
inline size_t join_one_item_size(char) { return 1; }

/// Size contributed by a C string: its strlen, or 0 for a null pointer.
inline size_t join_one_item_size(const char *S) {
  if (S == nullptr)
    return 0;
  return ::strlen(S);
}

/// Size contributed by any other item: whatever its size() member reports.
template <typename T> inline size_t join_one_item_size(const T &Str) {
  const size_t Len = Str.size();
  return Len;
}
423 
/// Total size of an empty item list: 0.
inline size_t join_items_size() { return 0; }

/// Total size of a single item, as reported by join_one_item_size.
template <typename A1> inline size_t join_items_size(const A1 &A) {
  return join_one_item_size(A);
}

/// Total size of several items: the size of the first item plus the
/// recursively computed size of the rest.
template <typename A1, typename... Args>
inline size_t join_items_size(const A1 &A, Args &&... Items) {
  const size_t HeadSize = join_one_item_size(A);
  const size_t TailSize = join_items_size(std::forward<Args>(Items)...);
  return HeadSize + TailSize;
}
433 
434 } // end namespace detail
435 
436 /// Joins the strings in the range [Begin, End), adding Separator between
437 /// the elements.
438 template <typename IteratorT>
439 inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) {
440  using tag = typename std::iterator_traits<IteratorT>::iterator_category;
441  return detail::join_impl(Begin, End, Separator, tag());
442 }
443 
444 /// Joins the strings in the range [R.begin(), R.end()), adding Separator
445 /// between the elements.
446 template <typename Range>
447 inline std::string join(Range &&R, StringRef Separator) {
448  return join(R.begin(), R.end(), Separator);
449 }
450 
451 /// Joins the strings in the parameter pack \p Items, adding \p Separator
452 /// between the elements. All arguments must be implicitly convertible to
453 /// std::string, or there should be an overload of std::string::operator+=()
454 /// that accepts the argument explicitly.
455 template <typename Sep, typename... Args>
456 inline std::string join_items(Sep Separator, Args &&... Items) {
457  std::string Result;
458  if (sizeof...(Items) == 0)
459  return Result;
460 
461  size_t NS = detail::join_one_item_size(Separator);
462  size_t NI = detail::join_items_size(std::forward<Args>(Items)...);
463  Result.reserve(NI + (sizeof...(Items) - 1) * NS + 1);
464  detail::join_items_impl(Result, Separator, std::forward<Args>(Items)...);
465  return Result;
466 }
467 
468 /// A helper class to return the specified delimiter string after the first
469 /// invocation of operator StringRef(). Used to generate a comma-separated
470 /// list from a loop like so:
471 ///
472 /// \code
473 /// ListSeparator LS;
474 /// for (auto &I : C)
475 /// OS << LS << I.getName();
476 /// \end
477 class ListSeparator {
478  bool First = true;
479  StringRef Separator;
480 
481 public:
482  ListSeparator(StringRef Separator = ", ") : Separator(Separator) {}
483  operator StringRef() {
484  if (First) {
485  First = false;
486  return {};
487  }
488  return Separator;
489  }
490 };
491 
492 } // end namespace llvm
493 
494 #endif // LLVM_ADT_STRINGEXTRAS_H
i
i
Definition: README.txt:29
llvm
Definition: AllocatorList.h:23
llvm::StringRef::size_type
size_t size_type
Definition: StringRef.h:64
StringRef.h
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
T
#define T
Definition: Mips16ISelLowering.cpp:341
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::sys::locale::isPrint
bool isPrint(int c)
Definition: Locale.cpp:13
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:180
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
s2
_bar mov r0 mov r1 fldd LCPI1_0 fmrrd d0 bl _foo fmdrr r5 fmsr s2
Definition: README.txt:159
SmallString.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
llvm::rust_demangle::BasicType::Str
@ Str
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::dwarf::toStringRef
StringRef toStringRef(const Optional< DWARFFormValue > &V, StringRef Default={})
Take an optional DWARFFormValue and try to extract a string value from it.
Definition: DWARFFormValue.h:177
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
input
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 input
Definition: README.txt:10
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::symbolize::toHex
static std::string toHex(uint64_t V)
Definition: DIPrinter.cpp:276
I
#define I(x, y, z)
Definition: MD5.cpp:59
ArrayRef.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
isDigit
static bool isDigit(const char C)
Definition: RustDemangle.cpp:81
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
x
TODO unsigned x
Definition: README.txt:10
s1
int s1
Definition: README.txt:182
N
#define N
isHexDigit
static bool isHexDigit(const char C)
Definition: RustDemangle.cpp:83
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
LLVM_UNLIKELY
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:220
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58