LLVM  16.0.0git
UnicodeNameToCodepoint.cpp
Go to the documentation of this file.
1 //===- llvm/Support/UnicodeNameToCodepoint.cpp - Unicode character properties
2 //-*- C++ -*-===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements functions to map the name or alias of a unicode
11 // character to its codepoint.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Support/Unicode.h"
19 
20 namespace llvm {
21 namespace sys {
22 namespace unicode {
23 
24 extern const char *UnicodeNameToCodepointDict;
25 extern const uint8_t *UnicodeNameToCodepointIndex;
26 extern const std::size_t UnicodeNameToCodepointIndexSize;
27 extern const std::size_t UnicodeNameToCodepointLargestNameSize;
28 
30 
31 struct Node {
32  bool IsRoot = false;
33  char32_t Value = 0xFFFFFFFF;
35  bool HasSibling = false;
38  const Node *Parent = nullptr;
39 
40  constexpr bool isValid() const {
41  return !Name.empty() || Value == 0xFFFFFFFF;
42  }
43  constexpr bool hasChildren() const { return ChildrenOffset != 0 || IsRoot; }
44 
45  std::string fullName() const {
46  std::string S;
47  // Reserve enough space for most unicode code points.
48  // The chosen value represent the 99th percentile of name size as of
49  // Unicode 14.
50  S.reserve(46);
51  const Node *N = this;
52  while (N) {
53  std::reverse_copy(N->Name.begin(), N->Name.end(), std::back_inserter(S));
54  N = N->Parent;
55  }
56  std::reverse(S.begin(), S.end());
57  return S;
58  }
59 };
60 
61 static Node createRoot() {
62  Node N;
63  N.IsRoot = true;
64  N.ChildrenOffset = 1;
65  N.Size = 1;
66  return N;
67 }
68 
69 static Node readNode(uint32_t Offset, const Node *Parent = nullptr) {
70  if (Offset == 0)
71  return createRoot();
72 
73  uint32_t Origin = Offset;
74  Node N;
75  N.Parent = Parent;
76  uint8_t NameInfo = UnicodeNameToCodepointIndex[Offset++];
77  if (Offset + 6 >= UnicodeNameToCodepointIndexSize)
78  return N;
79 
80  bool LongName = NameInfo & 0x40;
81  bool HasValue = NameInfo & 0x80;
82  std::size_t Size = NameInfo & ~0xC0;
83  if (LongName) {
84  uint32_t NameOffset = (UnicodeNameToCodepointIndex[Offset++] << 8);
85  NameOffset |= UnicodeNameToCodepointIndex[Offset++];
86  N.Name = StringRef(UnicodeNameToCodepointDict + NameOffset, Size);
87  } else {
88  N.Name = StringRef(UnicodeNameToCodepointDict + Size, 1);
89  }
90  if (HasValue) {
91  uint8_t H = UnicodeNameToCodepointIndex[Offset++];
92  uint8_t M = UnicodeNameToCodepointIndex[Offset++];
93  uint8_t L = UnicodeNameToCodepointIndex[Offset++];
94  N.Value = ((H << 16) | (M << 8) | L) >> 3;
95 
96  bool HasChildren = L & 0x02;
97  N.HasSibling = L & 0x01;
98 
99  if (HasChildren) {
100  N.ChildrenOffset = UnicodeNameToCodepointIndex[Offset++] << 16;
101  N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++] << 8;
102  N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++];
103  }
104  } else {
105  uint8_t H = UnicodeNameToCodepointIndex[Offset++];
106  N.HasSibling = H & 0x80;
107  bool HasChildren = H & 0x40;
108  H &= ~0xC0;
109  if (HasChildren) {
110  N.ChildrenOffset = (H << 16);
111  N.ChildrenOffset |=
112  (uint32_t(UnicodeNameToCodepointIndex[Offset++]) << 8);
113  N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++];
114  }
115  }
116  N.Size = Offset - Origin;
117  return N;
118 }
119 
120 static bool startsWith(StringRef Name, StringRef Needle, bool Strict,
121  std::size_t &Consummed, char &PreviousCharInName,
122  char &PreviousCharInNeedle, bool IsPrefix = false) {
123 
124  Consummed = 0;
125  if (Strict) {
126  if (!Name.startswith(Needle))
127  return false;
128  Consummed = Needle.size();
129  return true;
130  }
131  if (Needle.empty())
132  return true;
133 
134  auto NamePos = Name.begin();
135  auto NeedlePos = Needle.begin();
136 
137  char PreviousCharInNameOrigin = PreviousCharInName;
138  char PreviousCharInNeedleOrigin = PreviousCharInNeedle;
139 
140  auto IgnoreSpaces = [](auto It, auto End, char &PreviousChar,
141  bool IgnoreEnd = false) {
142  while (It != End) {
143  const auto Next = std::next(It);
144  // Ignore spaces, underscore, medial hyphens
145  // https://unicode.org/reports/tr44/#UAX44-LM2.
146  bool Ignore =
147  *It == ' ' || *It == '_' ||
148  (*It == '-' && isAlnum(PreviousChar) &&
149  ((Next != End && isAlnum(*Next)) || (Next == End && IgnoreEnd)));
150  PreviousChar = *It;
151  if (!Ignore)
152  break;
153  ++It;
154  }
155  return It;
156  };
157 
158  while (true) {
159  NamePos = IgnoreSpaces(NamePos, Name.end(), PreviousCharInName);
160  NeedlePos =
161  IgnoreSpaces(NeedlePos, Needle.end(), PreviousCharInNeedle, IsPrefix);
162  if (NeedlePos == Needle.end())
163  break;
164  if (NamePos == Name.end())
165  break;
166  if (toUpper(*NeedlePos) != toUpper(*NamePos))
167  break;
168  NeedlePos++;
169  NamePos++;
170  }
171  Consummed = std::distance(Name.begin(), NamePos);
172  if (NeedlePos != Needle.end()) {
173  PreviousCharInName = PreviousCharInNameOrigin;
174  PreviousCharInNeedle = PreviousCharInNeedleOrigin;
175  }
176  return NeedlePos == Needle.end();
177 }
178 
179 static std::tuple<Node, bool, uint32_t>
181  char PreviousCharInName, char PreviousCharInNeedle,
182  BufferType &Buffer, const Node *Parent = nullptr) {
183  Node N = readNode(Offset, Parent);
184  std::size_t Consummed = 0;
185  bool DoesStartWith =
186  N.IsRoot || startsWith(Name, N.Name, Strict, Consummed,
187  PreviousCharInName, PreviousCharInNeedle);
188  if (!DoesStartWith)
189  return std::make_tuple(N, false, 0);
190 
191  if (Name.size() - Consummed == 0 && N.Value != 0xFFFFFFFF)
192  return std::make_tuple(N, true, N.Value);
193 
194  if (N.hasChildren()) {
195  uint32_t ChildOffset = N.ChildrenOffset;
196  for (;;) {
197  Node C;
198  bool Matches;
199  uint32_t Value;
200  std::tie(C, Matches, Value) =
201  compareNode(ChildOffset, Name.substr(Consummed), Strict,
202  PreviousCharInName, PreviousCharInNeedle, Buffer, &N);
203  if (Matches) {
204  std::reverse_copy(C.Name.begin(), C.Name.end(),
205  std::back_inserter(Buffer));
206  return std::make_tuple(N, true, Value);
207  }
208  ChildOffset += C.Size;
209  if (!C.HasSibling)
210  break;
211  }
212  }
213  return std::make_tuple(N, false, 0);
214 }
215 
216 static std::tuple<Node, bool, uint32_t>
218  return compareNode(Offset, Name, Strict, 0, 0, Buffer);
219 }
220 
221 // clang-format off
222 constexpr const char *const HangulSyllables[][3] = {
223  { "G", "A", "" },
224  { "GG", "AE", "G" },
225  { "N", "YA", "GG" },
226  { "D", "YAE", "GS" },
227  { "DD", "EO", "N", },
228  { "R", "E", "NJ" },
229  { "M", "YEO", "NH" },
230  { "B", "YE", "D" },
231  { "BB", "O", "L" },
232  { "S", "WA", "LG" },
233  { "SS", "WAE", "LM" },
234  { "", "OE", "LB" },
235  { "J", "YO", "LS" },
236  { "JJ", "U", "LT" },
237  { "C", "WEO", "LP" },
238  { "K", "WE", "LH" },
239  { "T", "WI", "M" },
240  { "P", "YU", "B" },
241  { "H", "EU", "BS" },
242  { 0, "YI", "S" },
243  { 0, "I", "SS" },
244  { 0, 0, "NG" },
245  { 0, 0, "J" },
246  { 0, 0, "C" },
247  { 0, 0, "K" },
248  { 0, 0, "T" },
249  { 0, 0, "P" },
250  { 0, 0, "H" }
251  };
252 // clang-format on
253 
254 // Unicode 14.0
255 // 3.12 Conjoining Jamo Behavior Common constants
256 constexpr const char32_t SBase = 0xAC00;
257 constexpr const uint32_t LCount = 19;
258 constexpr const uint32_t VCount = 21;
259 constexpr const uint32_t TCount = 28;
260 
261 static std::size_t findSyllable(StringRef Name, bool Strict,
262  char &PreviousInName, int &Pos, int Column) {
263  assert(Column == 0 || Column == 1 || Column == 2);
264  static std::size_t CountPerColumn[] = {LCount, VCount, TCount};
265  char NeedleStart = 0;
266  int Len = -1;
267  int Prev = PreviousInName;
268  for (std::size_t I = 0; I < CountPerColumn[Column]; I++) {
269  StringRef Syllable(HangulSyllables[I][Column]);
270  if (int(Syllable.size()) <= Len)
271  continue;
272  std::size_t Consummed = 0;
273  char PreviousInNameCopy = PreviousInName;
274  bool DoesStartWith = startsWith(Name, Syllable, Strict, Consummed,
275  PreviousInNameCopy, NeedleStart);
276  if (!DoesStartWith)
277  continue;
278  Len = Consummed;
279  Pos = I;
280  Prev = PreviousInNameCopy;
281  }
282  if (Len == -1)
283  return 0;
284  PreviousInName = Prev;
285  return size_t(Len);
286 }
287 
290  Buffer.clear();
291  // Hangul Syllable Decomposition
292  std::size_t Consummed = 0;
293  char NameStart = 0, NeedleStart = 0;
294  bool DoesStartWith = startsWith(Name, "HANGUL SYLLABLE ", Strict, Consummed,
295  NameStart, NeedleStart);
296  if (!DoesStartWith)
297  return None;
298  Name = Name.substr(Consummed);
299  int L = -1, V = -1, T = -1;
300  Name = Name.substr(findSyllable(Name, Strict, NameStart, L, 0));
301  Name = Name.substr(findSyllable(Name, Strict, NameStart, V, 1));
302  Name = Name.substr(findSyllable(Name, Strict, NameStart, T, 2));
303  if (L != -1 && V != -1 && T != -1 && Name.empty()) {
304  if (!Strict) {
305  Buffer.append("HANGUL SYLLABLE ");
306  if (L != -1)
307  Buffer.append(HangulSyllables[L][0]);
308  if (V != -1)
309  Buffer.append(HangulSyllables[V][1]);
310  if (T != -1)
311  Buffer.append(HangulSyllables[T][2]);
312  }
313  return SBase + (std::uint32_t(L) * VCount + std::uint32_t(V)) * TCount +
314  std::uint32_t(T);
315  }
316  // Otherwise, it's an illegal syllable name.
317  return None;
318 }
319 
324 };
325 
326 // Unicode 14.0 Table 4-8. Name Derivation Rule Prefix Strings
327 // This needs to be kept in sync with
328 // llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp
330  {"CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4DBF},
331  {"CJK UNIFIED IDEOGRAPH-", 0x4E00, 0x9FFC},
332  {"CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2A6DD},
333  {"CJK UNIFIED IDEOGRAPH-", 0x2A700, 0x2B734},
334  {"CJK UNIFIED IDEOGRAPH-", 0x2B740, 0x2B81D},
335  {"CJK UNIFIED IDEOGRAPH-", 0x2B820, 0x2CEA1},
336  {"CJK UNIFIED IDEOGRAPH-", 0x2CEB0, 0x2EBE0},
337  {"CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134A},
338  {"TANGUT IDEOGRAPH-", 0x17000, 0x187F7},
339  {"TANGUT IDEOGRAPH-", 0x18D00, 0x18D08},
340  {"KHITAN SMALL SCRIPT CHARACTER-", 0x18B00, 0x18CD5},
341  {"NUSHU CHARACTER-", 0x1B170, 0x1B2FB},
342  {"CJK COMPATIBILITY IDEOGRAPH-", 0xF900, 0xFA6D},
343  {"CJK COMPATIBILITY IDEOGRAPH-", 0xFA70, 0xFAD9},
344  {"CJK COMPATIBILITY IDEOGRAPH-", 0x2F800, 0x2FA1D},
345 };
346 
349  for (auto &&Item : GeneratedNamesDataTable) {
350  Buffer.clear();
351  std::size_t Consummed = 0;
352  char NameStart = 0, NeedleStart = 0;
353  bool DoesStartWith = startsWith(Name, Item.Prefix, Strict, Consummed,
354  NameStart, NeedleStart, /*isPrefix*/ true);
355  if (!DoesStartWith)
356  continue;
357  auto Number = Name.substr(Consummed);
358  unsigned long long V = 0;
359  // Be consistent about mandating upper casing.
360  if (Strict &&
361  llvm::any_of(Number, [](char C) { return C >= 'a' && C <= 'f'; }))
362  return {};
363  if (getAsUnsignedInteger(Number, 16, V) || V < Item.Start || V > Item.End)
364  continue;
365  if (!Strict) {
366  Buffer.append(Item.Prefix);
367  Buffer.append(utohexstr(V, true));
368  }
369  return V;
370  }
371  return None;
372 }
373 
375  BufferType &Buffer) {
376  if (Name.empty())
377  return None;
378 
380  if (!Res)
381  Res = nameToGeneratedCodePoint(Name, Strict, Buffer);
382  if (Res)
383  return *Res;
384 
385  Buffer.clear();
386  Node Node;
387  bool Matches;
388  uint32_t Value;
389  std::tie(Node, Matches, Value) = compareNode(0, Name, Strict, Buffer);
390  if (Matches) {
391  std::reverse(Buffer.begin(), Buffer.end());
392  // UAX44-LM2. Ignore case, whitespace, underscore ('_'), and all medial
393  // hyphens except the hyphen in U+1180 HANGUL JUNGSEONG O-E.
394  if (!Strict && Value == 0x116c &&
395  Name.find_insensitive("O-E") != StringRef::npos) {
396  Buffer = "HANGUL JUNGSEONG O-E";
397  Value = 0x1180;
398  }
399  return Value;
400  }
401  return None;
402 }
403 
405 
406  BufferType Buffer;
407  auto Opt = nameToCodepoint(Name, true, Buffer);
408  return Opt;
409 }
410 
413  BufferType Buffer;
414  auto Opt = nameToCodepoint(Name, false, Buffer);
415  if (!Opt)
416  return None;
417  return LooseMatchingResult{*Opt, Buffer};
418 }
419 
420 // Find the unicode character whose editing distance to Pattern
421 // is shortest, using the Wagner–Fischer algorithm.
423 nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) {
424  // We maintain a fixed size vector of matches,
425  // sorted by distance
426  // The worst match (with the biggest distance) are discarded when new elements
427  // are added.
428  std::size_t LargestEditDistance = 0;
430  Matches.reserve(MaxMatchesCount + 1);
431 
432  auto Insert = [&](const Node &Node, uint32_t Distance,
433  char32_t Value) -> bool {
434  if (Distance > LargestEditDistance) {
435  if (Matches.size() == MaxMatchesCount)
436  return false;
437  LargestEditDistance = Distance;
438  }
439  // To avoid allocations, the creation of the name is delayed
440  // as much as possible.
441  std::string Name;
442  auto GetName = [&] {
443  if (Name.empty())
444  Name = Node.fullName();
445  return Name;
446  };
447 
448  auto It = std::lower_bound(
449  Matches.begin(), Matches.end(), Distance,
450  [&](const MatchForCodepointName &a, std::size_t Distance) {
451  if (Distance == a.Distance)
452  return a.Name < GetName();
453  return a.Distance < Distance;
454  });
455  if (It == Matches.end() && Matches.size() == MaxMatchesCount)
456  return false;
457 
458  MatchForCodepointName M{GetName(), Distance, Value};
459  Matches.insert(It, std::move(M));
460  if (Matches.size() > MaxMatchesCount)
461  Matches.pop_back();
462  return true;
463  };
464 
465  // We ignore case, space, hyphens, etc,
466  // in both the search pattern and the prospective names.
467  auto Normalize = [](StringRef Name) {
468  std::string Out;
469  Out.reserve(Name.size());
470  for (char C : Name) {
471  if (isAlnum(C))
472  Out.push_back(toUpper(C));
473  }
474  return Out;
475  };
476  std::string NormalizedName = Normalize(Pattern);
477 
478  // Allocate a matrix big enough for longest names.
479  const std::size_t Columns =
480  std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) +
481  1;
482 
483  LLVM_ATTRIBUTE_UNUSED static std::size_t Rows =
485 
486  std::vector<char> Distances(
487  Columns * (UnicodeNameToCodepointLargestNameSize + 1), 0);
488 
489  auto Get = [&Distances, Columns](size_t Column, std::size_t Row) -> char & {
490  assert(Column < Columns);
491  assert(Row < Rows);
492  return Distances[Row * Columns + Column];
493  };
494 
495  for (std::size_t I = 0; I < Columns; I++)
496  Get(I, 0) = I;
497 
498  // Visit the childrens,
499  // Filling (and overriding) the matrix for the name fragment of each node
500  // iteratively. CompleteName is used to collect the actual name of potential
501  // match, respecting case and spacing.
502  auto VisitNode = [&](const Node &N, std::size_t Row,
503  auto &VisitNode) -> void {
504  std::size_t J = 0;
505  for (; J < N.Name.size(); J++) {
506  if (!isAlnum(N.Name[J]))
507  continue;
508 
509  Get(0, Row) = Row;
510 
511  for (std::size_t I = 1; I < Columns; I++) {
512  const int Delete = Get(I - 1, Row) + 1;
513  const int Insert = Get(I, Row - 1) + 1;
514 
515  const int Replace =
516  Get(I - 1, Row - 1) + (NormalizedName[I - 1] != N.Name[J] ? 1 : 0);
517 
518  Get(I, Row) = std::min(Insert, std::min(Delete, Replace));
519  }
520 
521  Row++;
522  }
523 
524  unsigned Cost = Get(Columns - 1, Row - 1);
525  if (N.Value != 0xFFFFFFFF) {
526  Insert(N, Cost, N.Value);
527  }
528 
529  if (N.hasChildren()) {
530  auto ChildOffset = N.ChildrenOffset;
531  for (;;) {
532  Node C = readNode(ChildOffset, &N);
533  ChildOffset += C.Size;
534  if (!C.isValid())
535  break;
536  VisitNode(C, Row, VisitNode);
537  if (!C.HasSibling)
538  break;
539  }
540  }
541  };
542 
543  Node Root = createRoot();
544  VisitNode(Root, 1, VisitNode);
545  return Matches;
546 }
547 
548 } // namespace unicode
549 
550 } // namespace sys
551 } // namespace llvm
llvm::sys::unicode::compareNode
static std::tuple< Node, bool, uint32_t > compareNode(uint32_t Offset, StringRef Name, bool Strict, char PreviousCharInName, char PreviousCharInNeedle, BufferType &Buffer, const Node *Parent=nullptr)
Definition: UnicodeNameToCodepoint.cpp:180
llvm::sys::unicode::readNode
static Node readNode(uint32_t Offset, const Node *Parent=nullptr)
Definition: UnicodeNameToCodepoint.cpp:69
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::sys::unicode::Node::Size
uint32_t Size
Definition: UnicodeNameToCodepoint.cpp:36
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::sys::unicode::LooseMatchingResult
Definition: Unicode.h:72
llvm::sys::unicode::Node::ChildrenOffset
uint32_t ChildrenOffset
Definition: UnicodeNameToCodepoint.cpp:34
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::sys::unicode::GeneratedNamesData
Definition: UnicodeNameToCodepoint.cpp:320
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1737
StringRef.h
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:52
Ignore
ReachingDefAnalysis InstSet InstSet & Ignore
Definition: ARMLowOverheadLoops.cpp:542
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1183
llvm::sys::unicode::nameToCodepointStrict
Optional< char32_t > nameToCodepointStrict(StringRef Name)
Maps the name or the alias of a Unicode character to its associated codepoints.
Definition: UnicodeNameToCodepoint.cpp:404
llvm::sys::unicode::Node::IsRoot
bool IsRoot
Definition: UnicodeNameToCodepoint.cpp:32
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:381
llvm::Optional
Definition: APInt.h:33
T
#define T
Definition: Mips16ISelLowering.cpp:341
STLExtras.h
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:172
llvm::sys::unicode::nameToHangulCodePoint
static llvm::Optional< char32_t > nameToHangulCodePoint(StringRef Name, bool Strict, BufferType &Buffer)
Definition: UnicodeNameToCodepoint.cpp:289
size_t
a
=0.0 ? 0.0 :(a > 0.0 ? 1.0 :-1.0) a
Definition: README.txt:489
llvm::sys::unicode::UnicodeNameToCodepointDict
const char * UnicodeNameToCodepointDict
Definition: UnicodeNameToCodepointGenerated.cpp:71
llvm::sys::unicode::UnicodeNameToCodepointIndex
const uint8_t * UnicodeNameToCodepointIndex
Definition: UnicodeNameToCodepointGenerated.cpp:72
llvm::sys::unicode::Node
Definition: UnicodeNameToCodepoint.cpp:31
llvm::sys::unicode::GeneratedNamesData::Prefix
StringRef Prefix
Definition: UnicodeNameToCodepoint.cpp:321
llvm::sys::unicode::Node::hasChildren
constexpr bool hasChildren() const
Definition: UnicodeNameToCodepoint.cpp:43
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::sys::unicode::GeneratedNamesData::End
uint32_t End
Definition: UnicodeNameToCodepoint.cpp:323
llvm::getAsUnsignedInteger
bool getAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long &Result)
Helper functions for StringRef::getAsInteger.
Definition: StringRef.cpp:492
llvm::sys::unicode::findSyllable
static std::size_t findSyllable(StringRef Name, bool Strict, char &PreviousInName, int &Pos, int Column)
Definition: UnicodeNameToCodepoint.cpp:261
llvm::sys::unicode::GeneratedNamesData::Start
uint32_t Start
Definition: UnicodeNameToCodepoint.cpp:322
llvm::sys::unicode::Node::Name
StringRef Name
Definition: UnicodeNameToCodepoint.cpp:37
llvm::None
const NoneType None
Definition: None.h:24
llvm::sys::unicode::LCount
constexpr const uint32_t LCount
Definition: UnicodeNameToCodepoint.cpp:257
llvm::SmallString< 64 >
llvm::sys::unicode::Node::Parent
const Node * Parent
Definition: UnicodeNameToCodepoint.cpp:38
llvm::sys::unicode::SBase
constexpr const char32_t SBase
Definition: UnicodeNameToCodepoint.cpp:256
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
llvm::sys::unicode::Node::isValid
constexpr bool isValid() const
Definition: UnicodeNameToCodepoint.cpp:40
llvm::StringRef::end
iterator end() const
Definition: StringRef.h:105
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
I
#define I(x, y, z)
Definition: MD5.cpp:58
StringExtras.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::sys::unicode::nearestMatchesForCodepointName
SmallVector< MatchForCodepointName > nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount)
Definition: UnicodeNameToCodepoint.cpp:423
Normalize
@ Normalize
Normalize - Normalize according to the given loops.
Definition: ScalarEvolutionNormalization.cpp:24
llvm::sys::unicode::createRoot
static Node createRoot()
Definition: UnicodeNameToCodepoint.cpp:61
llvm::sys::unicode::TCount
constexpr const uint32_t TCount
Definition: UnicodeNameToCodepoint.cpp:259
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1625
llvm::sys::unicode::UnicodeNameToCodepointIndexSize
const std::size_t UnicodeNameToCodepointIndexSize
Definition: UnicodeNameToCodepointGenerated.cpp:73
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::sys::unicode::VCount
constexpr const uint32_t VCount
Definition: UnicodeNameToCodepoint.cpp:258
Number
uint32_t Number
Definition: Profile.cpp:47
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::sys::unicode::MatchForCodepointName
Definition: Unicode.h:79
Unicode.h
llvm::sys::unicode::Node::fullName
std::string fullName() const
Definition: UnicodeNameToCodepoint.cpp:45
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:129
llvm::sys::unicode::GeneratedNamesDataTable
static const GeneratedNamesData GeneratedNamesDataTable[]
Definition: UnicodeNameToCodepoint.cpp:329
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::InlinePriorityMode::Cost
@ Cost
llvm::sys::unicode::Node::HasSibling
bool HasSibling
Definition: UnicodeNameToCodepoint.cpp:35
llvm::sys::unicode::nameToCodepoint
static llvm::Optional< char32_t > nameToCodepoint(StringRef Name, bool Strict, BufferType &Buffer)
Definition: UnicodeNameToCodepoint.cpp:374
llvm::FPOpFusion::Strict
@ Strict
Definition: TargetOptions.h:39
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::sys::unicode::nameToGeneratedCodePoint
static llvm::Optional< char32_t > nameToGeneratedCodePoint(StringRef Name, bool Strict, BufferType &Buffer)
Definition: UnicodeNameToCodepoint.cpp:348
llvm::HasValue
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:598
llvm::sys::unicode::startsWith
static bool startsWith(StringRef Name, StringRef Needle, bool Strict, std::size_t &Consummed, char &PreviousCharInName, char &PreviousCharInNeedle, bool IsPrefix=false)
Definition: UnicodeNameToCodepoint.cpp:120
llvm::Pattern
Definition: FileCheckImpl.h:614
N
#define N
llvm::sys::unicode::HangulSyllables
constexpr const char *const HangulSyllables[][3]
Definition: UnicodeNameToCodepoint.cpp:222
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:651
llvm::sys::unicode::UnicodeNameToCodepointLargestNameSize
const std::size_t UnicodeNameToCodepointLargestNameSize
Definition: UnicodeNameToCodepointGenerated.cpp:74
llvm::StringRef::begin
iterator begin() const
Definition: StringRef.h:103
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::sys::unicode::nameToCodepointLooseMatching
Optional< LooseMatchingResult > nameToCodepointLooseMatching(StringRef Name)
Definition: UnicodeNameToCodepoint.cpp:412
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:793