LLVM  9.0.0svn
FileCheck.cpp
Go to the documentation of this file.
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // FileCheck does a line-by-line check of a file that validates whether it
10 // contains the expected content. This is useful for regression tests etc.
11 //
12 // This file implements most of the API that will be used by the FileCheck utility
13 // as well as various unittests.
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Support/FileCheck.h"
17 #include "llvm/ADT/StringSet.h"
19 #include <cstdint>
20 #include <list>
21 #include <map>
22 #include <tuple>
23 #include <utility>
24 
25 using namespace llvm;
26 
27 /// Parses the given string into the Pattern.
28 ///
29 /// \p Prefix provides which prefix is being matched, \p SM provides the
30 /// SourceMgr used for error reports, and \p LineNumber is the line number in
31 /// the input file from which the pattern string was read. Returns true in
32 /// case of an error, false otherwise.
///
/// On success either FixedStr (a literal pattern) or RegExStr (a regular
/// expression assembled from the pattern's pieces) has been filled in.
34  SourceMgr &SM, unsigned LineNumber,
35  const FileCheckRequest &Req) {
  // Patterns of type CheckNot are never anchored to full lines, even when
  // Req.MatchFullLines is set.
36  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
37 
38  this->LineNumber = LineNumber;
  // Remember where the pattern text starts so diagnostics can point at it.
39  PatternLoc = SMLoc::getFromPointer(PatternStr.data());
40 
41  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
42  // Ignore trailing whitespace.
43  while (!PatternStr.empty() &&
44  (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
45  PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
46 
47  // Check that there is something on the line.
48  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
49  SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
50  "found empty check string with prefix '" + Prefix + ":'");
51  return true;
52  }
53 
  // Conversely, a CheckEmpty directive must not carry any pattern text.
54  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
55  SM.PrintMessage(
56  PatternLoc, SourceMgr::DK_Error,
57  "found non-empty check string for empty check with prefix '" + Prefix +
58  ":'");
59  return true;
60  }
61 
  // A CheckEmpty directive is matched with a fixed regex: a newline character
  // immediately followed by an end-of-line anchor.
62  if (CheckTy == Check::CheckEmpty) {
63  RegExStr = "(\n$)";
64  return false;
65  }
66 
67  // Check to see if this is a fixed string, or if it has regex pieces.
  // Full-line matching always takes the regex path below because it needs the
  // '^' and '$' anchors.
68  if (!MatchFullLinesHere &&
69  (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
70  PatternStr.find("[[") == StringRef::npos))) {
71  FixedStr = PatternStr;
72  return false;
73  }
74 
75  if (MatchFullLinesHere) {
76  RegExStr += '^';
77  if (!Req.NoCanonicalizeWhiteSpace)
78  RegExStr += " *";
79  }
80 
81  // Paren value #0 is for the fully matched string. Any new parenthesized
82  // values add from there.
83  unsigned CurParen = 1;
84 
85  // Otherwise, there is at least one regex piece. Build up the regex pattern
86  // by escaping scary characters in fixed strings, building up one big regex.
87  while (!PatternStr.empty()) {
88  // RegEx matches.
89  if (PatternStr.startswith("{{")) {
90  // This is the start of a regex match. Scan for the }}.
91  size_t End = PatternStr.find("}}");
92  if (End == StringRef::npos) {
93  SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
95  "found start of regex string with no end '}}'");
96  return true;
97  }
98 
99  // Enclose {{}} patterns in parens just like [[]] even though we're not
100  // capturing the result for any purpose. This is required in case the
101  // expression contains an alternation like: CHECK: abc{{x|z}}def. We
102  // want this to turn into: "abc(x|z)def" not "abcx|zdef".
103  RegExStr += '(';
104  ++CurParen;
105 
  // End is the offset of "}}" from the start of PatternStr, so the regex
  // body is the End - 2 characters that follow the opening "{{".
106  if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
107  return true;
108  RegExStr += ')';
109 
110  PatternStr = PatternStr.substr(End + 2);
111  continue;
112  }
113 
114  // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
115  // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
116  // second form is [[foo]] which is a reference to foo. The variable name
117  // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
118  // it. This is to catch some common errors.
119  if (PatternStr.startswith("[[")) {
120  // Find the closing bracket pair ending the match. End is going to be an
121  // offset relative to the beginning of the match string.
122  size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
123 
124  if (End == StringRef::npos) {
125  SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
127  "invalid named regex reference, no ]] found");
128  return true;
129  }
130 
  // End is relative to the text after "[[", so the rest of the pattern
  // begins End + 4 characters in (2 for "[[" plus 2 for "]]").
131  StringRef MatchStr = PatternStr.substr(2, End);
132  PatternStr = PatternStr.substr(End + 4);
133 
134  // Get the regex name (e.g. "foo").
  // When there is no ':' NameEnd is npos and Name is the whole MatchStr.
135  size_t NameEnd = MatchStr.find(':');
136  StringRef Name = MatchStr.substr(0, NameEnd);
137 
138  if (Name.empty()) {
140  "invalid name in named regex: empty name");
141  return true;
142  }
143 
144  // Verify that the name/expression is well formed. FileCheck currently
145  // supports @LINE, @LINE+number, @LINE-number expressions. The check here
146  // is relaxed, more strict check is performed in \c EvaluateExpression.
147  bool IsExpression = false;
148  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
149  if (i == 0) {
150  if (Name[i] == '$') // Global vars start with '$'
151  continue;
152  if (Name[i] == '@') {
  // An '@' expression may only be used, never defined with "name:".
153  if (NameEnd != StringRef::npos) {
156  "invalid name in named regex definition");
157  return true;
158  }
159  IsExpression = true;
160  continue;
161  }
162  }
  // NOTE(review): isalnum receives a plain char here, while the isdigit
  // call further down casts to unsigned char first - confirm whether the
  // same cast is wanted here.
163  if (Name[i] != '_' && !isalnum(Name[i]) &&
164  (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
165  SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
166  SourceMgr::DK_Error, "invalid name in named regex");
167  return true;
168  }
169  }
170 
171  // Name can't start with a digit.
172  if (isdigit(static_cast<unsigned char>(Name[0]))) {
174  "invalid name in named regex");
175  return true;
176  }
177 
178  // Handle [[foo]].
179  if (NameEnd == StringRef::npos) {
180  // Handle variables that were defined earlier on the same line by
181  // emitting a backreference.
182  if (VariableDefs.find(Name) != VariableDefs.end()) {
183  unsigned VarParenNum = VariableDefs[Name];
  // AddBackrefToRegEx emits a single digit, so only groups 1-9 can be
  // referenced.
184  if (VarParenNum < 1 || VarParenNum > 9) {
187  "Can't back-reference more than 9 variables");
188  return true;
189  }
190  AddBackrefToRegEx(VarParenNum);
191  } else {
  // Record the name together with the current length of RegExStr;
  // Match() inserts the variable's value at that offset.
192  VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
193  }
194  continue;
195  }
196 
197  // Handle [[foo:.*]].
  // The definition's capture group takes the next free paren index.
198  VariableDefs[Name] = CurParen;
199  RegExStr += '(';
200  ++CurParen;
201 
202  if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
203  return true;
204 
205  RegExStr += ')';
  // There is no 'continue' here: control falls through to the fixed string
  // handling below, which appends whatever literal text follows the
  // definition (possibly nothing).
206  }
207 
208  // Handle fixed string matches.
209  // Find the end, which is the start of the next regex.
210  size_t FixedMatchEnd = PatternStr.find("{{");
211  FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
212  RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
213  PatternStr = PatternStr.substr(FixedMatchEnd);
214  }
215 
216  if (MatchFullLinesHere) {
217  if (!Req.NoCanonicalizeWhiteSpace)
218  RegExStr += " *";
219  RegExStr += '$';
220  }
221 
222  return false;
223 }
224 
/// Validates the regular expression text \p RS and appends it to RegExStr.
///
/// Returns true if \p RS is not a valid regex (nothing is appended in that
/// case), false otherwise. On success \p CurParen is advanced by
/// R.getNumMatches() - presumably the number of capture groups in \p RS, so
/// that later group indices stay in sync; confirm against the Regex API.
225 bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
226  Regex R(RS);
227  std::string Error;
228  if (!R.isValid(Error)) {
230  "invalid regex: " + Error);
231  return true;
232  }
233 
234  RegExStr += RS.str();
235  CurParen += R.getNumMatches();
236  return false;
237 }
238 
239 void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) {
240  assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
241  std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
242  RegExStr += Backref;
243 }
244 
245 /// Evaluates expression and stores the result to \p Value.
246 ///
247 /// Returns true on success and false when the expression has invalid syntax.
248 bool FileCheckPattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
249  // The only supported expression is @LINE([\+-]\d+)?
250  if (!Expr.startswith("@LINE"))
251  return false;
252  Expr = Expr.substr(StringRef("@LINE").size());
253  int Offset = 0;
254  if (!Expr.empty()) {
255  if (Expr[0] == '+')
256  Expr = Expr.substr(1);
257  else if (Expr[0] != '-')
258  return false;
259  if (Expr.getAsInteger(10, Offset))
260  return false;
261  }
262  Value = llvm::itostr(LineNumber + Offset);
263  return true;
264 }
265 
266 /// Matches the pattern string against the input buffer \p Buffer
267 ///
268 /// This returns the position that is matched or npos if there is no match. If
269 /// there is a match, the size of the matched string is returned in \p
270 /// MatchLen.
271 ///
272 /// The \p VariableTable StringMap provides the current values of filecheck
273 /// variables and is updated if this match defines new values.
///
/// npos is also returned when the pattern uses an undefined variable or an
/// expression that EvaluateExpression rejects.
274 size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
275  StringMap<StringRef> &VariableTable) const {
276  // If this is the EOF pattern, match it immediately.
277  if (CheckTy == Check::CheckEOF) {
278  MatchLen = 0;
279  return Buffer.size();
280  }
281 
282  // If this is a fixed string pattern, just match it now.
  // Note that MatchLen is written even when find() returns npos.
283  if (!FixedStr.empty()) {
284  MatchLen = FixedStr.size();
285  return Buffer.find(FixedStr);
286  }
287 
288  // Regex match.
289 
290  // If there are variable uses, we need to create a temporary string with the
291  // actual value.
292  StringRef RegExToMatch = RegExStr;
293  std::string TmpStr;
294  if (!VariableUses.empty()) {
295  TmpStr = RegExStr;
296 
  // The offsets stored in VariableUses refer to the unmodified RegExStr.
  // Every value inserted into TmpStr shifts the later offsets, so keep a
  // running total of the inserted characters.
297  unsigned InsertOffset = 0;
298  for (const auto &VariableUse : VariableUses) {
299  std::string Value;
300 
  // Names starting with '@' are expressions, not table lookups.
301  if (VariableUse.first[0] == '@') {
302  if (!EvaluateExpression(VariableUse.first, Value))
303  return StringRef::npos;
304  } else {
306  VariableTable.find(VariableUse.first);
307  // If the variable is undefined, return an error.
308  if (it == VariableTable.end())
309  return StringRef::npos;
310 
311  // Look up the value and escape it so that we can put it into the regex.
312  Value += Regex::escape(it->second);
313  }
314 
315  // Plop it into the regex at the adjusted offset.
316  TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
317  Value.begin(), Value.end());
318  InsertOffset += Value.size();
319  }
320 
321  // Match the newly constructed regex.
322  RegExToMatch = TmpStr;
323  }
324 
  // A Regex object is constructed from the pattern text on every call.
325  SmallVector<StringRef, 4> MatchInfo;
326  if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
327  return StringRef::npos;
328 
329  // Successful regex match.
330  assert(!MatchInfo.empty() && "Didn't get any match");
331  StringRef FullMatch = MatchInfo[0];
332 
333  // If this defines any variables, remember their values.
  // VariableDef.second is the capture group index assigned to the variable
  // when the pattern was parsed.
334  for (const auto &VariableDef : VariableDefs) {
335  assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
336  VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
337  }
338 
339  // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
340  // the required preceding newline, which is consumed by the pattern in the
341  // case of CHECK-EMPTY but not CHECK-NEXT.
  // The comparison yields 1 for CheckEmpty and 0 otherwise.
342  size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
343  MatchLen = FullMatch.size() - MatchStartSkip;
344  return FullMatch.data() - Buffer.data() + MatchStartSkip;
345 }
346 
347 
348 /// Computes an arbitrary estimate for the quality of matching this pattern at
349 /// the start of \p Buffer; a distance of zero should correspond to a perfect
350 /// match.
351 unsigned
352 FileCheckPattern::ComputeMatchDistance(StringRef Buffer,
353  const StringMap<StringRef> &VariableTable) const {
354  // Just compute the number of matching characters. For regular expressions, we
355  // just compare against the regex itself and hope for the best.
356  //
357  // FIXME: One easy improvement here is have the regex lib generate a single
358  // example regular expression which matches, and use that as the example
359  // string.
360  StringRef ExampleString(FixedStr);
361  if (ExampleString.empty())
362  ExampleString = RegExStr;
363 
364  // Only compare up to the first line in the buffer, or the string size.
365  StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
366  BufferPrefix = BufferPrefix.split('\n').first;
367  return BufferPrefix.edit_distance(ExampleString);
368 }
369 
/// Emits one note per variable or '@' expression used by this pattern,
/// stating its current value, or that the variable is undefined, or that the
/// expression is incorrect. When \p MatchRange is valid the note is attached
/// to that range.
371  const StringMap<StringRef> &VariableTable,
372  SMRange MatchRange) const {
373  // If this was a regular expression using variables, print the current
374  // variable values.
375  if (!VariableUses.empty()) {
376  for (const auto &VariableUse : VariableUses) {
  // The text of each note is built in a local buffer through OS.
377  SmallString<256> Msg;
378  raw_svector_ostream OS(Msg);
379  StringRef Var = VariableUse.first;
  // Names starting with '@' are expressions and are evaluated rather than
  // looked up in VariableTable.
380  if (Var[0] == '@') {
381  std::string Value;
382  if (EvaluateExpression(Var, Value)) {
383  OS << "with expression \"";
384  OS.write_escaped(Var) << "\" equal to \"";
385  OS.write_escaped(Value) << "\"";
386  } else {
387  OS << "uses incorrect expression \"";
388  OS.write_escaped(Var) << "\"";
389  }
390  } else {
391  StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
392 
393  // Check for undefined variable references.
394  if (it == VariableTable.end()) {
395  OS << "uses undefined variable \"";
396  OS.write_escaped(Var) << "\"";
397  } else {
398  OS << "with variable \"";
399  OS.write_escaped(Var) << "\" equal to \"";
400  OS.write_escaped(it->second) << "\"";
401  }
402  }
403 
404  if (MatchRange.isValid())
405  SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
406  {MatchRange});
407  else
409  SourceMgr::DK_Note, OS.str());
410  }
411  }
412 }
413 
/// Builds the input range covering \p Len characters starting at offset
/// \p Pos of \p Buffer and returns it.
///
/// When \p Diags is non-null the result is also recorded there: normally a
/// new diagnostic is appended, but with \p AdjustPrevDiag only the MatchTy of
/// the most recently added diagnostic is overwritten.
415  const SourceMgr &SM, SMLoc Loc,
416  Check::FileCheckType CheckTy,
417  StringRef Buffer, size_t Pos, size_t Len,
418  std::vector<FileCheckDiag> *Diags,
419  bool AdjustPrevDiag = false) {
420  SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
421  SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
422  SMRange Range(Start, End);
423  if (Diags) {
  // NOTE(review): the AdjustPrevDiag path dereferences rbegin() and so
  // assumes *Diags is not empty - confirm that callers guarantee this.
424  if (AdjustPrevDiag)
425  Diags->rbegin()->MatchTy = MatchTy;
426  else
427  Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
428  }
429  return Range;
430 }
431 
/// Searches the start of \p Buffer for the position that most resembles this
/// pattern and, if a reasonable candidate is found, prints a
/// "possible intended match here" note for it.
433  const SourceMgr &SM, StringRef Buffer,
434  const StringMap<StringRef> &VariableTable,
435  std::vector<FileCheckDiag> *Diags) const {
436  // Attempt to find the closest/best fuzzy match. Usually an error happens
437  // because some string in the output didn't exactly match. In these cases, we
438  // would like to show the user a best guess at what "should have" matched, to
439  // save them having to actually check the input manually.
440  size_t NumLinesForward = 0;
441  size_t Best = StringRef::npos;
442  double BestQuality = 0;
443 
444  // Use an arbitrary 4k limit on how far we will search.
445  for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
446  if (Buffer[i] == '\n')
447  ++NumLinesForward;
448 
449  // Patterns have leading whitespace stripped, so skip whitespace when
450  // looking for something which looks like a pattern.
451  if (Buffer[i] == ' ' || Buffer[i] == '\t')
452  continue;
453 
454  // Compute the "quality" of this match as an arbitrary combination of the
455  // match distance and the number of lines skipped to get to this match.
  // Lower values are better; each skipped line adds 0.01.
456  unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
457  double Quality = Distance + (NumLinesForward / 100.);
458 
  // The first candidate is always taken; after that only strictly better
  // ones replace it, so ties keep the earliest position.
459  if (Quality < BestQuality || Best == StringRef::npos) {
460  Best = i;
461  BestQuality = Quality;
462  }
463  }
464 
465  // Print the "possible intended match here" line if we found something
466  // reasonable and not equal to what we showed in the "scanning from here"
467  // line.
  // A Best of 0 is the start of the buffer, so the first test rejects it.
468  if (Best && Best != StringRef::npos && BestQuality < 50) {
469  SMRange MatchRange =
471  getCheckTy(), Buffer, Best, 0, Diags);
472  SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note,
473  "possible intended match here");
474 
475  // FIXME: If we wanted to be really friendly we would show why the match
476  // failed, as it can be hard to spot simple one character differences.
477  }
478 }
479 
480 /// Finds the closing sequence of a regex variable usage or definition.
481 ///
482 /// \p Str has to point in the beginning of the definition (right after the
483 /// opening sequence). Returns the offset of the closing sequence within Str,
484 /// or npos if it was not found.
///
/// A "]]" only counts as the closing sequence when it is not nested inside a
/// '[' ... ']' pair, and characters preceded by a backslash are skipped.
485 size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
486  // Offset keeps track of the current offset within the input Str
487  size_t Offset = 0;
488  // [...] Nesting depth
489  size_t BracketDepth = 0;
490 
491  while (!Str.empty()) {
492  if (Str.startswith("]]") && BracketDepth == 0)
493  return Offset;
494  if (Str[0] == '\\') {
495  // Backslash escapes the next char within regexes, so skip them both.
  // NOTE(review): when the backslash is the last character this relies on
  // substr tolerating a start past the end - presumably it clamps; confirm.
496  Str = Str.substr(2);
497  Offset += 2;
498  } else {
499  switch (Str[0]) {
500  default:
501  break;
502  case '[':
503  BracketDepth++;
504  break;
505  case ']':
  // A ']' with no open '[' that is not part of "]]" is a hard error.
  // NOTE(review): this path terminates the whole process via exit(1)
  // instead of returning an error to the caller.
506  if (BracketDepth == 0) {
509  "missing closing \"]\" for regex variable");
510  exit(1);
511  }
512  BracketDepth--;
513  break;
514  }
515  Str = Str.substr(1);
516  Offset++;
517  }
518  }
519 
520  return StringRef::npos;
521 }
522 
523 /// Canonicalize whitespaces in the file. Line endings are replaced with
524 /// UNIX-style '\n'.
///
/// The canonical text is written to \p OutputBuffer and the returned
/// StringRef refers to that storage. Unless Req.NoCanonicalizeWhiteSpace is
/// set, every run of spaces and tabs is collapsed into a single space.
525 StringRef
527  SmallVectorImpl<char> &OutputBuffer) {
528  OutputBuffer.reserve(MB.getBufferSize());
529 
530  for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
531  Ptr != End; ++Ptr) {
532  // Eliminate trailing dosish \r.
  // Only a '\r' directly followed by '\n' is dropped; a lone '\r' is kept.
533  if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
534  continue;
535  }
536 
537  // If current char is not a horizontal whitespace or if horizontal
538  // whitespace canonicalization is disabled, dump it to output as is.
539  if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
540  OutputBuffer.push_back(*Ptr);
541  continue;
542  }
543 
544  // Otherwise, add one space and advance over neighboring space.
545  OutputBuffer.push_back(' ');
546  while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
547  ++Ptr;
548  }
549 
550  // Add a null byte and then return all but that byte.
551  OutputBuffer.push_back('\0');
552  return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
553 }
554 
/// Stores the check type and match type, and converts \p InputRange and
/// \p CheckLoc into line and column numbers using \p SM.
556  const Check::FileCheckType &CheckTy,
557  SMLoc CheckLoc, MatchType MatchTy,
558  SMRange InputRange)
559  : CheckTy(CheckTy), MatchTy(MatchTy) {
  // Each getLineAndColumn result is a pair: first is the line, second the
  // column.
560  auto Start = SM.getLineAndColumn(InputRange.Start);
561  auto End = SM.getLineAndColumn(InputRange.End);
562  InputStartLine = Start.first;
563  InputStartCol = Start.second;
564  InputEndLine = End.first;
565  InputEndCol = End.second;
  // Start is reused here to hold the position of the check directive.
566  Start = SM.getLineAndColumn(CheckLoc);
567  CheckLine = Start.first;
568  CheckCol = Start.second;
569 }
570 
/// Returns true if \p c may appear inside a check-prefix-like word, i.e. it
/// is an alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers require an argument representable as unsigned
  // char (or EOF). Passing a negative plain char is undefined behavior, so
  // convert before calling isalnum.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
574 
  // Stores C as the new count and returns *this so that calls can be chained.
  // NOTE(review): the first assert tests the current member Count, not the
  // incoming value C, so a zero or negative C is not caught here - confirm
  // whether 'C > 0' was intended.
576  assert(Count > 0 && "zero and negative counts are not supported");
  // Any count other than 1 is only accepted for the CheckPlain kind.
577  assert((C == 1 || Kind == CheckPlain) &&
578  "count supported only for plain CHECK directives");
579  Count = C;
580  return *this;
581 }
582 
583 // Get a description of the type.
// For directive kinds that have a suffix the result is the prefix followed by
// that suffix (for example "-NEXT"). The remaining kinds map to a fixed
// descriptive string.
585  switch (Kind) {
586  case Check::CheckNone:
587  return "invalid";
588  case Check::CheckPlain:
  // A plain check with a count above 1 is described as a -COUNT directive.
589  if (Count > 1)
590  return Prefix.str() + "-COUNT";
591  return Prefix;
592  case Check::CheckNext:
593  return Prefix.str() + "-NEXT";
594  case Check::CheckSame:
595  return Prefix.str() + "-SAME";
596  case Check::CheckNot:
597  return Prefix.str() + "-NOT";
598  case Check::CheckDAG:
599  return Prefix.str() + "-DAG";
600  case Check::CheckLabel:
601  return Prefix.str() + "-LABEL";
602  case Check::CheckEmpty:
603  return Prefix.str() + "-EMPTY";
604  case Check::CheckEOF:
605  return "implicit EOF";
606  case Check::CheckBadNot:
607  return "bad NOT";
609  return "bad COUNT";
610  }
  // Reached only if Kind holds a value that none of the cases above handle.
611  llvm_unreachable("unknown FileCheckType");
612 }
613 
/// Determines which kind of check directive follows the prefix at the start
/// of \p Buffer.
///
/// Returns the detected check type together with the text that follows the
/// parsed suffix. The type is Check::CheckNone when no valid directive is
/// recognized, and Check::CheckBadNot or Check::CheckBadCount for suffixes
/// that look like directives but are not supported or are malformed.
614 static std::pair<Check::FileCheckType, StringRef>
  // There has to be at least one character after the prefix.
616  if (Buffer.size() <= Prefix.size())
617  return {Check::CheckNone, StringRef()};
618 
619  char NextChar = Buffer[Prefix.size()];
620 
  // The size check above guarantees that dropping Prefix.size() + 1
  // characters stays within Buffer.
621  StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
622  // Verify that the : is present after the prefix.
623  if (NextChar == ':')
624  return {Check::CheckPlain, Rest};
625 
626  if (NextChar != '-')
627  return {Check::CheckNone, StringRef()};
628 
  // "COUNT-<n>:" requires a positive decimal count that fits in 32 bits.
629  if (Rest.consume_front("COUNT-")) {
630  int64_t Count;
631  if (Rest.consumeInteger(10, Count))
632  // Error happened in parsing integer.
633  return {Check::CheckBadCount, Rest};
634  if (Count <= 0 || Count > INT32_MAX)
635  return {Check::CheckBadCount, Rest};
636  if (!Rest.consume_front(":"))
637  return {Check::CheckBadCount, Rest};
638  return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest};
639  }
640 
641  if (Rest.consume_front("NEXT:"))
642  return {Check::CheckNext, Rest};
643 
644  if (Rest.consume_front("SAME:"))
645  return {Check::CheckSame, Rest};
646 
647  if (Rest.consume_front("NOT:"))
648  return {Check::CheckNot, Rest};
649 
650  if (Rest.consume_front("DAG:"))
651  return {Check::CheckDAG, Rest};
652 
653  if (Rest.consume_front("LABEL:"))
654  return {Check::CheckLabel, Rest};
655 
656  if (Rest.consume_front("EMPTY:"))
657  return {Check::CheckEmpty, Rest};
658 
659  // You can't combine -NOT with another suffix.
  // startswith is used here, so Rest is returned with the suffix still in it.
660  if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
661  Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
662  Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
663  Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
664  return {Check::CheckBadNot, Rest};
665 
666  return {Check::CheckNone, Rest};
667 }
668 
669 // From the given position, find the next character after the word.
670 static size_t SkipWord(StringRef Str, size_t Loc) {
671  while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
672  ++Loc;
673  return Loc;
674 }
675 
676 /// Search the buffer for the first prefix in the prefix regular expression.
677 ///
678 /// This searches the buffer using the provided regular expression, however it
679 /// enforces constraints beyond that:
680 /// 1) The found prefix must not be a suffix of something that looks like
681 /// a valid prefix.
682 /// 2) The found prefix must be followed by a valid check type suffix using \c
683 /// FindCheckType above.
684 ///
685 /// Returns a pair of StringRefs into the Buffer, which combines:
686 /// - the first match of the regular expression to satisfy these two is
687 /// returned,
688 /// otherwise an empty StringRef is returned to indicate failure.
689 /// - buffer rewound to the location right after parsed suffix, for parsing
690 /// to continue from
691 ///
692 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
693 /// start at the beginning of the returned prefix, increment \p LineNumber for
694 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
695 /// check found by examining the suffix.
696 ///
697 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
698 /// is unspecified.
699 static std::pair<StringRef, StringRef>
701  unsigned &LineNumber, Check::FileCheckType &CheckTy) {
703 
704  while (!Buffer.empty()) {
705  // Find the first (longest) match using the RE.
706  if (!PrefixRE.match(Buffer, &Matches))
707  // No match at all, bail.
708  return {StringRef(), StringRef()};
709 
710  StringRef Prefix = Matches[0];
711  Matches.clear();
712 
713  assert(Prefix.data() >= Buffer.data() &&
714  Prefix.data() < Buffer.data() + Buffer.size() &&
715  "Prefix doesn't start inside of buffer!");
  // Drop everything before the match and count the newlines in it, so that
  // LineNumber refers to the line the candidate prefix is on.
716  size_t Loc = Prefix.data() - Buffer.data();
717  StringRef Skipped = Buffer.substr(0, Loc);
718  Buffer = Buffer.drop_front(Loc);
719  LineNumber += Skipped.count('\n');
720 
721  // Check that the matched prefix isn't a suffix of some other check-like
722  // word.
723  // FIXME: This is a very ad-hoc check. it would be better handled in some
724  // other way. Among other things it seems hard to distinguish between
725  // intentional and unintentional uses of this feature.
726  if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
727  // Now extract the type.
728  StringRef AfterSuffix;
729  std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix);
730 
731  // If we've found a valid check type for this prefix, we're done.
  // CheckBadNot and CheckBadCount are returned as well, since they differ
  // from CheckNone.
732  if (CheckTy != Check::CheckNone)
733  return {Prefix, AfterSuffix};
734  }
735 
736  // If we didn't successfully find a prefix, we need to skip this invalid
737  // prefix and continue scanning. We directly skip the prefix that was
738  // matched and any additional parts of that check-like word.
739  Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
740  }
741 
742  // We ran out of buffer while skipping partial matches so give up.
743  return {StringRef(), StringRef()};
744 }
745 
746 /// Read the check file, which specifies the sequence of expected strings.
747 ///
748 /// The strings are added to the CheckStrings vector. Returns true in case of
749 /// an error, false otherwise.
///
/// CHECK-DAG and CHECK-NOT patterns are not added as entries of their own.
/// They are collected and attached to the next other check string, or to a
/// synthetic EOF check if none follows. Patterns from Req.ImplicitCheckNot
/// are attached to every check string in the same way.
751  Regex &PrefixRE,
752  std::vector<FileCheckString> &CheckStrings) {
753  std::vector<FileCheckPattern> ImplicitNegativeChecks;
754  for (const auto &PatternString : Req.ImplicitCheckNot) {
755  // Create a buffer with fake command line content in order to display the
756  // command line option responsible for the specific implicit CHECK-NOT.
757  std::string Prefix = "-implicit-check-not='";
758  std::string Suffix = "'";
759  std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
760  Prefix + PatternString + Suffix, "command line");
761 
  // PatternInBuffer refers to the pattern text inside the new buffer, so
  // diagnostics for it point into the fake command line.
762  StringRef PatternInBuffer =
763  CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
764  SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
765 
  // NOTE(review): the result of ParsePattern is ignored here, so a
  // malformed implicit pattern does not make this function fail - confirm
  // that this is intended.
766  ImplicitNegativeChecks.push_back(FileCheckPattern(Check::CheckNot));
767  ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
768  "IMPLICIT-CHECK", SM, 0, Req);
769  }
770 
  // Pending DAG/NOT patterns for the next check string, seeded with the
  // implicit ones.
771  std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks;
772 
773  // LineNumber keeps track of the line on which CheckPrefix instances are
774  // found.
775  unsigned LineNumber = 1;
776 
777  while (1) {
779 
780  // See if a prefix occurs in the memory buffer.
781  StringRef UsedPrefix;
782  StringRef AfterSuffix;
783  std::tie(UsedPrefix, AfterSuffix) =
784  FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy);
  // An empty prefix means there are no further directives.
785  if (UsedPrefix.empty())
786  break;
787  assert(UsedPrefix.data() == Buffer.data() &&
788  "Failed to move Buffer's start forward, or pointed prefix outside "
789  "of the buffer!");
790  assert(AfterSuffix.data() >= Buffer.data() &&
791  AfterSuffix.data() < Buffer.data() + Buffer.size() &&
792  "Parsing after suffix doesn't start inside of buffer!");
793 
794  // Location to use for error messages.
795  const char *UsedPrefixStart = UsedPrefix.data();
796 
797  // Skip the buffer to the end of parsed suffix (or just prefix, if no good
798  // suffix was processed).
799  Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
800  : AfterSuffix;
801 
802  // Complain about useful-looking but unsupported suffixes.
803  if (CheckTy == Check::CheckBadNot) {
805  "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
806  return true;
807  }
808 
809  // Complain about invalid count specification.
810  if (CheckTy == Check::CheckBadCount) {
812  "invalid count in -COUNT specification on prefix '" +
813  UsedPrefix + "'");
814  return true;
815  }
816 
817  // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
818  // leading whitespace.
819  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
820  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
821 
822  // Scan ahead to the end of line.
  // EOL is npos when the directive is on the last line without a newline.
823  size_t EOL = Buffer.find_first_of("\n\r");
824 
825  // Remember the location of the start of the pattern, for diagnostics.
826  SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
827 
828  // Parse the pattern.
829  FileCheckPattern P(CheckTy);
830  if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber, Req))
831  return true;
832 
833  // Verify that CHECK-LABEL lines do not define or use variables
834  if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
835  SM.PrintMessage(
836  SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
837  "found '" + UsedPrefix + "-LABEL:'"
838  " with variable definition or use");
839  return true;
840  }
841 
  // Continue scanning from the end of this line.
842  Buffer = Buffer.substr(EOL);
843 
844  // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
845  if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
846  CheckTy == Check::CheckEmpty) &&
847  CheckStrings.empty()) {
848  StringRef Type = CheckTy == Check::CheckNext
849  ? "NEXT"
850  : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
  // NOTE(review): in the message below the quote opened before the second
  // UsedPrefix is never closed (": line" has no trailing quote) - confirm
  // whether that is intended.
851  SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
853  "found '" + UsedPrefix + "-" + Type +
854  "' without previous '" + UsedPrefix + ": line");
855  return true;
856  }
857 
858  // Handle CHECK-DAG/-NOT.
  // These are only collected here and attached to the next check string.
859  if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
860  DagNotMatches.push_back(P);
861  continue;
862  }
863 
864  // Okay, add the string we captured to the output vector and move on.
  // The pending DAG/NOT patterns move into the new entry, and the pending
  // list is reset to the implicit checks.
865  CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
866  std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
867  DagNotMatches = ImplicitNegativeChecks;
868  }
869 
870  // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
871  // prefix as a filler for the error message.
872  if (!DagNotMatches.empty()) {
873  CheckStrings.emplace_back(FileCheckPattern(Check::CheckEOF), *Req.CheckPrefixes.begin(),
874  SMLoc::getFromPointer(Buffer.data()));
875  std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
876  }
877 
  // Finding no check strings at all is an error; list the prefixes that were
  // searched for.
878  if (CheckStrings.empty()) {
879  errs() << "error: no check strings found with prefix"
880  << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
881  auto I = Req.CheckPrefixes.begin();
882  auto E = Req.CheckPrefixes.end();
883  if (I != E) {
884  errs() << "\'" << *I << ":'";
885  ++I;
886  }
887  for (; I != E; ++I)
888  errs() << ", \'" << *I << ":'";
889 
890  errs() << '\n';
891  return true;
892  }
893 
894  return false;
895 }
896 
/// Records and, where appropriate, prints the diagnostics for a pattern that
/// was found in the input.
///
/// \p ExpectedMatch says whether finding the pattern is the desired outcome.
/// An expected match is reported as a remark and only when Req.Verbose is
/// set; an unexpected one is reported as an error. The match range is passed
/// to ProcessMatchResult before any printing, so \p Diags is updated even
/// when nothing is printed.
897 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
898  StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
899  int MatchedCount, StringRef Buffer,
900  StringMap<StringRef> &VariableTable, size_t MatchPos,
901  size_t MatchLen, const FileCheckRequest &Req,
902  std::vector<FileCheckDiag> *Diags) {
903  bool PrintDiag = true;
904  if (ExpectedMatch) {
905  if (!Req.Verbose)
906  return;
  // Matches of the EOF pattern are only reported when Req.VerboseVerbose
  // is set.
907  if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
908  return;
909  // Due to their verbosity, we don't print verbose diagnostics here if we're
910  // gathering them for a different rendering, but we always print other
911  // diagnostics.
912  PrintDiag = !Diags;
913  }
914  SMRange MatchRange = ProcessMatchResult(
917  SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
918  if (!PrintDiag)
919  return;
920 
921  std::string Message = formatv("{0}: {1} string found in input",
922  Pat.getCheckTy().getDescription(Prefix),
923  (ExpectedMatch ? "expected" : "excluded"))
924  .str();
  // For patterns with a count above 1, say which repetition this is.
925  if (Pat.getCount() > 1)
926  Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
927 
928  SM.PrintMessage(
929  Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
930  SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
931  {MatchRange});
932  Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
933 }
934 
935 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
936  const FileCheckString &CheckStr, int MatchedCount,
937  StringRef Buffer, StringMap<StringRef> &VariableTable,
938  size_t MatchPos, size_t MatchLen, FileCheckRequest &Req,
939  std::vector<FileCheckDiag> *Diags) {
940  PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
941  MatchedCount, Buffer, VariableTable, MatchPos, MatchLen, Req,
942  Diags);
943 }
944 
945 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
946  StringRef Prefix, SMLoc Loc,
947  const FileCheckPattern &Pat, int MatchedCount,
948  StringRef Buffer, StringMap<StringRef> &VariableTable,
949  bool VerboseVerbose,
950  std::vector<FileCheckDiag> *Diags) {
951  bool PrintDiag = true;
952  if (!ExpectedMatch) {
953  if (!VerboseVerbose)
954  return;
955  // Due to their verbosity, we don't print verbose diagnostics here if we're
956  // gathering them for a different rendering, but we always print other
957  // diagnostics.
958  PrintDiag = !Diags;
959  }
960 
961  // If the current position is at the end of a line, advance to the start of
962  // the next line.
963  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
964  SMRange SearchRange = ProcessMatchResult(
967  SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
968  if (!PrintDiag)
969  return;
970 
971  // Print "not found" diagnostic.
972  std::string Message = formatv("{0}: {1} string not found in input",
973  Pat.getCheckTy().getDescription(Prefix),
974  (ExpectedMatch ? "expected" : "excluded"))
975  .str();
976  if (Pat.getCount() > 1)
977  Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
978  SM.PrintMessage(
979  Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);
980 
981  // Print the "scanning from here" line.
982  SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
983 
984  // Allow the pattern to print additional information if desired.
985  Pat.PrintVariableUses(SM, Buffer, VariableTable);
986 
987  if (ExpectedMatch)
988  Pat.PrintFuzzyMatch(SM, Buffer, VariableTable, Diags);
989 }
990 
991 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
992  const FileCheckString &CheckStr, int MatchedCount,
993  StringRef Buffer, StringMap<StringRef> &VariableTable,
994  bool VerboseVerbose,
995  std::vector<FileCheckDiag> *Diags) {
996  PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
997  MatchedCount, Buffer, VariableTable, VerboseVerbose, Diags);
998 }
999 
1000 /// Count the number of newlines in the specified range.
1001 static unsigned CountNumNewlinesBetween(StringRef Range,
1002  const char *&FirstNewLine) {
1003  unsigned NumNewLines = 0;
1004  while (1) {
1005  // Scan for newline.
1006  Range = Range.substr(Range.find_first_of("\n\r"));
1007  if (Range.empty())
1008  return NumNewLines;
1009 
1010  ++NumNewLines;
1011 
1012  // Handle \n\r and \r\n as a single newline.
1013  if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1014  (Range[0] != Range[1]))
1015  Range = Range.substr(1);
1016  Range = Range.substr(1);
1017 
1018  if (NumNewLines == 1)
1019  FirstNewLine = Range.begin();
1020  }
1021 }
1022 
1023 /// Match check string and its "not strings" and/or "dag strings".
1024 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
1025  bool IsLabelScanMode, size_t &MatchLen,
1026  StringMap<StringRef> &VariableTable,
1027  FileCheckRequest &Req,
1028  std::vector<FileCheckDiag> *Diags) const {
1029  size_t LastPos = 0;
1030  std::vector<const FileCheckPattern *> NotStrings;
1031 
1032  // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1033  // bounds; we have not processed variable definitions within the bounded block
1034  // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1035  // over the block again (including the last CHECK-LABEL) in normal mode.
1036  if (!IsLabelScanMode) {
1037  // Match "dag strings" (with mixed "not strings" if any).
1038  LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable, Req, Diags);
1039  if (LastPos == StringRef::npos)
1040  return StringRef::npos;
1041  }
1042 
1043  // Match itself from the last position after matching CHECK-DAG.
1044  size_t LastMatchEnd = LastPos;
1045  size_t FirstMatchPos = 0;
1046  // Go match the pattern Count times. Majority of patterns only match with
1047  // count 1 though.
1048  assert(Pat.getCount() != 0 && "pattern count can not be zero");
1049  for (int i = 1; i <= Pat.getCount(); i++) {
1050  StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
1051  size_t CurrentMatchLen;
1052  // get a match at current start point
1053  size_t MatchPos = Pat.Match(MatchBuffer, CurrentMatchLen, VariableTable);
1054  if (i == 1)
1055  FirstMatchPos = LastPos + MatchPos;
1056 
1057  // report
1058  if (MatchPos == StringRef::npos) {
1059  PrintNoMatch(true, SM, *this, i, MatchBuffer, VariableTable,
1060  Req.VerboseVerbose, Diags);
1061  return StringRef::npos;
1062  }
1063  PrintMatch(true, SM, *this, i, MatchBuffer, VariableTable, MatchPos,
1064  CurrentMatchLen, Req, Diags);
1065 
1066  // move start point after the match
1067  LastMatchEnd += MatchPos + CurrentMatchLen;
1068  }
1069  // Full match len counts from first match pos.
1070  MatchLen = LastMatchEnd - FirstMatchPos;
1071 
1072  // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1073  // or CHECK-NOT
1074  if (!IsLabelScanMode) {
1075  size_t MatchPos = FirstMatchPos - LastPos;
1076  StringRef MatchBuffer = Buffer.substr(LastPos);
1077  StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1078 
1079  // If this check is a "CHECK-NEXT", verify that the previous match was on
1080  // the previous line (i.e. that there is one newline between them).
1081  if (CheckNext(SM, SkippedRegion)) {
1083  Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
1084  Diags, Req.Verbose);
1085  return StringRef::npos;
1086  }
1087 
1088  // If this check is a "CHECK-SAME", verify that the previous match was on
1089  // the same line (i.e. that there is no newline between them).
1090  if (CheckSame(SM, SkippedRegion)) {
1092  Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
1093  Diags, Req.Verbose);
1094  return StringRef::npos;
1095  }
1096 
1097  // If this match had "not strings", verify that they don't exist in the
1098  // skipped region.
1099  if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
1100  return StringRef::npos;
1101  }
1102 
1103  return FirstMatchPos;
1104 }
1105 
1106 /// Verify there is a single line in the given buffer.
1107 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1108  if (Pat.getCheckTy() != Check::CheckNext &&
1109  Pat.getCheckTy() != Check::CheckEmpty)
1110  return false;
1111 
1112  Twine CheckName =
1113  Prefix +
1114  Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
1115 
1116  // Count the number of newlines between the previous match and this one.
1117  const char *FirstNewLine = nullptr;
1118  unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1119 
1120  if (NumNewLines == 0) {
1122  CheckName + ": is on the same line as previous match");
1124  "'next' match was here");
1126  "previous match ended here");
1127  return true;
1128  }
1129 
1130  if (NumNewLines != 1) {
1132  CheckName +
1133  ": is not on the line after the previous match");
1135  "'next' match was here");
1137  "previous match ended here");
1139  "non-matching line after previous match is here");
1140  return true;
1141  }
1142 
1143  return false;
1144 }
1145 
1146 /// Verify there is no newline in the given buffer.
1147 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1148  if (Pat.getCheckTy() != Check::CheckSame)
1149  return false;
1150 
1151  // Count the number of newlines between the previous match and this one.
1152  const char *FirstNewLine = nullptr;
1153  unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1154 
1155  if (NumNewLines != 0) {
1157  Prefix +
1158  "-SAME: is not on the same line as the previous match");
1160  "'next' match was here");
1162  "previous match ended here");
1163  return true;
1164  }
1165 
1166  return false;
1167 }
1168 
1169 /// Verify there's no "not strings" in the given buffer.
1171  const SourceMgr &SM, StringRef Buffer,
1172  const std::vector<const FileCheckPattern *> &NotStrings,
1173  StringMap<StringRef> &VariableTable, const FileCheckRequest &Req,
1174  std::vector<FileCheckDiag> *Diags) const {
1175  for (const FileCheckPattern *Pat : NotStrings) {
1176  assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1177 
1178  size_t MatchLen = 0;
1179  size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1180 
1181  if (Pos == StringRef::npos) {
1182  PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
1183  VariableTable, Req.VerboseVerbose, Diags);
1184  continue;
1185  }
1186 
1187  PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, VariableTable,
1188  Pos, MatchLen, Req, Diags);
1189 
1190  return true;
1191  }
1192 
1193  return false;
1194 }
1195 
1196 /// Match "dag strings" and their mixed "not strings".
1197 size_t
1199  std::vector<const FileCheckPattern *> &NotStrings,
1200  StringMap<StringRef> &VariableTable,
1201  const FileCheckRequest &Req,
1202  std::vector<FileCheckDiag> *Diags) const {
1203  if (DagNotStrings.empty())
1204  return 0;
1205 
1206  // The start of the search range.
1207  size_t StartPos = 0;
1208 
1209  struct MatchRange {
1210  size_t Pos;
1211  size_t End;
1212  };
1213  // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match
1214  // ranges are erased from this list once they are no longer in the search
1215  // range.
1216  std::list<MatchRange> MatchRanges;
1217 
1218  // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
1219  // group, so we don't use a range-based for loop here.
1220  for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
1221  PatItr != PatEnd; ++PatItr) {
1222  const FileCheckPattern &Pat = *PatItr;
1223  assert((Pat.getCheckTy() == Check::CheckDAG ||
1224  Pat.getCheckTy() == Check::CheckNot) &&
1225  "Invalid CHECK-DAG or CHECK-NOT!");
1226 
1227  if (Pat.getCheckTy() == Check::CheckNot) {
1228  NotStrings.push_back(&Pat);
1229  continue;
1230  }
1231 
1232  assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1233 
1234  // CHECK-DAG always matches from the start.
1235  size_t MatchLen = 0, MatchPos = StartPos;
1236 
1237  // Search for a match that doesn't overlap a previous match in this
1238  // CHECK-DAG group.
1239  for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
1240  StringRef MatchBuffer = Buffer.substr(MatchPos);
1241  size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1242  // With a group of CHECK-DAGs, a single mismatching means the match on
1243  // that group of CHECK-DAGs fails immediately.
1244  if (MatchPosBuf == StringRef::npos) {
1245  PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
1246  VariableTable, Req.VerboseVerbose, Diags);
1247  return StringRef::npos;
1248  }
1249  // Re-calc it as the offset relative to the start of the original string.
1250  MatchPos += MatchPosBuf;
1251  if (Req.VerboseVerbose)
1252  PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer,
1253  VariableTable, MatchPos, MatchLen, Req, Diags);
1254  MatchRange M{MatchPos, MatchPos + MatchLen};
1255  if (Req.AllowDeprecatedDagOverlap) {
1256  // We don't need to track all matches in this mode, so we just maintain
1257  // one match range that encompasses the current CHECK-DAG group's
1258  // matches.
1259  if (MatchRanges.empty())
1260  MatchRanges.insert(MatchRanges.end(), M);
1261  else {
1262  auto Block = MatchRanges.begin();
1263  Block->Pos = std::min(Block->Pos, M.Pos);
1264  Block->End = std::max(Block->End, M.End);
1265  }
1266  break;
1267  }
1268  // Iterate previous matches until overlapping match or insertion point.
1269  bool Overlap = false;
1270  for (; MI != ME; ++MI) {
1271  if (M.Pos < MI->End) {
1272  // !Overlap => New match has no overlap and is before this old match.
1273  // Overlap => New match overlaps this old match.
1274  Overlap = MI->Pos < M.End;
1275  break;
1276  }
1277  }
1278  if (!Overlap) {
1279  // Insert non-overlapping match into list.
1280  MatchRanges.insert(MI, M);
1281  break;
1282  }
1283  if (Req.VerboseVerbose) {
1284  // Due to their verbosity, we don't print verbose diagnostics here if
1285  // we're gathering them for a different rendering, but we always print
1286  // other diagnostics.
1287  if (!Diags) {
1288  SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
1289  SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
1290  SMRange OldRange(OldStart, OldEnd);
1291  SM.PrintMessage(OldStart, SourceMgr::DK_Note,
1292  "match discarded, overlaps earlier DAG match here",
1293  {OldRange});
1294  } else
1295  Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
1296  }
1297  MatchPos = MI->End;
1298  }
1299  if (!Req.VerboseVerbose)
1300  PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, VariableTable,
1301  MatchPos, MatchLen, Req, Diags);
1302 
1303  // Handle the end of a CHECK-DAG group.
1304  if (std::next(PatItr) == PatEnd ||
1305  std::next(PatItr)->getCheckTy() == Check::CheckNot) {
1306  if (!NotStrings.empty()) {
1307  // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
1308  // CHECK-DAG, verify that there are no 'not' strings occurred in that
1309  // region.
1310  StringRef SkippedRegion =
1311  Buffer.slice(StartPos, MatchRanges.begin()->Pos);
1312  if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
1313  return StringRef::npos;
1314  // Clear "not strings".
1315  NotStrings.clear();
1316  }
1317  // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
1318  // end of this CHECK-DAG group's match range.
1319  StartPos = MatchRanges.rbegin()->End;
1320  // Don't waste time checking for (impossible) overlaps before that.
1321  MatchRanges.clear();
1322  }
1323  }
1324 
1325  return StartPos;
1326 }
1327 
1328 // A check prefix must contain only alphanumeric, hyphens and underscores.
1329 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1330  Regex Validator("^[a-zA-Z0-9_-]*$");
1331  return Validator.match(CheckPrefix);
1332 }
1333 
1335  StringSet<> PrefixSet;
1336 
1337  for (StringRef Prefix : Req.CheckPrefixes) {
1338  // Reject empty prefixes.
1339  if (Prefix == "")
1340  return false;
1341 
1342  if (!PrefixSet.insert(Prefix).second)
1343  return false;
1344 
1345  if (!ValidateCheckPrefix(Prefix))
1346  return false;
1347  }
1348 
1349  return true;
1350 }
1351 
1352 // Combines the check prefixes into a single regex so that we can efficiently
1353 // scan for any of the set.
1354 //
1355 // The semantics are that the longest-match wins which matches our regex
1356 // library.
1358  // I don't think there's a way to specify an initial value for cl::list,
1359  // so if nothing was specified, add the default
1360  if (Req.CheckPrefixes.empty())
1361  Req.CheckPrefixes.push_back("CHECK");
1362 
1363  // We already validated the contents of CheckPrefixes so just concatenate
1364  // them as alternatives.
1365  SmallString<32> PrefixRegexStr;
1366  for (StringRef Prefix : Req.CheckPrefixes) {
1367  if (Prefix != Req.CheckPrefixes.front())
1368  PrefixRegexStr.push_back('|');
1369 
1370  PrefixRegexStr.append(Prefix);
1371  }
1372 
1373  return Regex(PrefixRegexStr);
1374 }
1375 
1376 // Remove local variables from \p VariableTable. Global variables
1377 // (start with '$') are preserved.
1378 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1379  SmallVector<StringRef, 16> LocalVars;
1380  for (const auto &Var : VariableTable)
1381  if (Var.first()[0] != '$')
1382  LocalVars.push_back(Var.first());
1383 
1384  for (const auto &Var : LocalVars)
1385  VariableTable.erase(Var);
1386 }
1387 
1388 /// Check the input to FileCheck provided in the \p Buffer against the \p
1389 /// CheckStrings read from the check file.
1390 ///
1391 /// Returns false if the input fails to satisfy the checks.
1393  ArrayRef<FileCheckString> CheckStrings,
1394  std::vector<FileCheckDiag> *Diags) {
1395  bool ChecksFailed = false;
1396 
1397  /// VariableTable - This holds all the current filecheck variables.
1398  StringMap<StringRef> VariableTable;
1399 
1400  for (const auto& Def : Req.GlobalDefines)
1401  VariableTable.insert(StringRef(Def).split('='));
1402 
1403  unsigned i = 0, j = 0, e = CheckStrings.size();
1404  while (true) {
1405  StringRef CheckRegion;
1406  if (j == e) {
1407  CheckRegion = Buffer;
1408  } else {
1409  const FileCheckString &CheckLabelStr = CheckStrings[j];
1410  if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1411  ++j;
1412  continue;
1413  }
1414 
1415  // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1416  size_t MatchLabelLen = 0;
1417  size_t MatchLabelPos = CheckLabelStr.Check(
1418  SM, Buffer, true, MatchLabelLen, VariableTable, Req, Diags);
1419  if (MatchLabelPos == StringRef::npos)
1420  // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1421  return false;
1422 
1423  CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1424  Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1425  ++j;
1426  }
1427 
1428  if (Req.EnableVarScope)
1429  ClearLocalVars(VariableTable);
1430 
1431  for (; i != j; ++i) {
1432  const FileCheckString &CheckStr = CheckStrings[i];
1433 
1434  // Check each string within the scanned region, including a second check
1435  // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1436  size_t MatchLen = 0;
1437  size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1438  VariableTable, Req, Diags);
1439 
1440  if (MatchPos == StringRef::npos) {
1441  ChecksFailed = true;
1442  i = j;
1443  break;
1444  }
1445 
1446  CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1447  }
1448 
1449  if (j == e)
1450  break;
1451  }
1452 
1453  // Success if no checks failed.
1454  return !ChecksFailed;
1455 }
uint64_t CallInst * C
Represents a range in source code.
Definition: SMLoc.h:48
Indicates a good match for an expected pattern.
Definition: FileCheck.h:175
bool ValidateCheckPrefixes()
Definition: FileCheck.cpp:1334
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Indicates no match for an excluded pattern.
Definition: FileCheck.h:184
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:218
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Indicates a discarded match for an expected pattern.
Definition: FileCheck.h:182
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer, const StringMap< StringRef > &VariableTable, std::vector< FileCheckDiag > *Diags) const
Definition: FileCheck.cpp:432
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:256
void push_back(const T &Elt)
Definition: SmallVector.h:211
bool CheckSame(const SourceMgr &SM, StringRef Buffer) const
Verify there is no newline in the given buffer.
Definition: FileCheck.cpp:1147
static std::string escape(StringRef String)
Turn String into a regex by escaping its special characters.
Definition: Regex.cpp:200
iterator find(StringRef Key)
Definition: StringMap.h:332
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:509
auto formatv(const char *Fmt, Ts &&... Vals) -> formatv_object< decltype(std::make_tuple(detail::build_format_adapter(std::forward< Ts >(Vals))...))>
std::string getDescription(StringRef Prefix) const
Definition: FileCheck.cpp:584
static std::pair< StringRef, StringRef > split(StringRef Str, char Separator)
Checked version of split, to ensure mandatory subparts.
Definition: DataLayout.cpp:203
Marks when parsing found a -NOT check combined with another CHECK suffix.
Definition: FileCheck.h:61
size_t getBufferSize() const
Definition: MemoryBuffer.h:61
void reserve(size_type N)
Definition: SmallVector.h:369
std::pair< unsigned, unsigned > getLineAndColumn(SMLoc Loc, unsigned BufferID=0) const
Find the line and column number for the specified location in the specified file. ...
Definition: SourceMgr.cpp:130
LLVM_NODISCARD unsigned edit_distance(StringRef Other, bool AllowReplacements=true, unsigned MaxEditDistance=0) const
Determine the edit distance between this string and another string.
Definition: StringRef.cpp:94
bool CheckNext(const SourceMgr &SM, StringRef Buffer) const
Verify there is a single line in the given buffer.
Definition: FileCheck.cpp:1107
SMLoc Start
Definition: SMLoc.h:50
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl< std::pair< SDValue, SDNode *>> &RecordedNodes)
CheckSame - Implements OP_CheckSame.
StringRef Prefix
Which prefix name this check matched.
Definition: FileCheck.h:213
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
FileCheckPattern Pat
The pattern to match.
Definition: FileCheck.h:210
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Definition: SourceMgr.cpp:247
bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, unsigned LineNumber, const FileCheckRequest &Req)
Parses the given string into the Pattern.
Definition: FileCheck.cpp:33
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
LLVM_NODISCARD StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:620
LLVM_NODISCARD size_t count(char C) const
Return the number of occurrences of C in the string.
Definition: StringRef.h:457
SMLoc Loc
The location in the match file that the check string was specified.
Definition: FileCheck.h:216
Check::FileCheckType CheckTy
What is the FileCheck directive for this diagnostic?
Definition: FileCheck.h:156
Indicates a match for an expected pattern, but the match is on the wrong line.
Definition: FileCheck.h:180
Compile for newline-sensitive matching.
Definition: Regex.h:38
LLVM_NODISCARD StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:679
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:578
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
Marks when parsing found a -COUNT directive with invalid count value.
Definition: FileCheck.h:64
bool CheckNot(const SourceMgr &SM, StringRef Buffer, const std::vector< const FileCheckPattern *> &NotStrings, StringMap< StringRef > &VariableTable, const FileCheckRequest &Req, std::vector< FileCheckDiag > *Diags) const
Verify there's no "not strings" in the given buffer.
Definition: FileCheck.cpp:1170
std::string itostr(int64_t X)
Definition: StringExtras.h:238
static std::pair< Check::FileCheckType, StringRef > FindCheckType(StringRef Buffer, StringRef Prefix)
Definition: FileCheck.cpp:615
static unsigned CountNumNewlinesBetween(StringRef Range, const char *&FirstNewLine)
Count the number of newlines in the specified range.
Definition: FileCheck.cpp:1001
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
Definition: SourceMgr.h:151
StringRef CanonicalizeFile(MemoryBuffer &MB, SmallVectorImpl< char > &OutputBuffer)
Canonicalize whitespaces in the file.
Definition: FileCheck.cpp:526
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Contains info about various FileCheck options.
Definition: FileCheck.h:26
static bool ValidateCheckPrefix(StringRef CheckPrefix)
Definition: FileCheck.cpp:1329
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:130
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat, int MatchedCount, StringRef Buffer, StringMap< StringRef > &VariableTable, bool VerboseVerbose, std::vector< FileCheckDiag > *Diags)
Definition: FileCheck.cpp:945
void PrintVariableUses(const SourceMgr &SM, StringRef Buffer, const StringMap< StringRef > &VariableTable, SMRange MatchRange=None) const
Definition: FileCheck.cpp:370
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:74
unsigned InputEndLine
Definition: FileCheck.h:197
#define P(N)
A check that we found in the input file.
Definition: FileCheck.h:208
size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, size_t &MatchLen, StringMap< StringRef > &VariableTable, FileCheckRequest &Req, std::vector< FileCheckDiag > *Diags) const
Match check string and its "not strings" and/or "dag strings".
Definition: FileCheck.cpp:1024
FileCheckDiag(const SourceMgr &SM, const Check::FileCheckType &CheckTy, SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange)
Definition: FileCheck.cpp:555
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, std::vector< FileCheckString > &CheckStrings)
Read the check file, which specifies the sequence of expected strings.
Definition: FileCheck.cpp:750
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat, int MatchedCount, StringRef Buffer, StringMap< StringRef > &VariableTable, size_t MatchPos, size_t MatchLen, const FileCheckRequest &Req, std::vector< FileCheckDiag > *Diags)
Definition: FileCheck.cpp:897
LLVM_NODISCARD size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Definition: StringRef.cpp:249
bool CheckInput(SourceMgr &SM, StringRef Buffer, ArrayRef< FileCheckString > CheckStrings, std::vector< FileCheckDiag > *Diags=nullptr)
Check the input to FileCheck provided in the Buffer against the CheckStrings read from the check file...
Definition: FileCheck.cpp:1392
LLVM_NODISCARD size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:285
unsigned CheckLine
Where is the FileCheck directive for this diagnostic?
Definition: FileCheck.h:158
SMLoc getLoc() const
Returns the location in source code.
Definition: FileCheck.h:119
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
Definition: SourceMgr.h:41
Check::FileCheckType getCheckTy() const
Definition: FileCheck.h:136
size_t size() const
Definition: SmallVector.h:52
LLVM_NODISCARD char back() const
back - Get the last character in the string.
Definition: StringRef.h:141
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::pair< typename base::iterator, bool > insert(StringRef Key)
Definition: StringSet.h:38
size_t Match(StringRef Buffer, size_t &MatchLen, StringMap< StringRef > &VariableTable) const
Matches the pattern string against the input buffer Buffer.
Definition: FileCheck.cpp:274
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:478
Indicates the pattern only matches the end of file.
Definition: FileCheck.h:58
bool isValid() const
Definition: SMLoc.h:59
static size_t SkipWord(StringRef Str, size_t Loc)
Definition: FileCheck.cpp:670
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\\', '\t', '\n', '"', and anything that doesn't satisfy llvm::isPrint into an escape...
Indicates a fuzzy match that serves as a suggestion for the next intended match for an expected patte...
Definition: FileCheck.h:191
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
size_t CheckDag(const SourceMgr &SM, StringRef Buffer, std::vector< const FileCheckPattern *> &NotStrings, StringMap< StringRef > &VariableTable, const FileCheckRequest &Req, std::vector< FileCheckDiag > *Diags) const
Match "dag strings" and their mixed "not strings".
Definition: FileCheck.cpp:1198
static std::pair< StringRef, StringRef > FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, unsigned &LineNumber, Check::FileCheckType &CheckTy)
Search the buffer for the first prefix in the prefix regular expression.
Definition: FileCheck.cpp:700
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Indicates no match for an expected pattern, but this might follow good matches when multiple matches ...
Definition: FileCheck.h:188
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
bool isValid(std::string &Error) const
isValid - returns the error encountered during regex compilation, or matching, if any...
Definition: Regex.cpp:55
StringRef str()
Return a StringRef for the vector contents.
Definition: raw_ostream.h:534
unsigned InputEndCol
Definition: FileCheck.h:198
bool insert(MapEntryTy *KeyValue)
insert - Insert the specified key/value pair into the map.
Definition: StringMap.h:365
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:41
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
StringMap - This is an unconventional map that is specialized for handling keys that are "strings"...
Definition: StringMap.h:219
static std::unique_ptr< MemoryBuffer > getMemBufferCopy(StringRef InputData, const Twine &BufferName="")
Open the specified memory range as a MemoryBuffer, copying the contents and taking ownership of it...
static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, const SourceMgr &SM, SMLoc Loc, Check::FileCheckType CheckTy, StringRef Buffer, size_t Pos, size_t Len, std::vector< FileCheckDiag > *Diags, bool AdjustPrevDiag=false)
Definition: FileCheck.cpp:414
SMLoc End
Definition: SMLoc.h:50
unsigned getNumMatches() const
getNumMatches - In a valid regex, return the number of parenthesized matches it contains.
Definition: Regex.cpp:68
iterator begin() const
Definition: StringRef.h:101
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
unsigned InputStartLine
The search range if MatchTy is MatchNoneAndExcluded or MatchNoneButExpected, or the match range other...
Definition: FileCheck.h:195
Indicates a match for an excluded pattern.
Definition: FileCheck.h:177
const char * getBufferEnd() const
Definition: MemoryBuffer.h:60
FileCheckType & setCount(int C)
Definition: FileCheck.cpp:575
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:144
static const size_t npos
Definition: StringRef.h:50
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Regex buildCheckPrefixRegex()
Definition: FileCheck.cpp:1357
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:380
#define I(x, y, z)
Definition: MD5.cpp:58
const char * getBufferStart() const
Definition: MemoryBuffer.h:59
static bool IsPartOfWord(char c)
Definition: FileCheck.cpp:571
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:122
unsigned InputStartCol
Definition: FileCheck.h:196
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM_NODISCARD char front() const
front - Get the first character in the string.
Definition: StringRef.h:134
LLVM Value Representation.
Definition: Value.h:72
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:27
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr)
matches - Match the regex against a given String.
Definition: Regex.cpp:72
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static void ClearLocalVars(StringMap< StringRef > &VariableTable)
Definition: FileCheck.cpp:1378
MatchType
What type of match result does this diagnostic describe?
Definition: FileCheck.h:173
int getCount() const
Definition: FileCheck.h:138
Represents a location in source code.
Definition: SMLoc.h:23
iterator end() const
Definition: StringRef.h:103
iterator end()
Definition: StringMap.h:317