LLVM  13.0.0git
edit_distance.h
Go to the documentation of this file.
1 //===-- llvm/ADT/edit_distance.h - Array edit distance function --- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a Levenshtein distance function that works for any two
10 // sequences, with each element of each sequence being analogous to a character
11 // in a string.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_ADT_EDIT_DISTANCE_H
16 #define LLVM_ADT_EDIT_DISTANCE_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include <algorithm>
20 #include <memory>
21 
22 namespace llvm {
23 
24 /// Determine the edit distance between two sequences.
25 ///
26 /// \param FromArray the first sequence to compare.
27 ///
28 /// \param ToArray the second sequence to compare.
29 ///
30 /// \param AllowReplacements whether to allow element replacements (change one
31 /// element into another) as a single operation, rather than as two operations
32 /// (an insertion and a removal).
33 ///
34 /// \param MaxEditDistance If non-zero, the maximum edit distance that this
35 /// routine is allowed to compute. If the edit distance will exceed that
36 /// maximum, returns \c MaxEditDistance+1.
37 ///
38 /// \returns the minimum number of element insertions, removals, or (if
39 /// \p AllowReplacements is \c true) replacements needed to transform one of
40 /// the given sequences into the other. If zero, the sequences are identical.
41 template<typename T>
42 unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
43  bool AllowReplacements = true,
44  unsigned MaxEditDistance = 0) {
45  // The algorithm implemented below is the "classic"
46  // dynamic-programming algorithm for computing the Levenshtein
47  // distance, which is described here:
48  //
49  // http://en.wikipedia.org/wiki/Levenshtein_distance
50  //
51  // Although the algorithm is typically described using an m x n
52  // array, only one row plus one element are used at a time, so this
53  // implementation just keeps one vector for the row. To update one entry,
54  // only the entries to the left, top, and top-left are needed. The left
55  // entry is in Row[x-1], the top entry is what's in Row[x] from the last
56  // iteration, and the top-left entry is stored in Previous.
57  typename ArrayRef<T>::size_type m = FromArray.size();
58  typename ArrayRef<T>::size_type n = ToArray.size();
59 
60  const unsigned SmallBufferSize = 64;
61  unsigned SmallBuffer[SmallBufferSize];
62  std::unique_ptr<unsigned[]> Allocated;
63  unsigned *Row = SmallBuffer;
64  if (n + 1 > SmallBufferSize) {
65  Row = new unsigned[n + 1];
66  Allocated.reset(Row);
67  }
68 
69  for (unsigned i = 1; i <= n; ++i)
70  Row[i] = i;
71 
72  for (typename ArrayRef<T>::size_type y = 1; y <= m; ++y) {
73  Row[0] = y;
74  unsigned BestThisRow = Row[0];
75 
76  unsigned Previous = y - 1;
77  for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) {
78  int OldRow = Row[x];
79  if (AllowReplacements) {
80  Row[x] = std::min(
81  Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
82  std::min(Row[x-1], Row[x])+1);
83  }
84  else {
85  if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous;
86  else Row[x] = std::min(Row[x-1], Row[x]) + 1;
87  }
88  Previous = OldRow;
89  BestThisRow = std::min(BestThisRow, Row[x]);
90  }
91 
92  if (MaxEditDistance && BestThisRow > MaxEditDistance)
93  return MaxEditDistance + 1;
94  }
95 
96  unsigned Result = Row[n];
97  return Result;
98 }
99 
100 } // End llvm namespace
101 
102 #endif
i
i
Definition: README.txt:29
llvm
Definition: AllocatorList.h:23
size_t
ArrayRef.h
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
x
TODO unsigned x
Definition: README.txt:10
y
into llvm powi allowing the code generator to produce balanced multiplication trees the intrinsic needs to be extended to support and second the code generator needs to be enhanced to lower these to multiplication trees Interesting testcase for add shift mul int y
Definition: README.txt:61
llvm::ComputeEditDistance
unsigned ComputeEditDistance(ArrayRef< T > FromArray, ArrayRef< T > ToArray, bool AllowReplacements=true, unsigned MaxEditDistance=0)
Determine the edit distance between two sequences.
Definition: edit_distance.h:42
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685