-
-
Notifications
You must be signed in to change notification settings - Fork 7.3k
/
minimum_edit_distance.cpp
176 lines (161 loc) · 6.28 KB
/
minimum_edit_distance.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
/**
* @file
* @brief Implementation of [Minimum Edit
* Distance](https://en.wikipedia.org/wiki/Edit_distance) using Dynamic
* Programing
*
* @details
*
* Given two strings str1 & str2 and we have to calculate the minimum
* number of operations (Insert, Remove, Replace) required to convert
* str1 to str2.
*
* ### Algorithm
*
* We will solve this problem using Naive recursion. But as we are
* approaching with a DP solution. So, we will take a DP array to
* store the solution of all sub-problems so that we don't have to
* perform recursion again and again. Now to solve the problem, We
* can traverse all characters from either right side of the strings
* or left side. Suppose we will do it from the right side. So, there
* are two possibilities for every pair of characters being traversed.
* 1. If the last characters of two strings are the same, Ignore
* the characters and get the count for the remaining string.
* So, we get the solution for lengths m-1 and n-1 in a DP array.
*
* 2. Else, (If last characters are not the same), we will consider all
* three operations (Insert, Remove, Replace) on the last character of
* the first string and compute the minimum cost for all three operations
* and take the minimum of three values in the DP array.
* For Insert: Recur for m and n-1
* For Remove: Recur for for m-1 and n
* For Replace: Recur for for m-1 and n-1
*
* @author [Nirjas Jakilim](github.com/nirzak)
*/
#include <cassert> /// for assert
#include <cstdint> /// for std::uint64_t
#include <iostream> /// for IO operations
#include <vector> /// for std::vector
/**
* @namespace dynamic_programming
* @brief Dynamic Programming algorithms
*/
namespace dynamic_programming {
/**
* @namespace Minimum Edit Distance
* @brief Implementation of [Minimum Edit
* Distance](https://en.wikipedia.org/wiki/Edit_distance) algorithm
*/
namespace minimum_edit_distance {
/**
* @brief Takes input of the cost of
* three operations: Insert, Replace and Delete
* and return the minimum cost among them.
* @param x used to pass minimum cost of Insert operations
* @param y used to pass minimum cost of Replace operations
* @param z used to pass minimum cost of Delete operations
* @returns x if `x` is the minimum value
* @returns y if `y` is the minimum value
* @returns z if `z` is the minimum value
*/
uint64_t min(uint64_t x, uint64_t y, uint64_t z) {
if (x <= y && x <= z) {
return x; /// returns x, if x is the minimum value
}
if (y <= x && y <= z) {
return y; /// returns y, if y is the minimum value
} else {
return z; /// returns z if z is the minimum value
}
}
/**
* @brief Calculates and stores the result
* of all the sub-problems, so that we don't have to recur to compute
* the minimum cost of a particular operation if it is already
* computed and stored in the `dp` vector.
* @param dp vector to store the computed minimum costs
* @param str1 to pass the 1st string
* @param str2 to pass the 2nd string
* @param m the length of str1
* @param n the length of str2
* @returns dp[m][n] the minimum cost of operations
* needed to convert str1 to str2
*/
uint64_t editDistDP(std::string str1, std::string str2, uint64_t m,
uint64_t n) {
/// Create a table to store results of subproblems
std::vector<std::vector<uint64_t>> dp(
m + 1,
std::vector<uint64_t>(
n +
1)); /// creasting 2D vector dp to store the results of subproblems
/// Fill d[][] in bottom up manner
for (uint64_t i = 0; i <= m; i++) {
for (uint64_t j = 0; j <= n; j++) {
/// If first string is empty, only option is to
/// insert all characters of second string
if (i == 0) {
dp[i][j] = j; /// Minimum operations = j
}
/// If second string is empty, only option is to
/// remove all characters of second string
else if (j == 0) {
dp[i][j] = i; /// Minimum operations = i
}
/// If last characters are same, ignore last char
/// and recur for remaining string
else if (str1[i - 1] == str2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1];
}
/// If the last character is different, consider all
/// possibilities and find the minimum
else {
dp[i][j] = 1 + min(dp[i][j - 1], // Insert
dp[i - 1][j], // Remove
dp[i - 1][j - 1]); // Replace
}
}
}
return dp[m][n]; /// returning the minimum cost of operations needed to
/// convert str1 to str2
}
} // namespace minimum_edit_distance
} // namespace dynamic_programming
/**
* @brief Self-test implementations
* @returns void
*/
static void test() {
// 1st test
std::string str1 = "INTENTION"; // Sample input of 1st string
std::string str2 = "EXECUTION"; // Sample input of 2nd string
uint64_t expected_output1 = 5; // Expected minimum cost
uint64_t output1 = dynamic_programming::minimum_edit_distance::editDistDP(
str1, str2, str1.length(),
str2.length()); // calling the editDistDP function and storing the
// result on output1
assert(output1 ==
expected_output1); // comparing the output with the expected output
std::cout << "Minimum Number of Operations Required: " << output1
<< std::endl;
// 2nd test
std::string str3 = "SATURDAY";
std::string str4 = "SUNDAY";
uint64_t expected_output2 = 3;
uint64_t output2 = dynamic_programming::minimum_edit_distance::editDistDP(
str3, str4, str3.length(), str4.length());
assert(output2 == expected_output2);
std::cout << "Minimum Number of Operations Required: " << output2
<< std::endl;
}
/**
* @brief main function
* @param argc commandline argument count (ignored)
* @param argv commandline array of arguments (ignored)
* @returns 0 on exit
*/
int main(int argc, char *argv[]) {
test(); // run self-test implementations
return 0;
}