Minimum number of deletions and insertions to transform one string into another
Given two strings ‘str1’ and ‘str2’ of size m and n respectively. The task is to remove/delete and insert the minimum number of characters from/in str1 to transform it into str2. It could be possible that the same character needs to be removed/deleted from one point of str1 and inserted at some another point.
Example 1:
Input :
str1 = "heap", str2 = "pea"
Output :
Minimum Deletion = 2 and
Minimum Insertion = 1
Explanation:
p and h deleted from heap
Then, p is inserted at the beginning
One thing to note, though p was required yet
it was removed/deleted first from its position and
then it is inserted to some other position.
Thus, p contributes one to the deletion_count
and one to the insertion_count.
Example 2:
Input :
str1 = "w3wiki", str2 = "Beginner"
Output :
Minimum Deletion = 8
Minimum Insertion = 0
Simple Approach:
A simple approach is to consider all subsequences of str1 and for each subsequence calculate minimum deletions and insertions so as to transform it into str2. A very complex method and the time complexity of this solution is exponential.
Efficient Approach:
An efficient approach uses the concept of finding the length of the longest common subsequence of the given two sequences.
Algorithm:
- str1 and str2 be the given strings.
- m and n be their lengths respectively.
- len be the length of the longest common subsequence of str1 and str2
- minimum number of deletions minDel = m – len (as we only need to delete from str1 because we are reducing it to str2)
- minimum number of Insertions minInsert = n – len (as we are inserting x in str1 , x is a number of characters in str2 which are not taking part in the string which is longest common subsequence )
Below is the implementation of the above code:
C++
// Dynamic Programming C++ implementation to find // minimum number of deletions and insertions #include <bits/stdc++.h> using namespace std; // Returns length of longest common subsequence // for str1[0..m-1], str2[0..n-1] int lcs(string str1, string str2, int m, int n) { int L[m + 1][n + 1]; int i, j; // Following steps build L[m+1][n+1] in bottom // up fashion. Note that L[i][j] contains // length of LCS of str1[0..i-1] and str2[0..j-1] for (i = 0; i <= m; i++) { for (j = 0; j <= n; j++) { if (i == 0 || j == 0) L[i][j] = 0; else if (str1.at(i - 1) == str2.at(j - 1)) L[i][j] = L[i - 1][j - 1] + 1; else L[i][j] = max(L[i - 1][j], L[i][j - 1]); } } // L[m][n] contains length of LCS // for X[0..n-1] and Y[0..m-1] return L[m][n]; } // function to find minimum number // of deletions and insertions void printMinDelAndInsert(string str1, string str2) { int m = str1.size(); int n = str2.size(); int len = lcs(str1, str2, m, n); cout << "Minimum number of deletions = " << (m - len) << endl; cout << "Minimum number of insertions = " << (n - len) << endl; } // Driver Code int main() { string str1 = "heap" ; string str2 = "pea" ; // Function Call printMinDelAndInsert(str1, str2); return 0; } |
Java
// Dynamic Programming Java implementation // to find minimum number of deletions and // insertions import java.io.*; class GFG { // Returns length of length common // subsequence for str1[0..m-1], // str2[0..n-1] static int lcs(String str1, String str2, int m, int n) { int L[][] = new int [m + 1 ][n + 1 ]; int i, j; // Following steps build L[m+1][n+1] in // bottom up fashion. Note that L[i][j] // contains length of LCS of str1[0..i-1] // and str2[0..j-1] for (i = 0 ; i <= m; i++) { for (j = 0 ; j <= n; j++) { if (i == 0 || j == 0 ) L[i][j] = 0 ; else if (str1.charAt(i - 1 ) == str2.charAt(j - 1 )) L[i][j] = L[i - 1 ][j - 1 ] + 1 ; else L[i][j] = Math.max(L[i - 1 ][j], L[i][j - 1 ]); } } // L[m][n] contains length of LCS // for X[0..n-1] and Y[0..m-1] return L[m][n]; } // function to find minimum number // of deletions and insertions static void printMinDelAndInsert(String str1, String str2) { int m = str1.length(); int n = str2.length(); int len = lcs(str1, str2, m, n); System.out.println( "Minimum number of " + "deletions = " ); System.out.println(m - len); System.out.println( "Minimum number of " + "insertions = " ); System.out.println(n - len); } // Driver code public static void main(String[] args) { String str1 = new String( "heap" ); String str2 = new String( "pea" ); // Function Call printMinDelAndInsert(str1, str2); } } // This code is contributed by Prerna Saini |
Python3
# Dynamic Programming Python3 # implementation to find minimum # number of deletions and insertions # Returns length of length # common subsequence for # str1[0..m-1], str2[0..n-1] def lcs(str1, str2, m, n): L = [[ 0 for i in range (n + 1 )] for i in range (m + 1 )] # Following steps build L[m+1][n+1] # in bottom up fashion. Note that # L[i][j] contains length of LCS # of str1[0..i-1] and str2[0..j-1] for i in range (m + 1 ): for j in range (n + 1 ): if (i = = 0 or j = = 0 ): L[i][j] = 0 elif (str1[i - 1 ] = = str2[j - 1 ]): L[i][j] = L[i - 1 ][j - 1 ] + 1 else : L[i][j] = max (L[i - 1 ][j], L[i][j - 1 ]) # L[m][n] contains length of LCS # for X[0..n-1] and Y[0..m-1] return L[m][n] # function to find minimum number # of deletions and insertions def printMinDelAndInsert(str1, str2): m = len (str1) n = len (str2) leng = lcs(str1, str2, m, n) print ( "Minimum number of deletions = " , m - leng, sep = ' ' ) print ( "Minimum number of insertions = " , n - leng, sep = ' ' ) # Driver Code str1 = "heap" str2 = "pea" # Function Call printMinDelAndInsert(str1, str2) # This code is contributed # by sahilshelangia |
C#
// Dynamic Programming C# implementation // to find minimum number of deletions and // insertions using System; class GFG { // Returns length of length common // subsequence for str1[0..m-1], // str2[0..n-1] static int lcs( string str1, string str2, int m, int n) { int [, ] L = new int [m + 1, n + 1]; int i, j; // Following steps build L[m+1][n+1] in // bottom up fashion. Note that L[i][j] // contains length of LCS of str1[0..i-1] // and str2[0..j-1] for (i = 0; i <= m; i++) { for (j = 0; j <= n; j++) { if (i == 0 || j == 0) L[i, j] = 0; else if (str1[i - 1] == str2[j - 1]) L[i, j] = L[i - 1, j - 1] + 1; else L[i, j] = Math.Max(L[i - 1, j], L[i, j - 1]); } } // L[m][n] contains length of LCS // for X[0..n-1] and Y[0..m-1] return L[m, n]; } // function to find minimum number // of deletions and insertions static void printMinDelAndInsert( string str1, string str2) { int m = str1.Length; int n = str2.Length; int len = lcs(str1, str2, m, n); Console.Write( "Minimum number of " + "deletions = " ); Console.WriteLine(m - len); Console.Write( "Minimum number of " + "insertions = " ); Console.Write(n - len); } // Driver code public static void Main() { string str1 = new string ( "heap" ); string str2 = new string ( "pea" ); // Function Call printMinDelAndInsert(str1, str2); } } // This code is contributed by nitin mittal. |
Javascript
<script> // Dynamic Programming Javascript implementation // to find minimum number of deletions and // insertions // Returns length of length common // subsequence for str1[0..m-1], // str2[0..n-1] function lcs(str1, str2, m, n) { let L = new Array(m + 1); let i, j; for (i = 0; i <= m; i++) { L[i] = new Array(n + 1); for (j = 0; j <= n; j++) { L[i][j] = 0; } } // Following steps build L[m+1][n+1] in // bottom up fashion. Note that L[i][j] // contains length of LCS of str1[0..i-1] // and str2[0..j-1] for (i = 0; i <= m; i++) { for (j = 0; j <= n; j++) { if (i == 0 || j == 0) L[i][j] = 0; else if (str1[i - 1] == str2[j - 1]) L[i][j] = L[i - 1][j - 1] + 1; else L[i][j] = Math.max(L[i - 1][j], L[i][j - 1]); } } // L[m][n] contains length of LCS // for X[0..n-1] and Y[0..m-1] return L[m][n]; } // function to find minimum number // of deletions and insertions function printMinDelAndInsert(str1, str2) { let m = str1.length; let n = str2.length; let len = lcs(str1, str2, m, n); document.write( "Minimum number of " + "deletions = " ); document.write((m - len) + "</br>" ); document.write( "Minimum number of " + "insertions = " ); document.write((n - len) + "</br>" ); } let str1 = "heap" ; let str2 = "pea" ; // Function Call printMinDelAndInsert(str1, str2); // This code is contributed by rameshtravel07. </script> |
Minimum number of deletions = 2 Minimum number of insertions = 1
Time Complexity: O(m * n)
Auxiliary Space: O(m * n)
Method 2: Top-down approach using memoization
This method also uses the idea of finding the length of the longest common subsequence of the given two sequences. But this approach is using a top-down approach using memoization.
C++
// Dynamic Programming C++ implementation to find // minimum number of deletions and insertions // using memoization technique #include <bits/stdc++.h> using namespace std; int dp[1001][1001]; // Returns length of longest common subsequence int lcs(string& s1, string& s2, int i, int j) { if (i == 0 || j == 0) { return 0; } if (dp[i][j] != -1) { return dp[i][j]; } if (s1[i - 1] == s2[j - 1]) { return dp[i][j] = 1 + lcs(s1, s2, i - 1, j - 1); } else { return dp[i][j] = max(lcs(s1, s2, i, j - 1), lcs(s1, s2, i - 1, j)); } } // function to find minimum number // of deletions and insertions void printMinDelAndInsert(string str1, string str2) { int m = str1.size(); int n = str2.size(); dp[m][n]; // initialising dp array as -1 memset (dp, -1, sizeof (dp)); int len = lcs(str1, str2, m, n); cout << "Minimum number of deletions = " << (m - len) << endl; cout << "Minimum number of insertions = " << (n - len) << endl; } // driver code int main() { string str1 = "heap" ; string str2 = "pea" ; // Function Call printMinDelAndInsert(str1, str2); return 0; } // This code is contributed by Arun Bang. |
Java
// Dynamic Programming Java implementation // to find minimum number of deletions and // insertions import java.io.*; import java.util.Arrays; class GFG { public static int dp[][]= new int [ 1001 ][ 1001 ]; static int lcs(String str1, String str2, int i, int j) { if (i == 0 || j == 0 ) { return 0 ; } if (dp[i][j] != - 1 ) { return dp[i][j]; } if (str1.charAt(i - 1 ) == str2.charAt(j - 1 )) { return dp[i][j] = 1 + lcs(str1, str2, i - 1 , j - 1 ); } else { return dp[i][j] = Math.max(lcs(str1, str2, i, j - 1 ), lcs(str1, str2, i - 1 , j)); } } // function to find minimum number // of deletions and insertions static void printMinDelAndInsert(String str1, String str2) { int m = str1.length(); int n = str2.length(); int len = lcs(str1, str2, m, n); System.out.println( "Minimum number of " + "deletions = " +(m - len)); System.out.println( "Minimum number of " + "insertions = " +(n - len)); } // Driver code public static void main(String[] args) { String str1 = new String( "heap" ); String str2 = new String( "pea" ); for ( int i= 0 ;i< 1001 ;i++){ Arrays.fill(dp[i],- 1 ); } // Function Call printMinDelAndInsert(str1, str2); } } // This code is contributed by nikitamehrotra99. |
Python3
# Dynamic Programming C++ implementation to find # minimum number of deletions and insertions # using memoization technique dp = [[ - 1 for i in range ( 1001 )] for j in range ( 1001 )] # Returns length of longest common subsequence def lcs(s1, s2, i, j): if i = = 0 or j = = 0 : return 0 if dp[i][j] ! = - 1 : return dp[i][j] if s1[i - 1 ] = = s2[j - 1 ]: dp[i][j] = 1 + lcs(s1, s2, i - 1 , j - 1 ) return dp[i][j] else : dp[i][j] = max (lcs(s1, s2, i, j - 1 ), lcs(s1, s2, i - 1 , j)) return dp[i][j] # function to find minimum number # of deletions and insertions def printMinDelAndInsert(str1, str2): m = len (str1) n = len (str2) # initialising dp array as -1 dp = [[ - 1 for i in range (n)] for j in range (m)] length = lcs(str1, str2, m, n) print ( "Minimum number of deletions = " , (m - length)) print ( "Minimum number of insertions = " , (n - length)) # driver code if __name__ = = "__main__" : str1 = "heap" str2 = "pea" # Function Call printMinDelAndInsert(str1, str2) # This code is contributed by Vivek Maddeshiya |
C#
using System; namespace MinDelAndInsert { class Program { static int [, ] dp = new int [1001, 1001]; // Returns length of longest common subsequence static int lcs( string s1, string s2, int i, int j) { if (i == 0 || j == 0) { return 0; } if (dp[i, j] != -1) { return dp[i, j]; } if (s1[i - 1] == s2[j - 1]) { return dp[i, j] = 1 + lcs(s1, s2, i - 1, j - 1); } else { return dp[i, j] = Math.Max(lcs(s1, s2, i, j - 1), lcs(s1, s2, i - 1, j)); } } // function to find minimum number // of deletions and insertions static void PrintMinDelAndInsert( string str1, string str2) { int m = str1.Length; int n = str2.Length; // Initializing dp array as -1 for ( int i = 0; i <= m; i++) { for ( int j = 0; j <= n; j++) { dp[i, j] = -1; } } int len = lcs(str1, str2, m, n); Console.WriteLine( "Minimum number of deletions = " + (m - len)); Console.WriteLine( "Minimum number of insertions = " + (n - len)); } static void Main( string [] args) { string str1 = "heap" ; string str2 = "pea" ; // Function Call PrintMinDelAndInsert(str1, str2); } } } // This code is contributed by divyansh2212 |
Javascript
// Dynamic Programming JavaScript implementation to find // minimum number of deletions and insertions // using memoization technique const dp = new Array(1001).fill(-1).map(() => new Array(1001).fill(-1)); // Returns length of longest common subsequence const lcs = (s1, s2, i, j) => { if (i === 0 || j === 0) { return 0; } if (dp[i][j] !== -1) { return dp[i][j]; } if (s1[i - 1] === s2[j - 1]) { dp[i][j] = 1 + lcs(s1, s2, i - 1, j - 1); return dp[i][j]; } else { dp[i][j] = Math.max(lcs(s1, s2, i, j - 1), lcs(s1, s2, i - 1, j)); return dp[i][j]; } }; // function to find minimum number // of deletions and insertions const printMinDelAndInsert = (str1, str2) => { let m = str1.length; let n = str2.length; // initialising dp array as -1 let len = lcs(str1, str2, m, n); console.log(`Minimum number of deletions = ${m - len}`); console.log(`Minimum number of insertions = ${n - len}`); } // driver code let str1 = "heap" ; let str2 = "pea" ; // Function Call printMinDelAndInsert(str1, str2); // This code is contributed by lokeshpotta20. |
Minimum number of deletions = 2 Minimum number of insertions = 1
Time Complexity: O(m * n)
Auxiliary Space: O(m * n)
Efficient approach: Space optimization
In previous approach the current value dp[i][j] is only depend upon the current and previous row values of DP. So to optimize the space complexity we use a single 1D array to store the computations.
Implementation steps:
- Create a 1D vector dp of size n+1.
- Set a base case by initializing the values of DP .
- Now iterate over subproblems by the help of nested loop and get the current value from previous computations.
- Now initialize a variable prev used to store the previous computations and a variable temp to store the current computation.
- After every iteration assign the value of prev to temp for further iteration.
- At last return and print the final answer stored in dp[0].
Implementation:
C++
#include <algorithm> #include <iostream> #include <vector> using namespace std; int lcs(string str1, string str2) { int m = str1.size(); int n = str2.size(); // Single vector to store the previous row vector< int > dp(n + 1, 0); for ( int i = 1; i <= m; i++) { // Store the value of dp[j-1] from the previous // iteration int prev = 0; for ( int j = 1; j <= n; j++) { // Store the current value of dp[j] before // modifying it int temp = dp[j]; if (str1[i - 1] == str2[j - 1]) dp[j] = prev + 1; else dp[j] = max(dp[j], dp[j - 1]); // Update prev for the next iteration prev = temp; } } return dp[n]; // Return the length of the LCS } void printMinDelAndInsert(string str1, string str2) { int len = lcs(str1, str2); int m = str1.size(); int n = str2.size(); cout << "Minimum number of deletions = " << (m - len) << endl; cout << "Minimum number of insertions = " << (n - len) << endl; } // Driver Code int main() { string str1 = "heap" ; string str2 = "pea" ; printMinDelAndInsert(str1, str2); return 0; } // -- by bhardwajji |
Java
import java.util.*; import java.io.*; public class GFG { public static int lcs(String str1, String str2) { int m = str1.length(); int n = str2.length(); // Single array to store the previous row int [] dp = new int [n + 1 ]; for ( int i = 1 ; i <= m; i++) { // Store the value of dp[j-1] from the previous iteration int prev = 0 ; for ( int j = 1 ; j <= n; j++) { // Store the current value of dp[j] before modifying it int temp = dp[j]; if (str1.charAt(i - 1 ) == str2.charAt(j - 1 )) dp[j] = prev + 1 ; else dp[j] = Math.max(dp[j], dp[j - 1 ]); // Update prev for the next iteration prev = temp; } } return dp[n]; // Return the length of the LCS } public static void printMinDelAndInsert(String str1, String str2) { int len = lcs(str1, str2); int m = str1.length(); int n = str2.length(); System.out.println( "Minimum number of deletions = " + (m - len)); System.out.println( "Minimum number of insertions = " + (n - len)); } // Driver Code public static void main(String[] args) { String str1 = "heap" ; String str2 = "pea" ; printMinDelAndInsert(str1, str2); } } // by srihari2222 |
Python3
def lcs(str1, str2): m = len (str1) n = len (str2) # Create a matrix to store the LCS lengths dp = [[ 0 ] * (n + 1 ) for _ in range (m + 1 )] for i in range ( 1 , m + 1 ): for j in range ( 1 , n + 1 ): if str1[i - 1 ] = = str2[j - 1 ]: # If characters match, extend LCS by 1 dp[i][j] = dp[i - 1 ][j - 1 ] + 1 else : # Otherwise, take the maximum LCS length from the previous row or column dp[i][j] = max (dp[i - 1 ][j], dp[i][j - 1 ]) return dp[m][n] def print_min_del_and_insert(str1, str2): len_lcs = lcs(str1, str2) # Calculate the length of the Longest Common Subsequence m = len (str1) n = len (str2) # Calculate the minimum number of deletions and insertions min_deletions = m - len_lcs min_insertions = n - len_lcs print ( "Minimum number of deletions =" , min_deletions) print ( "Minimum number of insertions =" , min_insertions) # Driver Code if __name__ = = "__main__" : str1 = "heap" str2 = "pea" print_min_del_and_insert(str1, str2) |
C#
using System; class Program { static int LCS( string str1, string str2) { int m = str1.Length; int n = str2.Length; // Single array to store the previous row int [] dp = new int [n + 1]; for ( int i = 1; i <= m; i++) { // Store the value of dp[j-1] from the previous iteration int prev = 0; for ( int j = 1; j <= n; j++) { // Store the current value of dp[j] before modifying it int temp = dp[j]; if (str1[i - 1] == str2[j - 1]) { dp[j] = prev + 1; } else { dp[j] = Math.Max(dp[j], dp[j - 1]); } // Update prev for the next iteration prev = temp; } } return dp[n]; // Return the length of the LCS } static void PrintMinDelAndInsert( string str1, string str2) { int len = LCS(str1, str2); int m = str1.Length; int n = str2.Length; Console.WriteLine( "Minimum number of deletions = " + (m - len)); Console.WriteLine( "Minimum number of insertions = " + (n - len)); } static void Main() { string str1 = "heap" ; string str2 = "pea" ; PrintMinDelAndInsert(str1, str2); } } |
Javascript
function lcs(str1, str2) { const m = str1.length; const n = str2.length; // Single array to store the previous row const dp = new Array(n + 1).fill(0); for (let i = 1; i <= m; i++) { // Store the value of dp[j-1] from the previous iteration let prev = 0; for (let j = 1; j <= n; j++) { // Store the current value of dp[j] before modifying it let temp = dp[j]; if (str1[i - 1] === str2[j - 1]) { dp[j] = prev + 1; } else { dp[j] = Math.max(dp[j], dp[j - 1]); } // Update prev for the next iteration prev = temp; } } return dp[n]; // Return the length of the LCS } function printMinDelAndInsert(str1, str2) { const len = lcs(str1, str2); const m = str1.length; const n = str2.length; console.log( "Minimum number of deletions = " + (m - len)); console.log( "Minimum number of insertions = " + (n - len)); } // Driver Code const str1 = "heap" ; const str2 = "pea" ; printMinDelAndInsert(str1, str2); // --> by srihari2222 |
Minimum number of deletions = 2 Minimum number of insertions = 1
Time Complexity: O(m * n)
Auxiliary Space: O(n)
This article is contributed by Ayush Jauhari.
Contact Us