Rabin-Karp algorithm for Pattern Searching in Matrix
Given matrices txt[][] of dimensions m1 x m2 and pattern pat[][] of dimensions n1 x n2, the task is to check whether a pattern exists in the matrix or not, and if yes then print the top most indices of the pat[][] in txt[][]. It is assumed that m1, m2 ? n1, n2
Examples:
Input: txt[][] = {{G, H, I, P} {J, K, L, Q} {R, G, H, I} {S, J, K, L} } pat[][] = {{G, H, I}, {J, K, L} } Output: Pattern found at ( 0, 0 ) Pattern found at ( 2, 1 ) Explanation: Input: txt[][] = { {A, B, C}, {D, E, F}, {G, H, I} } pat[][] = { {E, F}, {H, I} } Output: Pattern found at (1, 1)
Approach: In order to find a pattern in a 2-D array using Rabin-Karp algorithm, consider an input matrix txt[m1][m2] and a pattern pat[n1][n2]. The idea is to find the hash of each columns of mat[][] and pat[][] and compare the hash values. For any column if hash values are equals than check for the corresponding rows values. Below are the steps:
- Find the hash values of each column for the first N1 rows in both txt[][] and pat[][] matrix.
- Apply Rabin-Karp Algorithm by finding hash values for the column hashes found in step 1.
- If a match is found compare txt[][] and pat[][] matrices for the specific rows and columns.
- Else slide down the column hashes by 1 row in the txt matrix using a rolling hash.
- Repeat steps 2 to 4 for all the hash values and if we found any pat[][] match in txt[][] then print the indices of top most cell in the txt[][].
To find the hash value: In order to find the hash value of a substring of size N in a text using rolling hash follow below steps:
- Remove the first character from the string: hash(txt[s:s+n-1])-(radix**(n-1)*txt[s])%prime_number
- Add the next character to the string: hash(txt[s:s+n-1])*radix + txt[n]
Below is the implementation of the above approach:
C++
#include <bits/stdc++.h> using namespace std; long long mod = 257; //The modular value long long r = 256; //radix long long dr = 1; //Highest power for row hashing long long dc = 1; //Highest power for col hashing //func that return a power n under mod m in LogN long long power( int a, int n, long long m){ if (n == 0){ return 1; } if (n == 1){ return a%m; } long long pow = power(a,n/2,m); if (n&1){ return ((a%m)*( pow )%m * ( pow )%m)%m; } else { return (( pow )%m * ( pow )%m)%m; } } //Checks if all values of pattern matches with the text bool check(vector<vector< char >> &txt,vector<vector< char >> &pat, long long r, long long c){ for ( long long i=0;i<pat.size();i++){ for ( long long j=0;j<pat[0].size();j++){ if (pat[i][j] != txt[i+r][j+c]) return false ; } } return true ; } //Finds the first hash of first n rows where n is no. of rows in pattern vector< long long > findHash(vector<vector< char >> &mat, long long row){ vector< long long > hash; long long col = mat[0].size(); for ( long long i=0;i<col;i++){ long long h = 0; for ( long long j=0;j<row;j++){ h = ((h*r)%mod + mat[j][i]%mod)%mod; } hash.push_back(h); } return hash; } //rolling hash function for columns void colRollingHash(vector<vector< char >> &txt, vector< long long > &t_hash, long long row, long long p_row){ for ( long long i=0;i<t_hash.size();i++){ t_hash[i] = (t_hash[i]%mod - ((txt[row][i])%mod*(dr)%mod)%mod)%mod; t_hash[i] = ((t_hash[i]%mod) * (r%mod))%mod; t_hash[i] = (t_hash[i]%mod + txt[row+p_row][i]%mod)%mod; } } void rabinKarp(vector<vector< char >> &txt, vector<vector< char >> &pat){ long long t_row = txt.size(); long long t_col = txt[0].size(); long long p_row = pat.size(); long long p_col = pat[0].size(); dr = power(r,p_row-1,mod); dc = power(r,p_col-1,mod); vector< long long > t_hash = findHash(txt,p_row); //column hash of p_row rows vector< long long > p_hash = findHash(pat,p_row); //column hash of p_row rows long long p_val = 0; //hash of entire pattern matrix for ( long long i=0;i<p_col;i++){ p_val = (p_val*r + p_hash[i])%mod; } for ( long long i=0;i<=(t_row-p_row);i++){ long long t_val = 0; for ( long long i=0;i<p_col;i++){ t_val = ((t_val*r) + t_hash[i])%mod; } for ( long long j=0;j<=(t_col-p_col);j++){ if (p_val == t_val){ if (check(txt,pat,i,j)){ cout<<i<< " " <<j<<endl; } } //calculating t_val for next set of columns t_val = (t_val%mod - ((t_hash[j]%mod)*(dc%mod))%mod + mod)%mod; t_val = (t_val%mod * r%mod)%mod; t_val = (t_val%mod + t_hash[j+p_col]%mod)%mod; } if (i < t_row-p_row){ //call this function for hashing form next row colRollingHash(txt,t_hash,i,p_row); } } } int main(){ vector<vector< char >> txt{{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' }}; vector<vector< char >> pat{{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' }}; //function prints the indices of row and col where its a match in txt rabinKarp(txt,pat); return 0; } |
Python3
# Python implementation for the # pattern matching in 2-D matrix # Function to find the hash-value # of the given columns of text def findHash(arr, col, row): hashCol = [] add = 0 radix = 256 # For each column for i in range ( 0 , col): for j in reversed ( range ( 0 , row)): add = add + (radix * * (row - j - 1 ) * ord (arr[j][i])) % 101 hashCol.append(add % 101 ); add = 0 return hashCol # Function to check equality of the # two strings def checkEquality(txt, row, col, flag): txt = [txt[i][col:patCol + col] for i in range (row, patRow + row)] # If pattern found if txt = = pat: flag = 1 print ( "Pattern found at" , \ "(" , row, ", " , col, ")" ) return flag # Function to find the hash value of # of the next column using rolling-hash # of the Rabin-karp def colRollingHash(txtHash, nxtRow): radix = 256 # Find the hash of the matrix for j in range ( len (txtHash)): txtHash[j] = (txtHash[j] * radix \ + ord (txt[nxtRow][j])) % 101 txtHash[j] = txtHash[j] - (radix * * (patRow) * ord (txt[nxtRow - patRow][j])) % 101 txtHash[j] = txtHash[j] % 101 return txtHash # Function to match a pattern in # the given 2D Matrix def search(txt, pat): # List of the hashed value for # the text and pattern columns patHash = [] txtHash = [] # Hash value of the # pat_hash and txt_hash patVal = 0 txtVal = 0 # Radix value for the input characters radix = 256 # Variable to determine if # pattern was found or not flag = 0 # Function call to find the # hash value of columns txtHash = findHash(txt, txtCol, patRow) patHash = findHash(pat, patCol, patRow) # Calculate hash value for patHash for i in range ( len (patHash)): patVal = patVal \ + (radix * * ( len (patHash) - i - 1 ) * patHash[i] % 101 ) patVal = patVal % 101 # Applying Rabin-Karp to compare # txtHash and patHash for i in range (patRow - 1 , txtRow): col = 0 txtVal = 0 # Find the hash value txtHash for j in range ( len (patHash)): txtVal = txtVal\ + (radix * * ( len (patHash) - j - 1 ) * txtHash[j]) % 101 txtVal = txtVal % 101 if txtVal = = patVal: flag = checkEquality(\ txt, i + 1 - patRow, col, flag) else : # Roll the txt window by one character for k in range ( len (patHash), len (txtHash)): txtVal = txtVal \ * radix + (txtHash[k]) % 101 txtVal = txtVal \ - (radix * * ( len (patHash)) * (txtHash[k - len (patHash)])) % 101 txtVal = txtVal % 101 col = col + 1 # Check if txtVal and patVal are equal if patVal = = txtVal: flag = checkEquality(\ txt, i + 1 - patRow, col, flag) else : continue # To make sure i does not exceed txtRow if i + 1 <txtRow: txtHash = colRollingHash(txtHash, i + 1 ) if flag = = 0 : print ( "Pattern not found" ) # Driver Code if __name__ = = "__main__" : # Given Text txt = [[ 'A' , 'B' , 'C' ], \ [ 'D' , 'E' , 'F' ], \ [ 'G' , 'H' , 'I' ]] # Given Pattern pat = [[ 'E' , 'F' ], [ 'H' , 'I' ]] # Dimensions of the text txtRow = 3 txtCol = 3 # Dimensions for the pattern patRow = 2 patCol = 2 # Function Call search(txt, pat) |
Javascript
// JS program to implement the approach let mod = 257; // The modular value let r = 256; // radix let dr = 1; // Highest power for row hashing let dc = 1; // Highest power for col hashing // func that return a power n under mod m in LogN function power(a, n, m) { if (n == 0) { return 1; } if (n == 1) { return a % m; } let pow = power(a, Math.floor(n / 2), m); if (n & 1) { return ((a % m) * (pow) % m * (pow) % m) % m; } else { return ((pow) % m * (pow) % m) % m; } } // Checks if all values of pattern matches with the text function check(txt, pat, r, c) { for (let i = 0; i < pat.length; i++) { for (let j = 0; j < pat[0].length; j++) { if (pat[i][j] != txt[i + r][j + c]) return false ; } } return true ; } // Finds the first hash of first n rows where n is no. of // rows in pattern function findHash(mat, row) { let hash = []; let col = (mat[0]).length; for (let i = 0; i < col; i++) { let h = 0; for (let j = 0; j < row; j++) { h = ((h * r) % mod + mat[j][i] % mod) % mod; } hash.push(h); } return hash; } // rolling hash function for columns function colRollingHash(txt, t_hash, row, p_row) { for (let i = 0; i < t_hash.length; i++) { t_hash[i] = (t_hash[i] % mod - ((txt[row][i]) % mod * (dr) % mod) % mod) % mod; t_hash[i] = ((t_hash[i] % mod) * (r % mod)) % mod; t_hash[i] = (t_hash[i] % mod + txt[row + p_row][i] % mod) % mod; } return t_hash } function rabinKarp(txt, pat) { let t_row = txt.length; let t_col = (txt[0]).length; let p_row = pat.length; let p_col = (pat[0]).length; dr = power(r, p_row - 1, mod); dc = power(r, p_col - 1, mod); let t_hash = findHash(txt, p_row); // column hash of p_row rows let p_hash = findHash(pat, p_row); // column hash of p_row rows let p_val = 0; // hash of entire pattern matrix for (let i = 0; i < p_col; i++) { p_val = (p_val * r + p_hash[i]) % mod; } for (let i = 0; i <= (t_row - p_row); i++) { let t_val = 0; for (let i = 0; i < p_col; i++) { t_val = ((t_val * r) + t_hash[i]) % mod; } for (let j = 0; j <= (t_col - p_col); j++) { if (p_val == t_val) { if (check(txt, pat, i, j)) { console.log(i + " " + j); } } // calculating t_val for next set of columns t_val = (t_val % mod - ((t_hash[j] % mod) * (dc % mod)) % mod + mod) % mod; t_val = (t_val % mod * r % mod) % mod; t_val = (t_val % mod + t_hash[j + p_col] % mod) % mod; } if (i < t_row - p_row) { // call this function for hashing form next row t_hash = colRollingHash(txt, t_hash, i, p_row); } } } let txt = [ [ 'A' , 'B' , 'C' , 'D' , 'E' ], [ 'A' , 'B' , 'C' , 'D' , 'E' ], [ 'A' , 'B' , 'C' , 'D' , 'E' ], [ 'A' , 'B' , 'C' , 'D' , 'E' ], [ 'A' , 'B' , 'C' , 'D' , 'E' ] ]; let pat = [ [ 'A' , 'B' , 'C' , 'D' , 'E' ], [ 'A' , 'B' , 'C' , 'D' , 'E' ], [ 'A' , 'B' , 'C' , 'D' , 'E' ], [ 'A' , 'B' , 'C' , 'D' , 'E' ] ]; // function prints the indices of row and col where its a // match in txt rabinKarp(txt, pat); // This code is contributed by phasing17. |
Java
import java.io.*; import java.util.*; public class Main { static long mod = 257 ; // The modular value static long r = 256 ; // radix static long dr = 1 ; // Highest power for row hashing static long dc = 1 ; // Highest power for col hashing // func that return a power n under mod m in LogN static long power( int a, int n, long m) { if (n == 0 ) { return 1 ; } if (n == 1 ) { return a % m; } long pow = power(a, n / 2 , m); if (n % 2 == 1 ) { return ((a % m) * (pow) % m * (pow) % m) % m; } else { return ((pow) % m * (pow) % m) % m; } } // Checks if all values of pattern matches with the text static boolean check( char [][] txt, char [][] pat, long r, long c) { for ( long i = 0 ; i < pat.length; i++) { for ( long j = 0 ; j < pat[ 0 ].length; j++) { if (pat[( int )i][( int )j] != txt[( int )(i + r)][( int )(j + c)]) return false ; } } return true ; } // Finds the first hash of first n rows where n is no. // of rows in pattern static List<Long> findHash( char [][] mat, long row) { List<Long> hash = new ArrayList<Long>(); long col = mat[ 0 ].length; for ( int i = 0 ; i < col; i++) { long h = 0 ; for ( int j = 0 ; j < row; j++) { h = ((h * r) % mod + mat[j][i] % mod) % mod; } hash.add(h); } return hash; } // rolling hash function for columns static void colRollingHash( char [][] txt, List<Long> t_hash, long row, long p_row) { for ( int i = 0 ; i < t_hash.size(); i++) { t_hash.set(i, (t_hash.get(i) % mod - ((txt[( int )row][i]) % mod * (dr) % mod) % mod) % mod); t_hash.set(i, ((t_hash.get(i) % mod) * (r % mod)) % mod); t_hash.set(i, (t_hash.get(i) % mod + txt[( int )(row + p_row)][i] % mod) % mod); } } static void rabinKarp( char [][] txt, char [][] pat) { long t_row = txt.length; long t_col = txt[ 0 ].length; long p_row = pat.length; long p_col = pat[ 0 ].length; dr = power(( int )r, ( int )p_row - 1 , mod); dc = power(( int )r, ( int )p_col - 1 , mod); List<Long> t_hash = findHash( txt, p_row); // Column hash of p_row rows List<Long> p_hash = findHash( pat, p_row); // Column hash of p_row rows long p_val = 0 ; // Hash of entire pattern matrix for ( long i = 0 ; i < p_col; i++) { p_val = (p_val * r + p_hash.get(( int )i)) % mod; } for ( long i = 0 ; i <= (t_row - p_row); i++) { long t_val = 0 ; for ( long j = 0 ; j < p_col; j++) { t_val = ((t_val * r) + t_hash.get(( int )j)) % mod; } for ( long j = 0 ; j <= (t_col - p_col); j++) { if (p_val == t_val) { if (check(txt, pat, i, j)) { System.out.println(i + " " + j); } } // Calculating t_val for next set of columns t_val = (t_val % mod - ((t_hash.get(( int )j) % mod) * (dc % mod)) % mod + mod) % mod; t_val = (t_val % mod * r % mod) % mod; t_val = (t_val % mod + t_hash.get(( int )(j)) % mod) % mod; } if (i < t_row - p_row) { // Call this function for hashing from next // row colRollingHash(txt, t_hash, i, p_row); } } } public static void main(String[] args) { char [][] txt = { { 'A' , 'B' , 'C' , 'D' , 'E' }, { 'A' , 'B' , 'C' , 'D' , 'E' }, { 'A' , 'B' , 'C' , 'D' , 'E' }, { 'A' , 'B' , 'C' , 'D' , 'E' }, { 'A' , 'B' , 'C' , 'D' , 'E' } }; char [][] pat = { { 'A' , 'B' , 'C' , 'D' , 'E' }, { 'A' , 'B' , 'C' , 'D' , 'E' }, { 'A' , 'B' , 'C' , 'D' , 'E' }, { 'A' , 'B' , 'C' , 'D' , 'E' } }; // function prints the indices of row and col where // its a match in txt rabinKarp(txt, pat); } } |
C#
using System; using System.Collections.Generic; class Program { static long mod = 257; // The modular value static long r = 256; // radix static long dr = 1; // Highest power for row hashing static long dc = 1; // Highest power for col hashing // func that return a power n under mod m in LogN static long power( int a, int n, long m) { if (n == 0) { return 1; } if (n == 1) { return a % m; } long pow = power(a, n / 2, m); if ((n & 1) != 0) { return ((a % m) * (pow % m) * (pow % m)) % m; } else { return ((pow % m) * (pow % m)) % m; } } // Checks if all values of pattern matches with the text static bool check(List<List< char > > txt, List<List< char > > pat, long r, long c) { for ( long i = 0; i < pat.Count; i++) { for ( long j = 0; j < pat[0].Count; j++) { if (pat[( int )i][( int )j] != txt[( int )i + ( int )r] [( int )j + ( int )c]) return false ; } } return true ; } // Finds the first hash of first n rows where n is no. // of rows in pattern static List< long > findHash(List<List< char > > mat, long row) { List< long > hash = new List< long >(); long col = mat[0].Count; for ( long i = 0; i < col; i++) { long h = 0; for ( long j = 0; j < row; j++) { h = ((h * r) % mod + mat[( int )j][( int )i] % mod) % mod; } hash.Add(h); } return hash; } // rolling hash function for columns static void colRollingHash(List<List< char > > txt, List< long > t_hash, long row, long p_row) { for ( long i = 0; i < t_hash.Count; i++) { t_hash[( int )i] = (t_hash[( int )i] % mod - ((txt[( int )row][( int )i] % mod) * (dr % mod)) % mod) % mod; t_hash[( int )i] = ((t_hash[( int )i] % mod) * (r % mod)) % mod; t_hash[( int )i] = (t_hash[( int )i] % mod + txt[( int )row + ( int )p_row][( int )i] % mod) % mod; } } static void rabinKarp(List<List< char > > txt, List<List< char > > pat) { long t_row = txt.Count; long t_col = txt[0].Count; long p_row = pat.Count; long p_col = pat[0].Count; dr = power(( int )r, ( int )p_row - 1, mod); dc = power(( int )r, ( int )p_col - 1, mod); List< long > t_hash = findHash( txt, p_row); // Column hash of p_row rows List< long > p_hash = findHash( pat, p_row); // Column hash of p_row rows long p_val = 0; // Hash of entire pattern matrix for ( long i = 0; i < p_col; i++) { p_val = (p_val * r + p_hash[( int )i]) % mod; } for ( long i = 0; i <= (t_row - p_row); i++) { long t_val = 0; for ( long j = 0; j < p_col; j++) { t_val = ((t_val * r) + t_hash[( int )j]) % mod; } for ( long j = 0; j <= (t_col - p_col); j++) { if (p_val == t_val) { if (check(txt, pat, i, j)) { Console.WriteLine(i + " " + j); } } // Calculating t_val for next set of columns t_val = (t_val % mod - ((t_hash[( int )j] % mod) * (dc % mod)) % mod + mod) % mod; t_val = (t_val % mod * r % mod) % mod; t_val = (t_val % mod + t_hash[( int )j] % mod) % mod; } if (i < t_row - p_row) { // Call this function for hashing from next // row colRollingHash(txt, t_hash, i, p_row); } } } public static void Main() { List<List< char > > txt = new List<List< char > >{ new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' }, new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' }, new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' }, new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' }, new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' } }; List<List< char > > pat = new List<List< char > >{ new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' }, new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' }, new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' }, new List< char >{ 'A' , 'B' , 'C' , 'D' , 'E' }, }; // function prints the indices of row and col where // its a match in txt rabinKarp(txt, pat); } } |
Pattern found at ( 1 , 1 )
Contact Us