KMP Algorithm
Example: Pattern : kaykayak Text : kaykaykaykayak
Filling the partial match table for pattern kaykayak. The values at indices in partial match table are filled with the length of the longest proper prefix that matches the longest proper suffix in the same substring (subpattern).
Length | Subpattern | Proper Prefixes | Proper Suffixes | Value |
---|---|---|---|---|
1 | k | - | - | 0 (No matching prefix that is also a suffix) |
2 | ka | k | a | 0 (No matching prefix that is also a suffix) |
3 | kay | k,ka | y,ay | 0 (No matching prefix that is also a suffix) |
4 | kayk | k,ka,kay | k,yk,ayk | 1 (Prefix k of length 1 is also the suffix k) |
5 | kayka | k,ka,kay,kayk | a, ka,yka,ayka |
2 (Prefix ka of length 2 is also the suffix ka) |
6 | kaykay | k,ka,kay,kayk,kayka | y,ay, kay,ykay,aykay |
3 (Prefix kay of length 3 is also the suffix kay) |
7 | kaykaya | k,ka,kay,kayk,kayka, kaykay |
a,ya,aya, kaya,ykaya,aykaya |
0 (No matching prefix that is also a suffix) |
8 | kaykayak | k,ka,kay,kayk,kayka, kaykay,kayakaya |
k,ak,yak,ayak, kayak,ykayak,aykayak |
1 (Prefix k of length 1 is also the suffix k) |
Partial Match Table for pattern kaykayak
Pattern | k | a | y | k | a | y | a | k |
---|---|---|---|---|---|---|---|---|
Index | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
Value | 0 | 0 | 0 | 1 | 2 | 3 | 0 | 1 |
Time complexity of KMP pattern matching algorithm : O(m+n), where ‘m’ is the length of the text and ‘n’ is the length of the pattern. As the time taken to create the partial match table is O(n) and the time taken to search the pattern is O(m), the overall time complexity is O(m+n).
C++14 KMP pattern match
#include<iostream>
#include<string>
#include<vector>
using namespace std;
typedef vector<int> VI;
class KMP {
private:
vector<int> partial_match_table;
vector<int> location;
// Create a partial match table to efficiently skip the matched prefixes
// in the pattern
void Construct_PartialMatchTable(string pattern){
int sz = pattern.size();
partial_match_table.resize(sz);
int index = 0;
int position = 1;
while(position < sz){
if(pattern.at(position) == pattern.at(index)){
partial_match_table[position] = index + 1;
position++; index++;
} else {
if(index != 0){
index = partial_match_table[index-1];
} else {
partial_match_table[position] = 0;
position++;
}
}
}
Print_PartialMatchTable(pattern);
}
void Print_PartialMatchTable(string pattern){
cout << "Partial Match Table" << endl;
cout << "Pattern: ";
for(auto& i : pattern)
cout << i << " ";
cout << endl;
cout << "Index : ";
for(int i=0; i<pattern.size(); i++)
cout << i << " ";
cout << endl;
cout << "Value : ";
for(auto& i : partial_match_table)
cout << i << " ";
cout << endl;
}
public:
void Search_Pattern(string pattern, string text){
cout << "Searching pattern:[" << pattern << "] inside text:[" << text << "]" << endl;
// Before searching the pattern, construct the partial match table
Construct_PartialMatchTable(pattern);
int t = 0; // t tracks the location in text
int p = 0; // p tracks the location in pattern
while(t < text.size()){
if(pattern.at(p) == text.at(t)){
p++, t++;
} else {
if(p != 0){
p = partial_match_table[p-1];
} else {
t++;
}
}
if(p == pattern.size()){
location.push_back(t-p);
// If in the already matched text, there is a suffix that matches
// the prefix in the pattern (example: "al" in Itsalgoal); for searching further, start from
// the beginning of the suffix in the text and not from the next location after the matched text.
// Example: For searching pattern:"algoal" inside text:"Itsalgoalgoalgoal", first match occurs at 3..8,
// for searching further instead of starting with the text postion 9, start at location 7.
// Itsalgoalgoalgoal
// 0 3 7 11
// ^ ^ ^ ^
// | | | |
// => Pattern occours at [3 7 11]
t = t-partial_match_table[p-1];
p = 0;
}
}
cout << "Pattern found at locations: ";
for(auto &i : location)
cout << i << " " ;
cout << endl << endl;
// Clear the locations and the partial match table
location.clear();
partial_match_table.clear();
}
};
int main(){
string pattern("algoal");
string text("Itsalgoalgoalgoal");
KMP k;
k.Search_Pattern(pattern, text);
pattern = "kaykayak";
text = "kaykaykaykayak";
k.Search_Pattern(pattern, text);
pattern = "abc";
text = "abcabcabcabc";
k.Search_Pattern(pattern, text);
pattern = "kayak";
text = "Thisiskayakayakkayaxkayak";
k.Search_Pattern(pattern, text);
pattern = "abababa";
text = "abababdababababababc";
k.Search_Pattern(pattern, text);
return 0;
}
Output
Searching pattern:[algoal] inside text:[Itsalgoalgoalgoal]
Partial Match Table
Pattern: a l g o a l
Index : 0 1 2 3 4 5
Value : 0 0 0 0 1 2
Pattern found at locations: 3 7 11
Searching pattern:[kaykayak] inside text:[kaykaykaykayak]
Partial Match Table
Pattern: k a y k a y a k
Index : 0 1 2 3 4 5 6 7
Value : 0 0 0 1 2 3 0 1
Pattern found at locations: 6
Searching pattern:[abc] inside text:[abcabcabcabc]
Partial Match Table
Pattern: a b c
Index : 0 1 2
Value : 0 0 0
Pattern found at locations: 0 3 6 9
Searching pattern:[kayak] inside text:[Thisiskayakayakkayaxkayak]
Partial Match Table
Pattern: k a y a k
Index : 0 1 2 3 4
Value : 0 0 0 0 1
Pattern found at locations: 6 10 20
Searching pattern:[abababa] inside text:[abababdababababababc]
Partial Match Table
Pattern: a b a b a b a
Index : 0 1 2 3 4 5 6
Value : 0 0 1 2 3 4 5
Pattern found at locations: 7 9 11