KMP algorithm is designed for finding a string pattern in a given text or a paragraph. This algorithm makes use of a partial match table for efficiently searching the pattern in a given text. Thus, this algorithm
Proper Prefix & Proper Suffix.
For a given string abcd we get the following proper prefixes and suffixes.
Proper Prefixes : a, ab, abc.
Proper Suffixes : d, cd, bcd.
Consider the below example: Pattern : kaykayak Text : kaykaykaykayak
Filling the partial match table for pattern kaykayak.
The values at the indices in the partial match table are filled with the length of the longest proper prefix that matches the longest proper suffix in the same substring (subpattern).
Length | Subpattern | Proper Prefixes | Proper Suffixes | Value |
---|---|---|---|---|
1 | k | - | - | 0 (No matching prefix that is also a suffix) |
2 | ka | k | a | 0 (No matching prefix that is also a suffix) |
3 | kay | k,ka | y,ay | 0 (No matching prefix that is also a suffix) |
4 | kayk | k,ka,kay | k,yk,ayk | 1 (Prefix k of length 1 is also the suffix k) |
5 | kayka | k,ka,kay,kayk | a, ka,yka,ayka |
2 (Prefix ka of length 2 is also the suffix ka) |
6 | kaykay | k,ka,kay,kayk,kayka | y,ay, kay,ykay,aykay |
3 (Prefix kay of length 3 is also the suffix kay) |
7 | kaykaya | k,ka,kay,kayk,kayka, kaykay |
a,ya,aya, kaya,ykaya,aykaya |
0 (No matching prefix that is also a suffix) |
8 | kaykayak | k,ka,kay,kayk,kayka, kaykay,kayakaya |
k,ak,yak,ayak, kayak,ykayak,aykayak |
1 (Prefix k of length 1 is also the suffix k) |
Generated Partial Match Table for pattern kaykayak
Pattern | k | a | y | k | a | y | a | k |
---|---|---|---|---|---|---|---|---|
Index | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
Value | 0 | 0 | 0 | 1 | 2 | 3 | 0 | 1 |
Consider the below steps taken to match the pattern.
Time complexity of KMP pattern matching algorithm : O ( m + n ), where ’m’ is the length of the text and ’n’ is the length of the pattern. As the time taken to create the partial match table is O ( n ) and the time taken to search the pattern is O ( m ), the overall time complexity is O ( m + n ).
Implementation of KMP pattern matching algorithm
class KMP:
def __init__(self):
self.partial_match_table = []
self.location = []
# The below function constructs a partial match table to efficiently skip
# the matched prefixes in the pattern
def Create_PMT (self, pattern):
sz = len(pattern)
self.partial_match_table = [0] * sz
index = 0
position = 1
while ( position < sz ):
if (pattern[position] == pattern[index]):
self.partial_match_table[position] = index + 1
position += 1
index += 1
else :
if (index != 0):
index = self.partial_match_table[index-1]
else:
self.partial_match_table[position] = 0
position += 1
self.Print_PMT(pattern)
# The below function prints the partial match table
def Print_PMT (self, pattern):
print("Partial Match Table")
print("Pattern: ", end = ' ')
for p in pattern:
print(p, end = ' ')
print("\nIndex : ", end = ' ')
for i in range(len(pattern)):
print(i, end = ' ')
print("\nValue : ", end = ' ')
for i in self.partial_match_table:
print(i, end= ' ')
def Search (self, pattern, text):
print("Searching pattern : [" + pattern + "] inside text : [" + text + "]")
# Before searching the pattern, construct the partial match table
self.Create_PMT(pattern)
t = 0 # t tracks the location in text
p = 0 # p tracks the location in pattern
while (t < len(text)):
if ( pattern[p] == text[t] ):
p += 1
t += 1
else:
if ( p != 0 ):
p = self.partial_match_table[p-1]
else:
t += 1
if ( p == len(pattern) ):
self.location.append(t-p)
# If in the already matched text, there is a suffix that matches
# the prefix in the pattern (example: "al" in Itsalgoal); for searching further, start from
# the beginning of the suffix in the text and not from the next location after the matched text.
# Example: For searching pattern:"algoal" inside text:"Itsalgoalgoalgoal", first match occurs at 3..8,
# for searching further instead of starting with the text postion 9, start at location 7.
# Itsalgoalgoalgoal
# 0 3 7 11
# ^ ^ ^ ^
# | | | |
# => Pattern occours at [3 7 11]
t = t - self.partial_match_table[p-1]
p = 0
print("\nPattern found at locations: ", end = ' ')
for i in (self.location):
print(str(i) + " ", end = ' ')
print("\n")
# Clear the locations and the partial match table
self.location.clear()
self.partial_match_table.clear()
def main():
k = KMP()
pattern = "algoal"
text = "Itsalgoalgoalgoal"
k.Search(pattern, text)
pattern = "kaykayak"
text = "kaykaykaykayak"
k.Search(pattern, text)
pattern = "abc"
text = "abcabcabcabc"
k.Search(pattern, text)
pattern = "kayak"
text = "Thisiskayakayakkayaxkayak"
k.Search(pattern, text)
pattern = "abababa"
text = "abababdababababababc"
k.Search(pattern, text)
if __name__ == "__main__":
main()
Output
Searching pattern : [algoal] inside text : [Itsalgoalgoalgoal]
Partial Match Table
Pattern: a l g o a l
Index : 0 1 2 3 4 5
Value : 0 0 0 0 1 2
Pattern found at locations: 3 7 11
Searching pattern : [kaykayak] inside text : [kaykaykaykayak]
Partial Match Table
Pattern: k a y k a y a k
Index : 0 1 2 3 4 5 6 7
Value : 0 0 0 1 2 3 0 1
Pattern found at locations: 6
Searching pattern : [abc] inside text : [abcabcabcabc]
Partial Match Table
Pattern: a b c
Index : 0 1 2
Value : 0 0 0
Pattern found at locations: 0 3 6 9
Searching pattern : [kayak] inside text : [Thisiskayakayakkayaxkayak]
Partial Match Table
Pattern: k a y a k
Index : 0 1 2 3 4
Value : 0 0 0 0 1
Pattern found at locations: 6 10 20
Searching pattern : [abababa] inside text : [abababdababababababc]
Partial Match Table
Pattern: a b a b a b a
Index : 0 1 2 3 4 5 6
Value : 0 0 1 2 3 4 5
Pattern found at locations: 7 9 11
#include<iostream>
#include<string>
#include<vector>
using namespace std;
typedef vector<int> VI;
class KMP {
private:
vector<int> partial_match_table; // PMT
vector<int> location;
// The below function construct a partial match table (PMT) to
// efficiently skip the matched prefixes in the pattern
void Construct_PMT (string pattern) {
int sz = pattern.size();
partial_match_table.resize(sz);
int index = 0;
int position = 1;
while (position < sz) {
if (pattern.at(position) == pattern.at(index)) {
partial_match_table[position] = index + 1;
position++; index++;
} else {
if (index != 0) {
index = partial_match_table[index-1];
} else {
partial_match_table[position] = 0;
position++;
}
}
}
Print_PMT (pattern);
}
void Print_PMT (string pattern) {
cout << "Partial Match Table" << endl;
cout << "Pattern: ";
for(auto& i : pattern)
cout << i << " ";
cout << endl;
cout << "Index : ";
for(int i=0; i<pattern.size(); i++)
cout << i << " ";
cout << endl;
cout << "Value : ";
for(auto& i : partial_match_table)
cout << i << " ";
cout << endl;
}
public:
void Search (string pattern, string text) {
cout << "Searching pattern:[" << pattern << "] inside text:[" << text << "]" << endl;
// Before searching the pattern, construct the partial match table
Construct_PMT (pattern);
int t = 0; // t tracks the location in text
int p = 0; // p tracks the location in pattern
while (t < text.size()) {
if (pattern.at(p) == text.at(t)) {
p++, t++;
} else {
if (p != 0) {
p = partial_match_table[p-1];
} else {
t++;
}
}
if (p == pattern.size()) {
location.push_back(t-p);
// If in the already matched text, there is a suffix that matches
// the prefix in the pattern (example: "al" in Itsalgoal); for searching further, start from
// the beginning of the suffix in the text and not from the next location after the matched text.
// Example: For searching pattern:"algoal" inside text:"Itsalgoalgoalgoal", first match occurs at 3..8,
// for searching further instead of starting with the text postion 9, start at location 7.
// Itsalgoalgoalgoal
// 0 3 7 11
// ^ ^ ^ ^
// | | | |
// => Pattern occours at [3 7 11]
t = t - partial_match_table[p-1];
p = 0;
}
}
cout << "Pattern found at locations: ";
for (auto &i : location)
cout << i << " " ;
cout << endl << endl;
// Clear the locations and the partial match table
location.clear();
partial_match_table.clear();
}
};
int main(){
KMP k;
string pattern("algoal");
string text("Itsalgoalgoalgoal");
k.Search(pattern, text);
pattern = "kaykayak";
text = "kaykaykaykayak";
k.Search(pattern, text);
pattern = "abc";
text = "abcabcabcabc";
k.Search(pattern, text);
pattern = "kayak";
text = "Thisiskayakayakkayaxkayak";
k.Search(pattern, text);
pattern = "abababa";
text = "abababdababababababc";
k.Search(pattern, text);
return 0;
}
Output
Searching pattern:[algoal] inside text:[Itsalgoalgoalgoal]
Partial Match Table
Pattern: a l g o a l
Index : 0 1 2 3 4 5
Value : 0 0 0 0 1 2
Pattern found at locations: 3 7 11
Searching pattern:[kaykayak] inside text:[kaykaykaykayak]
Partial Match Table
Pattern: k a y k a y a k
Index : 0 1 2 3 4 5 6 7
Value : 0 0 0 1 2 3 0 1
Pattern found at locations: 6
Searching pattern:[abc] inside text:[abcabcabcabc]
Partial Match Table
Pattern: a b c
Index : 0 1 2
Value : 0 0 0
Pattern found at locations: 0 3 6 9
Searching pattern:[kayak] inside text:[Thisiskayakayakkayaxkayak]
Partial Match Table
Pattern: k a y a k
Index : 0 1 2 3 4
Value : 0 0 0 0 1
Pattern found at locations: 6 10 20
Searching pattern:[abababa] inside text:[abababdababababababc]
Partial Match Table
Pattern: a b a b a b a
Index : 0 1 2 3 4 5 6
Value : 0 0 1 2 3 4 5
Pattern found at locations: 7 9 11
import java.util.List;
import java.util.ArrayList;
class KMP {
List<Integer> partial_match_table; // PMT
List<Integer> location;
KMP() {
partial_match_table = new ArrayList<>();
location = new ArrayList<>();
}
// The below function construct a partial match table (PMT) to
// efficiently skip the matched prefixes in the pattern
void Construct_PMT (String pattern) {
int sz = pattern.length();
int index = 0;
int position = 1;
// Initially set all the values to 0 in the PMT before we construct it.
while (partial_match_table.size() <= sz)
partial_match_table.add(0);
while (position < sz) {
if (pattern.charAt(position) == pattern.charAt(index)) {
partial_match_table.set(position, index + 1);
position++; index++;
} else {
if (index != 0) {
index = partial_match_table.get(index-1);
} else {
partial_match_table.set(position, 0);
position++;
}
}
}
Print_PMT (pattern);
}
void Print_PMT (String pattern) {
System.out.print ("Partial Match Table");
System.out.print ("Pattern: ");
for(int i=0; i < pattern.length(); i++)
System.out.print(pattern.charAt(i) + " ");
System.out.println();
System.out.print("Index : ");
for(int i=0; i<pattern.length(); i++)
System.out.print(i + " ");
System.out.println();
System.out.print("Value : ");
for(int i : partial_match_table)
System.out.print(i + " ");
System.out.println();
}
void Search (String pattern, String text) {
System.out.println( "Searching pattern : [" + pattern + "] inside text : [" + text + "]");
// Before searching the pattern, construct the partial match table
Construct_PMT (pattern);
int t = 0; // t tracks the location in text
int p = 0; // p tracks the location in pattern
while (t < text.length()) {
if (pattern.charAt(p) == text.charAt(t)) {
p++;
t++;
} else {
if (p != 0) {
p = partial_match_table.get(p-1);
} else {
t++;
}
}
if (p == pattern.length()) {
location.add(t-p);
// If in the already matched text, there is a suffix that matches
// the prefix in the pattern (example: "al" in Itsalgoal); for searching further, start from
// the beginning of the suffix in the text and not from the next location after the matched text.
// Example: For searching pattern:"algoal" inside text:"Itsalgoalgoalgoal", first match occurs at 3..8,
// for searching further instead of starting with the text postion 9, start at location 7.
// Itsalgoalgoalgoal
// 0 3 7 11
// ^ ^ ^ ^
// | | | |
// => Pattern occours at [3 7 11]
t = t - partial_match_table.get(p-1);
p = 0;
}
}
System.out.print("Pattern found at locations: ");
for (int i : location)
System.out.print( i + " ");
System.out.println();
// Clear the locations and the partial match table
location.clear();
partial_match_table.clear();
}
public static void main (String[] args) {
KMP k = new KMP();
String pattern = "algoal";
String text = "Itsalgoalgoalgoal";
k.Search(pattern, text);
pattern = "kaykayak";
text = "kaykaykaykayak";
k.Search(pattern, text);
pattern = "abc";
text = "abcabcabcabc";
k.Search(pattern, text);
pattern = "kayak";
text = "Thisiskayakayakkayaxkayak";
k.Search(pattern, text);
pattern = "abababa";
text = "abababdababababababc";
k.Search(pattern, text);
}
}
Output
Searching pattern : [algoal] inside text : [Itsalgoalgoalgoal]
Partial Match TablePattern: a l g o a l
Index : 0 1 2 3 4 5
Value : 0 0 0 0 1 2 0
Pattern found at locations: 3 7 11
Searching pattern : [kaykayak] inside text : [kaykaykaykayak]
Partial Match TablePattern: k a y k a y a k
Index : 0 1 2 3 4 5 6 7
Value : 0 0 0 1 2 3 0 1 0
Pattern found at locations: 6
Searching pattern : [abc] inside text : [abcabcabcabc]
Partial Match TablePattern: a b c
Index : 0 1 2
Value : 0 0 0 0
Pattern found at locations: 0 3 6 9
Searching pattern : [kayak] inside text : [Thisiskayakayakkayaxkayak]
Partial Match TablePattern: k a y a k
Index : 0 1 2 3 4
Value : 0 0 0 0 1 0
Pattern found at locations: 6 10 20
Searching pattern : [abababa] inside text : [abababdababababababc]
Partial Match TablePattern: a b a b a b a
Index : 0 1 2 3 4 5 6
Value : 0 0 1 2 3 4 5 0
Pattern found at locations: 7 9 11