KMP Pattern Search

Searching a pattern using KMP pattern search algorithm

KMP Algorithm
1. Create a partial match table for the pattern being searched.
2. Use the partial match table to efficiently skip the length of the matched prefixes while searching the pattern.

Example:
Pattern: kaykayak
Text : kaykaykaykayak

Filling the partial match table for pattern kaykayak. The values at indices in partial match table are filled with the length of the longest proper prefix that matches the longest proper suffix in the same substring (subpattern).

Length Subpattern Proper Prefixes Proper Suffixes Value
1 k - - 0 (No matching prefix that is also a suffix)
2 ka k a 0 (No matching prefix that is also a suffix)
3 kay k,
ka
y,
ay
0 (No matching prefix that is also a suffix)
4 kayk k,
ka,
kay
k,
yk,
ayk
1 (Prefix k of length 1 is also the suffix k)
5 kayka k,
ka,
kay,
kayk
a,
ka,
yka,
ayka
2 (Prefix ka of length 2 is also the suffix ka)
6 kaykay k,
ka,
kay,
kayk,
kayka
y,
ay,
kay,
ykay,
aykay
3 (Prefix kay of length 3 is also the suffix kay)
7 kaykaya k,
ka,
kay,
kayk,
kayka,
kaykay
a,
ya,
aya,
kaya,
ykaya,
aykaya
0 (No matching prefix that is also a suffix)
8 kaykayak k,
ka,
kay,
kayk,
kayka,
kaykay,
kayakaya
k,
ak,
yak,
ayak,
kayak,
ykayak,
aykayak
1 (Prefix k of length 1 is also the suffix k)

Partial Match Table for pattern kaykayak

Pattern k a y k a y a k
Index 0 1 2 3 4 5 6 7
Value 0 0 0 1 2 3 0 1

KMP_Pattern_Match

Time complexity of KMP pattern matching algorithm : O(m+n), where ’m’ is the length of the text and ‘n’ is the length of the pattern. As the time taken to create the partial match table is O(n) and the time taken to search the pattern is O(m), the overall time complexity is O(m+n).


C++14 KMP pattern match

#include<iostream>
#include<string>
#include<vector>

using namespace std;
typedef vector<int> VI; 

class KMP {

    private:
        vector<int> partial_match_table;
        vector<int> location;

        // Create a partial match table to efficiently skip the matched prefixes
        // in the pattern

        void Construct_PartialMatchTable(string pattern){

            int sz = pattern.size();
            partial_match_table.resize(sz);
            int index = 0;
            int position = 1;   

            while(position < sz){
                if(pattern.at(position) == pattern.at(index)){
                   partial_match_table[position] = index + 1;
                   position++; index++;
                } else {
                   if(index != 0){ 
                      index = partial_match_table[index-1];
                   } else {
                      partial_match_table[position] = 0;
                      position++;
                   }
                }
            }

            Print_PartialMatchTable(pattern);
        }

        void Print_PartialMatchTable(string pattern){
            cout << "Partial Match Table" << endl;
            cout << "Pattern: ";
            for(auto& i : pattern)
                cout << i << " "; 
            cout << endl;
            cout << "Index  : ";
            for(int i=0; i<pattern.size(); i++)
                cout << i << " ";
            cout << endl;
            cout << "Value  : ";
            for(auto& i : partial_match_table)
                cout << i << " ";
            cout << endl;
        }
 
    public:
        void Search_Pattern(string pattern, string text){

            cout << "Searching pattern:[" << pattern << "] inside text:[" << text << "]" << endl;
            // Before searching the pattern, construct the partial match table
            Construct_PartialMatchTable(pattern);

            int t = 0; // t tracks the location in text
            int p = 0; // p tracks the location in pattern

            while(t < text.size()){

                if(pattern.at(p) == text.at(t)){
                    p++, t++;
                } else {
                    if(p != 0){ 
                       p = partial_match_table[p-1];
                    } else {
                       t++;
                    }
                }

                if(p == pattern.size()){
                    location.push_back(t-p);
                    // If in the already matched text, there is a suffix that matches
                    // the prefix in the pattern (example: "al" in Itsalgoal); for searching further, start from 
                    // the beginning of the suffix in the text and not from the next location after the matched text.
                    // Example: For searching pattern:"algoal" inside  text:"Itsalgoalgoalgoal", first match occurs at 3..8, 
                    // for searching further instead of starting with the text postion 9, start at location 7.
                    // Itsalgoalgoalgoal
                    // 0  3   7   11 
                    // ^  ^   ^   ^
                    // |  |   |   | 
                    // => Pattern occours at [3 7 11]
                    t = t-partial_match_table[p-1];
                    p = 0;
                }
            }
            cout << "Pattern found at locations: ";  
            for(auto &i : location)
                cout << i << " " ;
            cout << endl << endl;
            // Clear the locations and the partial match table
            location.clear();
            partial_match_table.clear();
        }
};

int main(){

    string pattern("algoal");
    string text("Itsalgoalgoalgoal");

    KMP k;
    k.Search_Pattern(pattern, text);

    pattern = "kaykayak"; 
    text = "kaykaykaykayak"; 
    k.Search_Pattern(pattern, text);

    pattern = "abc"; 
    text = "abcabcabcabc"; 
    k.Search_Pattern(pattern, text);

    pattern = "kayak"; 
    text = "Thisiskayakayakkayaxkayak"; 
    k.Search_Pattern(pattern, text);

    pattern = "abababa"; 
    text = "abababdababababababc"; 
    k.Search_Pattern(pattern, text);

    return 0;
}

Output

Searching pattern:[algoal] inside text:[Itsalgoalgoalgoal]
Partial Match Table
Pattern: a l g o a l 
Index  : 0 1 2 3 4 5 
Value  : 0 0 0 0 1 2 
Pattern found at locations: 3 7 11 

Searching pattern:[kaykayak] inside text:[kaykaykaykayak]
Partial Match Table
Pattern: k a y k a y a k 
Index  : 0 1 2 3 4 5 6 7 
Value  : 0 0 0 1 2 3 0 1 
Pattern found at locations: 6

Searching pattern:[abc] inside text:[abcabcabcabc]
Partial Match Table
Pattern: a b c 
Index  : 0 1 2 
Value  : 0 0 0 
Pattern found at locations: 0 3 6 9 

Searching pattern:[kayak] inside text:[Thisiskayakayakkayaxkayak]
Partial Match Table
Pattern: k a y a k 
Index  : 0 1 2 3 4 
Value  : 0 0 0 0 1 
Pattern found at locations: 6 10 20 

Searching pattern:[abababa] inside text:[abababdababababababc]
Partial Match Table
Pattern: a b a b a b a 
Index  : 0 1 2 3 4 5 6 
Value  : 0 0 1 2 3 4 5 
Pattern found at locations: 7 9 11 

Reference:
https://www.youtube.com/watch?v=GTJr8OvyEVQ

Copyright © 2019-2020, Algotree.org.
All rights reserved.