The idea behind counting all substrings in a given string is as below
The number of nodes in the Trie is the same as the number of unique substrings in a given string.
Consider the below example of finding all substrings of a given string [ ababa ].
The number of unique substrings for string [ ababa ] : [ a ], [ ab ], [ aba ], [ abab ], [ ababa ], [ b ], [ ba ], [ bab ] and [ baba ]. Thus we have 9 unqiue substrings.
We insert the following substrings in the Trie : [ ababc ], [ baba ], [ aba ], [ ba ] & [ a ]. The nodes marked in yellow indicate the end of the string. We see that the number of nodes (excluding root) are 9 i.e same as the number of unique substrings.
C++ : Counting all substrings in a given string
#include<iostream>
using namespace std;
class TrieNode {
public:
TrieNode * children[129];
TrieNode() {
for (int i=0; i<=128; i++) {
children[i] = NULL;
}
}
};
class Trie {
public:
TrieNode root;
int node_count;
Trie () {
// Track the number of nodes in the trie.
node_count = 0;
}
// Insert a string into the trie.
void Insert(string str) {
TrieNode * parent = &root;
for (size_t i=0; i<str.size(); i++) {
int N = str.at(i);
if (parent->children[N] == NULL) {
parent->children[N] = new TrieNode;
node_count++;
}
parent = parent->children[N];
}
}
};
int main() {
int tests;
cout << "Enter number of strings : ";
cin >> tests;
while (tests--) {
Trie t;
string str;
cin >> str;
/* Insert substrings in the trie like below
Example for string abc,
Insert : abc
Insert : bc
Insert : c
*/
for (size_t i=0; i<str.size(); i++) {
t.Insert(str.substr(i, str.size()-i));
}
cout << "Unique substring count : " << t.node_count << endl;
}
return 0;
}
Output
Enter number of strings : 5
abc
Unique substring count : 6
ab
Unique substring count : 3
a
Unique substring count : 1
ababa
Unique substring count : 9
goalgo
Unique substring count : 18