To count all substrings in a given string, we do the following :
The number of nodes in the Trie is the same as the number of unique substrings in a given string.
Example: Consider the string [ ababa ].
The number of unique substrings for string [ ababa ] : [ a ], [ ab ], [ aba ], [ abab ], [ ababa ], [ b ], [ ba ], [ bab ] and [ baba ]. Thus we have 9 unqiue substrings.
We insert the following substrings in the Trie : [ ababc ], [ baba ], [ aba ], [ ba ] & [ a ]. The nodes marked in yellow indicate the end of the string.
We see that the number of nodes (excluding root) are 9 i.e same as the number of unique substrings.
Program for counting the number of distinct substrings in a given string
class TrieNode:
"""A trie node"""
def __init__(self, char):
# Character stored in this node
self.char = char
# A flag that marks if the word ends on this particular node.
self.end_of_word = False
# A dictionary of child nodes where the keys are the characters (letters)
# and values are the nodes
self.children = {}
class Trie:
"""The trie structure"""
def __init__(self):
"""
The root node of a trie is empty and does not store any character.
"""
self.root = TrieNode("")
self.node_count = 0
def Insert (self, string):
"""Insert a string i.e a word into the trie"""
node = self.root
# Check each character in the string
# If none of the children of the current node contains the character,
# create a new child of the current node for storing the character.
for char in string:
if char in node.children:
node = node.children[char]
else:
# As the character is not found, create a new trie node
new_node = TrieNode(char)
node.children[char] = new_node
node = new_node
self.node_count += 1
# Mark the end of a word
node.end_of_word = True
def main():
t = Trie()
string = input("Enter string : ")
# For a given string abc
# Insert abc
# Insert bc
# Insert c
for beg in range (0, len(string)):
t.Insert (string[beg:])
print ("Unique substring count : " + str(t.node_count))
if __name__ == "__main__" :
main()
Output
Enter string : abc
Unique substring count : 6
Enter string : goalgo
Unique substring count : 18
Enter string : a
Unique substring count : 1
Enter string : xyzzy
Unique substring count : 13
#include<iostream>
using namespace std;
class TrieNode {
public:
TrieNode * children[129];
TrieNode() {
for (int i=0; i<=128; i++) {
children[i] = NULL;
}
}
};
class Trie {
public:
TrieNode root;
int node_count;
Trie () {
// Track the number of nodes in the trie.
node_count = 0;
}
// Insert a string into the trie.
void Insert(string str) {
TrieNode * parent = &root;
for (size_t i=0; i<str.size(); i++) {
int N = str.at(i);
if (parent->children[N] == NULL) {
parent->children[N] = new TrieNode;
node_count++;
}
parent = parent->children[N];
}
}
};
int main() {
int tests;
cout << "Enter number of strings : ";
cin >> tests;
while (tests--) {
Trie t;
string str;
cin >> str;
/* Insert substrings in the trie like below
Example for string abc,
Insert : abc
Insert : bc
Insert : c
*/
for (size_t i=0; i<str.size(); i++) {
t.Insert(str.substr(i, str.size()-i));
}
cout << "Unique substring count : " << t.node_count << endl;
}
return 0;
}
Output
Enter number of strings : 5
abc
Unique substring count : 6
ab
Unique substring count : 3
a
Unique substring count : 1
ababa
Unique substring count : 9
goalgo
Unique substring count : 18
import java.util.Scanner;
import java.util.HashMap;
class TrieNode {
public char letter;
public boolean end_of_word; // A flag that marks if the word ends on this particular node.
public HashMap <Character, TrieNode> children;
TrieNode (char c) {
// Character stored in this node
letter = c;
end_of_word = false;
// A dictionary of child nodes where the keys are the characters (letters)
// and values are the nodes
children = new HashMap<Character, TrieNode>();
}
};
class Trie {
TrieNode root;
public int node_count;
Trie () {
// The root node of a trie is empty and does not store any character.
root = new TrieNode('0');
node_count = 0;
}
// Insert a string i.e a word into the trie
public void Insert (String word) {
TrieNode node = root;
// Check each character in the string
// If none of the children of the current node contains the character,
// create a new child of the current node for storing the character.
for (int i = 0; i < word.length(); i++) {
char c = word.charAt(i);
if (node.children.containsKey(c)) {
node = node.children.get(c);
} else {
// As the character is not found, create a new trie node
TrieNode new_node = new TrieNode(c);
node.children.put(c, new_node);
node = new_node;
node_count++;
}
}
// Mark the end of a word
node.end_of_word = true;
}
public static void main (String args[]) {
Trie t = new Trie();
// Search for a prefix in the Trie
Scanner scanner_obj = new Scanner(System.in); // Create a Scanner object
System.out.print("Enter string: ");
String str = scanner_obj.nextLine();
/* For a given string abc
# Insert abc
# Insert bc
# Insert c */
for (int beg = 0; beg < str.length(); beg++) {
int end = str.length();
t.Insert (str.substring(beg, end));
}
System.out.println("Unique substring count : " + t.node_count);
}
}
Output
Enter string: abc
Unique substring count : 6
Enter string: goalgo
Unique substring count : 18
Enter string: ababa
Unique substring count : 9
Enter string: a
Unique substring count : 1