# Counting Distinct Substrings In A Given String Using Trie

To count all substrings in a given string, we do the following :

• Construct a Trie with all the substrings that are the suffixes of a given string.
• Keep a count of nodes that are being created in the Trie while inserting the substrings (suffixes).

The number of nodes in the Trie is the same as the number of unique substrings in a given string.

Example: Consider the string [ ababa ].
The number of unique substrings for string [ ababa ] : [ a ], [ ab ], [ aba ], [ abab ], [ ababa ], [ b ], [ ba ], [ bab ] and [ baba ]. Thus we have 9 unqiue substrings.

We insert the following substrings in the Trie : [ ababc ], [ baba ], [ aba ], [ ba ] & [ a ]. The nodes marked in yellow indicate the end of the string.
We see that the number of nodes (excluding root) are 9 i.e same as the number of unique substrings.

Program for counting the number of distinct substrings in a given string

``````class TrieNode:
"""A trie node"""

def __init__(self, char):

# Character stored in this node
self.char = char

# A flag that marks if the word ends on this particular node.
self.end_of_word = False

# A dictionary of child nodes where the keys are the characters (letters)
# and values are the nodes
self.children = {}

class Trie:
"""The trie structure"""

def __init__(self):
"""
The root node of a trie is empty and does not store any character.
"""
self.root = TrieNode("")
self.node_count = 0

def Insert (self, string):
"""Insert a string i.e a word into the trie"""
node = self.root

# Check each character in the string
# If none of the children of the current node contains the character,
# create a new child of the current node for storing the character.
for char in string:
if char in node.children:
node = node.children[char]
else:
# As the character is not found, create a new trie node
new_node = TrieNode(char)
node.children[char] = new_node
node = new_node
self.node_count += 1

# Mark the end of a word
node.end_of_word = True

def main():

t = Trie()

string = input("Enter string : ")
# For a given string abc
# Insert abc
# Insert bc
# Insert c
for beg in range (0, len(string)):
t.Insert (string[beg:])

print ("Unique substring count : " + str(t.node_count))

if __name__ == "__main__" :
main()
``````

Output

``````Enter string : abc
Unique substring count : 6

Enter string : goalgo
Unique substring count : 18

Enter string : a
Unique substring count : 1

Enter string : xyzzy
Unique substring count : 13
``````
``````#include<iostream>
using namespace std;

class TrieNode {

public:

TrieNode * children[129];

TrieNode() {
for (int i=0; i<=128; i++) {
children[i] = NULL;
}
}
};

class Trie {

public:

TrieNode root;
int node_count;

Trie () {
// Track the number of nodes in the trie.
node_count = 0;
}

// Insert a string into the trie.
void Insert(string str) {

TrieNode * parent = &root;

for (size_t i=0; i<str.size(); i++) {
int N = str.at(i);
if (parent->children[N] == NULL) {
parent->children[N] = new TrieNode;
node_count++;
}
parent = parent->children[N];
}
}
};

int main() {

int tests;
cout << "Enter number of strings : ";
cin >> tests;

while (tests--) {

Trie t;
string str;
cin >> str;
/* Insert substrings in the trie like below
Example for string abc,
Insert : abc
Insert : bc
Insert : c
*/
for (size_t i=0; i<str.size(); i++) {
t.Insert(str.substr(i, str.size()-i));
}
cout << "Unique substring count : " << t.node_count << endl;
}
return 0;
}
``````

Output

``````Enter number of strings : 5

abc
Unique substring count : 6

ab
Unique substring count : 3

a
Unique substring count : 1

ababa
Unique substring count : 9

goalgo
Unique substring count : 18
``````
``````import java.util.Scanner;
import java.util.HashMap;

class TrieNode {

public char letter;
public boolean end_of_word; // A flag that marks if the word ends on this particular node.
public HashMap <Character, TrieNode> children;

TrieNode (char c) {
// Character stored in this node
letter = c;
end_of_word = false;

// A dictionary of child nodes where the keys are the characters (letters)
// and values are the nodes
children = new HashMap<Character, TrieNode>();
}

};

class Trie {

TrieNode root;
public int node_count;

Trie () {
// The root node of a trie is empty and does not store any character.
root = new TrieNode('0');
node_count = 0;
}

// Insert a string i.e a word into the trie
public void Insert (String word) {

TrieNode node = root;

// Check each character in the string
// If none of the children of the current node contains the character,
// create a new child of the current node for storing the character.
for (int i = 0; i < word.length(); i++) {
char c = word.charAt(i);
if (node.children.containsKey(c)) {
node = node.children.get(c);
} else {
// As the character is not found, create a new trie node
TrieNode new_node = new TrieNode(c);
node.children.put(c, new_node);
node = new_node;
node_count++;
}
}
// Mark the end of a word
node.end_of_word = true;
}

public static void main (String args[]) {

Trie t = new Trie();

// Search for a prefix in the Trie
Scanner scanner_obj = new Scanner(System.in); // Create a Scanner object
System.out.print("Enter string: ");

String str = scanner_obj.nextLine();

/* For a given string abc
# Insert abc
# Insert bc
# Insert c */

for (int beg = 0; beg < str.length(); beg++) {
int end = str.length();
t.Insert (str.substring(beg, end));
}
System.out.println("Unique substring count : " +  t.node_count);
}
}
``````

Output

``````Enter string: abc
Unique substring count : 6

Enter string: goalgo
Unique substring count : 18

Enter string: ababa
Unique substring count : 9

Enter string: a
Unique substring count : 1
``````