Given : A string of characters, some of which could be repeating.
Objective : To find the longest substring within the given string that does not contain any repeating characters.
Example : In the given string [ a b b b a c ], the longest substring without repeating characters is [ b a c ].
Idea : To solve this problem a simple sliding window protocol is used where the window holds non-repeating characters.
- The window expands to the right when a non-repeating character is found.
- The window contracts from the left to exclude the repeating character.
Sliding Window
Data structures used : A map or a set ( C++ ) could be used to keep a record of the characters in the sliding window. Whenever the End pointer reads a character, this map / set is read to check if the character is found in the map ( repeating ) or not found ( non-repeating ).
Example : Consider the string : [ a b b a ]
Start | End | Contents of Map / Set |
Max length (ending at End) ( End - Start + 1 ) |
Max String | Operation |
---|---|---|---|---|---|
[ a b b a ] 0 |
[ a b b a ] 0 |
( ) | New character a found. Insert a into the map. 0 - 0 + 1 = 1 |
[ a ] | -Move End pointer. |
[ a b b a ] 0 |
[ a b b a ] 1 |
( a ) | New character b found. Insert b into the map. 1 - 0 + 1 = 2 |
[ a b ] | -Move End pointer. |
[ a b b a ] 0 |
[ a b b a ] 2 |
( a b ) | Repeated character b found in the map. No new max length calculated. |
No new max string calculated. |
-Move start pointer beyond the repeated character b. -Remove all the characters till the repeated character b. - i.e Remove ( a, b ). -Insert the current character b. Move End pointer. |
[ a b b a ] 2 |
[ a b b a ] 3 |
( b ) | New character a found. Insert a into the map. 3 - 2 + 1 = 2 |
[ b a ] not > [ a b ] | -Move End pointer. |
Longest max string found : [ a b ]
Finding the longest substring with non-repeated characters
# Function for finding the longest substring without repeating characters.
def LongestSubstring (string : str) :
table = {}
start, end, max_length, result = 0, 0, 0, ""
while (end < len(string)) :
if string[end] not in table : # New character found. Expand the window.
if (max_length < ( end - start + 1)) :
max_length = end - start + 1
result = string [start : end + 1]
else :
# Repeating character found. Contract the window.
# Move the start pointer till it gets beyond the repeated character,
# also remove the entries of all the charcters found till the repated character.
while ( string[start] != string[end] ) :
table.pop(string[start], None)
start += 1
table.pop(string[start], None) # erase the repeated character.
start += 1
table[string[end]] = end # Store the found characters.
end += 1 # expand the window by moving the end pointer.
print ("Length : " + str(max_length) + " Substr : " + result)
def main () :
strings = [ "abcabcbb", "brazebra", "dickdicky", "smallcatcattysmall", "", "picochu", "oscarslap" ]
for string in strings :
print ("\nString : " + string)
LongestSubstring (string)
if __name__ == "__main__" :
main()
Output
String : abcabcbb
Length : 3 Substr : abc
String : brazebra
Length : 5 Substr : braze
String : dickdicky
Length : 5 Substr : dicky
String : smallcatcattysmall
Length : 6 Substr : tysmal
String :
Length : 0 Substr :
String : picochu
Length : 4 Substr : pico
String : oscarslap
Length : 5 Substr : oscar
#include<iostream>
#include<string>
#include<map>
#include<vector>
#include<set>
using namespace std;
// Function for finding the longest substring without repeated characters.
void LongestSubstring (string str) {
// map<char,int> table;
// Set performs better than a map, since we don't really use the index position of the
// characters.
set<char> table;
int start = 0, end = 0, max_length = 0;
string result;
while (end < str.size()) {
if (table.find(str[end]) == table.end()) { // New character found. Expand the window.
if (max_length < ( end - start + 1)) {
max_length = end - start + 1;
result = str.substr(start, end - start + 1);
}
} else {
// Repeating character found. Contract the window.
// Move the start pointer till it gets beyond the repeated character,
// also remove the entries of all the charcters found till the repated character.
while (str[start] != str[end]) {
table.erase(str[start]);
start++;
}
table.erase(str[start]); // erase the repeated character.
start++;
}
//table[s[end]] = end; // for map
table.insert(str[end]); // Store the found characters.
end++; // expand the window by moving the end pointer.
}
cout << "Length : " << max_length << " Substr : " << result << endl;
}
int main () {
vector<string> strings = { "abcabcbb", "abba", "aaaa", "abcdeabcdef", "", "pwwkew", "abbbac" };
for (const auto & str : strings) {
cout << "\nString : " << str << endl;
LongestSubstring(str);
}
return 0;
}
Output
String : abcabcbb
Length : 3 Substr : abc
String : abba
Length : 2 Substr : ab
String : aaaa
Length : 1 Substr : a
String : abcdeabcdef
Length : 6 Substr : abcdef
String :
Length : 0 Substr :
String : pwwkew
Length : 3 Substr : wke
String : abbbac
Length : 3 Substr : bac