Code
/*
Ukkonen's Suffix Tree Construction is implemented in C++
We create a generalised suffix tree for a given string Str and reverse Rev,
and then find Find the longest palindromic substring of the given string Str
*/
#include <bits/stdc++.h>
#define MAX_CHAR 256
using namespace std;
struct SuffixTreeNode {
struct SuffixTreeNode *children[MAX_CHAR];
//pointer to other node via suffix link
struct SuffixTreeNode *suffixLink;
/*
The (start, end) interval indicates the edge that connects the node to its parent node.
The Each edge will connect two nodes, one parent and one child, and the child node will record the (start, end)
interval of each edge. If two nodes A and B are joined by an edge with the indices (5, 8) stored in node B,
then the indices (5, 8) will be stored in node B.
*/
int start;
int *end;
// It saves the suffix index for the path from root to leaf for leaf nodes.
int suffixIndex;
// To keep track of the indices of children suffixes in a string.
unordered_set<int> *forward_Indices;
// In a reversed string, save the indices of children suffixes.
unordered_set<int> *reverse_Indices;
};
typedef struct SuffixTreeNode Node;
char text[100]; //Input string
Node *root = NULL; //Pointer to root node
/*
lastNewNode will point to a newly generated internal node that is awaiting the setting
of its suffix link, which may receive a new suffix link (other than root) in the following
phase extension. When the previous newly generated internal node (if any) has its suffix
link reset to a new internal node created in the following extension of the same phase,
lastNewNode will be set to NULL.
*/
Node *lastNewNode = NULL;
Node *active_Node = NULL;
// activeEdge is represented as a character index in the input string (not the character itself)
int activeEdge = -1;
int activeLength = 0;
// remainingSuffixCount indicates how many suffixes in the tree have yet to be added.
int remainingSuffixCount = 0;
int leafEnd = -1;
int *rootEnd = NULL;
int *splitEnd = NULL;
//input string length
int size = -1;
//1st string size
int size1 = 0;
// In a reversed string, the index of a suffix
int reverseIndex;
unordered_set<int>::iterator forwardIndex;
Node *newNode(int start, int *end)
{
Node *node =(Node*) malloc(sizeof(Node));
int i;
for (i = 0; i < MAX_CHAR; i++)
node->children[i] = NULL;
/*
The suffixLink property will be set to NULL for the root node. In the current extension,
the suffixLink will be set to root by default for internal nodes, however this may change in
the future.
*/
node->suffixLink = root;
node->start = start;
node->end = end;
/*
By default, suffixIndex is set to -1, and the actual suffix index is set afterwards for
leaves at the end of all phases.
*/
node->suffixIndex = -1;
node->forward_Indices = new unordered_set<int>;
node->reverse_Indices = new unordered_set<int>;
return node;
}
int edgeLength(Node *n) {
if(n == root)
return 0;
return *(n->end) - (n->start) + 1;
}
int walkDown(Node *currNode)
{
/*
Using the Skip/Count Trick, alter activePoint for a walk down (APCFWD) (Trick 1). Set next internal node as
active_Node and change activeEdge and activeLength to represent same activePoint if activeLength is higher
than current edge length.
*/
if (activeLength >= edgeLength(currNode))
{
activeEdge += edgeLength(currNode);
activeLength -= edgeLength(currNode);
active_Node = currNode;
return 1;
}
return 0;
}
void extendSuffixTree(int pos)
{
// Extension Rule 1 is responsible for extending all of the leaves that have been formed so far in the tree.
leafEnd = pos;
/*
Increase the amount of time left. SuffixCount indicates that a new suffix has been added to the list of
suffixes that have not yet been added to the tree.
*/
remainingSuffixCount++;
/*
When entering a new phase, set lastNewNode to NULL to indicate that no internal node is waiting for its suffix
link to be reset in the current phase.
*/
lastNewNode = NULL;
//Add all suffixes (that haven't been added yet) to the tree one by one.
while(remainingSuffixCount > 0) {
if (activeLength == 0)
activeEdge = pos; //APCFALZ
// Starting with activeEdge from active_Node, there is no outgoing edge.
if (active_Node->children] == NULL)
{
//2nd Extending Rule (A new leaf edge gets created)
active_Node->children] =
newNode(pos, &leafEnd);
/*
In the above line, a new leaf edge is generated, starting from an existing node (the current active_Node),
and if any internal nodes are waiting for their suffix links to be reset, point the suffix link from that
last internal node to the current active_Node. Then set lastNewNode to NULL to indicate that there are no
more nodes waiting for the suffix link to be reset.
*/
if (lastNewNode != NULL)
{
lastNewNode->suffixLink = active_Node;
lastNewNode = NULL;
}
}
// From active_Node, there is an outgoing edge called activeEdge.
else
{
// Start with activeEdge to get the next node at the end of the edge.
Node *next = active_Node->children] ;
if (walkDown(next))//Do walkdown
{
//Start from next node (the new active_Node)
continue;
}
/*
Extension Rule 3 (the currently processed character is already on the edge)
*/
if (textarray[next->start + activeLength] == textarray[pos])
{
//APCFER3
activeLength++;
// STOP ALL PROCESSING IN THIS STAGE AND MOVE ON TO THE NEXT STAGE
break;
}
/*
We shall be here when activePoint is in the centre of the edge being traversed and the currently
processed character is not on the edge (we fall off the tree). In this scenario, we're going to
add a new internal node and a new leaf edge that comes out of it. This is Extension Rule 2, which
creates a new leaf edge and an internal node.
*/
splitEnd = (int*) malloc(sizeof(int));
*splitEnd = next->start + activeLength - 1;
//New internal node
Node *split = newNode(next->start, splitEnd);
active_Node->children] = split;
//A new leaf emerges from a new internal node.
split->children] = newNode(pos, &leafEnd);
next->start += activeLength;
split->children] = next;
/*
We've added a new internal node to the mix. If any internal nodes
generated in previous extensions of the same phase have yet to have
their suffix links reset, do so now.
*/
if (lastNewNode != NULL)
{
lastNewNode->suffixLink = split;
}
/*
Make the current newly constructed internal node wait for its suffix link
to be reset (which is currently pointing to root). If we come across any
other internal node (existing or freshly formed) in the next extension of
the same phase, suffixLink of this node will point to that internal node
when a new leaf edge is added (i.e. when Extension Rule 2 applies in any of
the next extensions of the same phase).
*/
lastNewNode = split;
}
// One suffix has been added to the tree; reduce the number of suffixes that need to be added.
remainingSuffixCount--;
if (active_Node == root && activeLength > 0) //APCFER2C1
{
activeLength--;
activeEdge = pos - remainingSuffixCount + 1;
}
else if (active_Node != root) //APCFER2C2
{
active_Node = active_Node->suffixLink;
}
}
}
void print(int i, int j)
{
int x;
for (x=i; x<=j && textarray[x] != '#'; x++)
cout<<textarray[x];
if(x<=j)
cout<<"#";
}
/*
As well as printing the suffix tree, you may also set the suffix index.
As a result, the tree will be printed in DFS format.
Each edge will be printed together with its suffix index.
*/
void setSuffixIndexByDFS(Node *n, int labelHeight)
{
if (n == NULL) return;
if (n->start != -1) //A non-root node
{
/*
From the parent to the current node, print the label on the edge.
Uncomment the line below to print the suffix tree: cout*(n->end);
*/
}
int leaf = 1;
int i;
for (i = 0; i < MAX_CHAR; i++)
{
if (n->children[i] != NULL)
{
//Current node is not a leaf as it has outgoing edges from it.
leaf = 0;
setSuffixIndexByDFS(n->children[i], labelHeight +
edgeLength(n->children[i]));
if(n != root)
{
//Add children's suffix indices in parent
n->forward_Indices->insert(
n->children[i]->forward_Indices->begin(),
n->children[i]->forward_Indices->end());
n->reverse_Indices->insert(
n->children[i]->reverse_Indices->begin(),
n->children[i]->reverse_Indices->end());
}
}
}
if (leaf == 1)
{
for(i= n->start; i<= *(n->end); i++)
{
if(textarray[i] == '#')
{
n->end = (int*) malloc(sizeof(int));
*(n->end) = i;
}
}
n->suffixIndex = size - labelHeight;
//Suffix of Given String
if(n->suffixIndex < size1)
n->forward_Indices->insert(n->suffixIndex);
//Suffix of Reversed String
else
n->reverse_Indices->insert(n->suffixIndex - size1);
//Uncomment the line below to print the suffix index cout<<n->suffixIndex;
}
}
void freeSuffixTreeByPostOrder(Node *n)
{
if (n == NULL)
return;
int i;
for (i = 0; i < MAX_CHAR; i++)
{
if (n->children[i] != NULL)
{
freeSuffixTreeByPostOrder(n->children[i]);
}
}
if (n->suffixIndex == -1)
free(n->end);
free(n);
}
/*
Create the suffix tree and output the edge labels, as well as suffixIndex. For leaf edges,
suffixIndex will be >= 0 while for non-leaf edges, suffixIndex will be -1.
*/
void buildSuffixTree()
{
size = strlen(textarray);
int i;
rootEnd = (int*) malloc(sizeof(int));
*rootEnd = - 1;
/*
Because it has no parent from which an edge comes to root, root is a
special node with start and end indices of -1.
*/
root = newNode(-1, rootEnd);
active_Node = root; //First active_Node will be root
for (i=0; i<size; i++)
extendSuffixTree(i);
int labelHeight = 0;
setSuffixIndexByDFS(root, labelHeight);
}
void doTraversal(Node *n, int labelHeight, int* maxHeight,
int* substringStartIndex)
{
if(n == NULL)
{
return;
}
int i=0;
int ret = -1;
if(n->suffixIndex < 0) //If it is internal node
{
for (i = 0; i < MAX_CHAR; i++)
{
if(n->children[i] != NULL)
{
doTraversal(n->children[i], labelHeight +
edgeLength(n->children[i]),
maxHeight, substringStartIndex);
if(*maxHeight < labelHeight
&& n->forward_Indices->size() > 0 &&
n->reverse_Indices->size() > 0)
{
for (forwardIndex=n->forward_Indices->begin();
forwardIndex!=n->forward_Indices->end();
++forwardIndex)
{
reverseIndex = (size1 - 2) -
(*forwardIndex + labelHeight - 1);
/*
If the reverse suffix originates from the SAME place in the
string, Keep an eye on the deepest node.
*/
if(n->reverse_Indices->find(reverseIndex) !=
n->reverse_Indices->end())
{
*maxHeight = labelHeight;
*substringStartIndex = *(n->end) -
labelHeight + 1;
break;
}
}
}
}
}
}
}
void getLongestPalindromicSubstring()
{
int maxHeight = 0;
int substringStartIndex = 0;
doTraversal(root, 0, &maxHeight, &substringStartIndex);
int k;
for (k=0; k<maxHeight; k++)
cout<<textarray[k + substringStartIndex];
if(k == 0)
cout<<"No palindromic substring";
else
cout<<"of length"<<maxHeight;
cout<<endl;
}
// driver program to test above functions
int main(int argc, char *argv[])
{
size1 = 9;
cout<<"Longest Palindromic Substring in cabbaabb is: ";
strcpy(textarray, "cabbaabb#bbaabbac$"); cout<<endl;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 17;
cout<<"Longest Palindromic Substring in forgeeksskeegfor is: ";
strcpy(textarray, "forgeeksskeegfor#rofgeeksskeegrof$"); cout<<end;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 6;
cout<<"Longest Palindromic Substring in abcde is: ";
strcpy(textarray, "abcde#edcba$"); cout<<endl;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 7;
cout<<"Longest Palindromic Substring in abcdae is: ";
strcpy(textarray, "abcdae#eadcba$");cout<<endl;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 6;
cout<<"Longest Palindromic Substring in abacd is: ";
strcpy(textarray, "abacd#dcaba$");cout<<endl;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 6;
cout<<"Longest Palindromic Substring in abcdc is: ";
strcpy(textarray, "abcdc#cdcba$"); cout<<endl;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 13;
cout<<"Longest Palindromic Substring in abacdfgdcaba is: ";
strcpy(textarray, "abacdfgdcaba#abacdgfdcaba$");cout<<endl;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 15;
cout<<"Longest Palindromic Substring in xyabacdfgdcaba is: ";
strcpy(textarray, "xyabacdfgdcaba#abacdgfdcabayx$"); cout<<endl;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 9;
cout<<"Longest Palindromic Substring in xababayz is: ";
strcpy(textarray, "xababayz#zyababax$"); cout<<endl;
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
size1 = 6;
cout<<"Longest Palindromic Substring in xabax is: "<<endl;
strcpy(textarray, "xabax#xabax$");
buildSuffixTree();
getLongestPalindromicSubstring();
//Free the dynamically allocated memory
freeSuffixTreeByPostOrder(root);
return 0;
}
You can also try this code with Online C++ Compiler
Run Code
Output:
Longest Palindromic Substring in cabbaabb is: bbaabb, of length: 6
Longest Palindromic Substring in forgeeksskeegfor is: geeksskeeg, of length: 10
Longest Palindromic Substring in abcde is: a, of length: 1
Longest Palindromic Substring in abcdae is: a, of length: 1
Longest Palindromic Substring in abacd is: aba, of length: 3
Longest Palindromic Substring in abcdc is: cdc, of length: 3
Longest Palindromic Substring in abacdfgdcaba is: aba, of length: 3
Longest Palindromic Substring in xyabacdfgdcaba is: aba, of length: 3
Longest Palindromic Substring in xababayz is: ababa, of length: 5
Longest Palindromic Substring in xabax is: xabax, of length: 5
You can also try this code with Online C++ Compiler
Run Code
Time Complexity
O(n)
The time complexity to find Longest Palindromic Substring using the suffix tree method will cost only linear time complexity and is the most optimized approach for finding the Longest Palindromic Substring. The first construction of the entire suffix tree will cost o(n) time complexity, where n is the size of the given string. So in the worst-case iteration on the entire tree may cost o(n) time complexity, so the total time complexity to solve the entire problem will be o(n) time.
Space Complexity
O(n)
The space complexity to find Longest Palindromic Substring using the suffix tree method will cost only linear space complexity and is the most optimized approach for finding the Longest Palindromic Substring. The entire construction of the suffix tree will cost o(n) space complexity, where n is the size of the given string.
FAQs
-
What is a palindrome string?
If the reverse of a string is the same as the real string, it is said to be a palindrome. For instance, while "abba" is a palindrome, "abbc" is not.
-
Can a single character be a palindrome?
Yes, because it reads the same forward and backward, a single character is considered a palindrome.
-
What are suffix trees?
A suffix tree (also known as a PAT tree or, in an earlier form, a position tree) is a compressed trie that contains all the suffixes of a given text as keys and text positions as values. Many key string operations can be implemented very quickly using suffix trees.
Key Takeaways
In this blog, we discussed the most optimized solution to find the Longest Palindromic Substring using suffix trees. Along with the solution, We also explored the time and space complexity of the solution.
Recommended Readings:
Recommended problems -
If you want to learn more about such hidden number theory algorithms and want to practice some quality questions which require you to excel your preparation s a notch higher, then you can visit our Guided Path for these number theory algorithms on Coding Ninjas Studio.To be more confident in data structures and algorithms, try out problems such as Longest Palindromic Subsequence and also check out our DS and Algo Course.