I used to calculate longest common Substring using dynamic programming O(m * n), suffix tree O(m + n), suffix array O(nlog^2 n) according to my need. Recently I have learnt Suffix Automaton which performs in O(n) which is very impressive.
I can write the code by which I can calculate the length of longest common substring easily. For example:
Input:
abcdef
xyzabc
Output:
3
And this is the code:
#include <bits/stdc++.h>
using namespace std;
const int maxN = 250500;
const int maxState = maxN << 1;
struct State {
State *go[26], *suffix;
int depth, id;
long long cnt;
};
State pool[maxState], *point, *root, *sink;
int size;
State *newState(int dep) {
point->id = size++;
point->depth = dep;
return point++;
}
void init() {
point = pool;
size = 0;
root = sink = newState(0);
}
void insert(int a) {
State *p = newState(sink->depth+1);
State *cur = sink, *sufState;
while (cur && !cur->go[a]) {
cur->go[a] = p;
cur = cur->suffix;
}
if (!cur)
sufState = root;
else {
State *q = cur->go[a];
if (q->depth == cur->depth + 1)
sufState = q;
else {
State *r = newState(cur->depth+1);
memcpy(r->go, q->go, sizeof(q->go));
r->suffix = q->suffix;
q->suffix = r;
sufState = r;
while (cur && cur->go[a] == q) {
cur->go[a] = r;
cur = cur->suffix;
}
}
}
p->suffix = sufState;
sink = p;
}
int work(char buf[]) {
//printf("%s", buf);
int len = strlen(buf);
int tmp = 0, ans = 0;
State *cur = root;
for (int i = 0; i < len; i++) {
if (cur->go[buf[i]-'a']) {
tmp++;
cur = cur->go[buf[i]-'a'];
} else {
while (cur && !cur->go[buf[i]-'a'])
cur = cur->suffix;
if (!cur) {
cur = root;
tmp = 0;
} else {
tmp = cur->depth + 1;
cur = cur->go[buf[i]-'a'];
}
}
ans = max(ans, tmp);
}
return ans;
}
char ch[maxN];
int main() {
scanf("%s", ch);
init();
int len = strlen(ch);
for (int i = 0; i < len; i++)
insert(ch[i]-'a');
scanf("%s", ch);
printf("%d\n", work(ch));
return 0;
}
But now I need to print the longest Common Substring itself, not the length. But I can't be able to modify my code :( How this code can be modified to print the longest common Substring?
When you are at this line:
ans = max(ans, tmp);
The starting position in buf
that achieved depth tmp
was i - tmp + 1
. Now you know the positions of all longest common substrings in the second string. Just pick any and output the result.