I've got a small set of sequences of unique values that I want to combine into a single super-sequence where the relative order of each value is maintained, to the extent possible. For instance (quotes around strings ignored for simplicity):
list1 = [Mary, Bob, Sue, Roger]
list2 = [Bob, Alice, Sue, Dave]
list3 = [Mary, Bob, Larry, Sue, Roger]
superSequence = [Mary, Bob, Alice, Larry, Sue, Roger, Dave]
The goal is to generate an object from which the original lists can be recreated, such as:
obj = {
Mary: [1, 3],
Bob: [1, 2, 3],
Alice: [2],
Larry: [3],
Sue: [1, 2, 3],
Roger: [1, 3],
Dave: [2]
}
Ignoring for the moment that object key order isn't guaranteed in all cases, one could iterate over these keys and use the indices in each associated array to regenerate the original lists. Because the super-sequence is represented as a JS object, the values in it must be unique.
Clearly, not every set of sequences can be combined in this way:
list1 = [Mary, Bob, Sue]
list2 = [Bob, Mary, Sue]
superSequence = [Mary, Bob, Sue] OR
superSequence = [Bob, Mary, Sue]
Picking one or the other should be fine in most cases, with bonus points for an algorithm that can highlight where the order was indeterminate.
In researching this, I seem to have stumbled upon an NP-hard problem that's well-studied in compression, bioinformatics, and other fields. There's something called a majority merge algorithm that seems to be a reasonably decent approximation for this problem, but my ability to translate academic paper pseudo-code into something usable has atrophied over the years. So I was hoping to find an actual implementation in JS, or C, or Python or something that doesn't rely on magic libraries.
So after some further thought, I realized there is a much simpler answer to your question. Since we can assume the same name does not occur multiple times in a given list this will work:
var simpleCombine = function (arr1, arr2) {
"use strict";
arr1 = JSON.parse(JSON.stringify(arr1));
arr2 = JSON.parse(JSON.stringify(arr2));
var i, j, arrOut = [];
while (arr1.length) {
var val = arr1.shift(), found = false;
for (j = 0; j < arr2.length; j += 1) {
if (val === arr2[j]) {
//If we wound an overlap then put everything to the left of it
// in arr2 into the final array
found = true;
var pos = arrOut.length;
arrOut.push(val);
var newVals = arr2.splice(0, j);
while (newVals.length) {
arrOut.splice(pos, 0, newVals.pop());
}
arr2.shift(); //get rid of dup
break;
}
}
if (!found) {
//No overlap found, just add it to the out array
arrOut.push(val);
}
}
//anything left in arr2? Add it to out array
arrOut = arrOut.concat(arr2);
//check for duplicates based on user requirement of each item in the
// sequence only occurs once.
for (i = 0; i < arrOut.length; i += 1) {
for (j = i + 1; j < arrOut.length; j += 1) {
if (arrOut[i] === arrOut[j]) {
//If we find an overlap warn the user, and remove the dup.
console.warn('Even with strict ordering, multiple solutions are possible');
arrOut.splice(i,1);
i -= 1;
break;
}
}
}
return arrOut;
};
var findMultipleSCS = function (arr) {
var first = arr.shift();
while (arr.length) {
first = simpleCombine(first, arr.shift());
}
return first;
};
list1 = ["Mary", "Bob", "Sue", "Roger"];
list2 = ["Bob", "Alice", "Sue", "Dave"];
list3 = ["Mary", "Bob", "Larry", "Sue", "Roger"];
console.log(findMultipleSCS([list1, list2, list3]));
My original answer is below because it is more accurate for lists that may contain the same name multiple times.
//This code works for things where the important thing is that order is
//maintained, not that each entry only occurs once
var findSCS = (function () {
'use strict';
var lcsLen, lcsBack, combine;
lcsLen = function(arr1, arr2) {
//This function moves through the arrays developing the
// length of the longest possible sequence of identical order.
var dists = [[0]], i, j;
for (i = 0; i < arr1.length; i += 1) {
dists[i + 1] = [];
for (j = 0; j < arr2.length; j += 1) {
dists[i + 1][0] = 0; // initialize 0'th column/row with 0
dists[0][j + 1] = 0; // this could be done in a separate loop
dists[i + 1][j + 1] = dists[i + 1][j + 1] || 0; // initialize i,j
if (arr1[i] === arr2[j]) {
//if this condition is met then we have a longer overlap
dists[i + 1][j + 1] = dists[i][j] + 1;
} else {
//if not take the max length so far
dists[i + 1][j + 1] = Math.max(dists[i][j + 1], dists[i + 1][j]);
}
}
}
return dists;
};
lcsBack = function (dists, x, y, i, j) {
//this recursive function takes the longest possible array and build
// the actual list starting from the bottom right of the matrix
// created by lcsLen
if (!i || !j) {
return [];
} else if(x[i - 1] === y[j - 1]) {
return lcsBack(dists, x, y, i - 1, j - 1).concat([x[i - 1]]);
} else {
if (dists[i][j-1] > dists[i-1][j]) {
return lcsBack(dists, x, y, i, j-1);
} else {
return lcsBack(dists,x,y,i-1,j);
}
}
};
combine = function (lcs, arr1, arr2) {
//this take the lcs and fills in the non-overlapping part of
// the original lists, creating the scs
var out = JSON.parse(JSON.stringify(arr1));
var i, testing = 0, outPos = 0, positions = [0];
for (i = 0; i < arr1.length && testing < lcs.length; i += 1) {
if (lcs[testing] === arr1[i]) {
positions[testing + 1] = i;
testing += 1;
}
}
testing = 0; outPos = 0;
for (i = 0; i < arr2.length; i += 1) {
if (lcs[testing] === undefined || lcs[testing] !== arr2[i]) {
out.splice(positions[testing] + outPos, 0, arr2[i]);
outPos += 1;
} else {
testing += 1;
outPos += 1;
}
}
return out;
};
return function (arr1, arr2) {
//get the length matrix to determine the maximum sequence overlap
var lcsLenMat = lcsLen(arr1,arr2);
//Take that distance matrix and build the actual sequence (recursively)
var lcs = lcsBack(lcsLenMat, arr1, arr2, arr1.length, arr2.length);
//Build the SCS
var tempScs = combine(lcs, arr1, arr2);
//This code will allow for duplicates, and in your second example
// It will generate a list with two bobs, which is arguably more
// correct for general purpose use.
return tempScs;
}());
var findMultipleSCS = function (arr) {
var first = arr.shift();
while (arr.length) {
first = findSCS(first, arr.shift());
}
return first;
};
list1 = ["Mary", "Bob", "Sue", "Roger"];
list2 = ["Bob", "Alice", "Sue", "Dave"];
list3 = ["Mary", "Bob", "Larry", "Sue", "Roger"];
console.log(findMultipleSCS([list1, list2, list3]));
Most of these ideas where taken from https://en.wikipedia.org/wiki/Longest_common_subsequence_problem and https://en.wikipedia.org/wiki/Shortest_common_supersequence_problem
The order you put these in will determine which non unique solution you get. For instance list1, list2, list3 give you your first answer, however list2, list3, list1 gives you the also correct:
["Mary", "Bob", "Larry", "Alice", "Sue", "Dave", "Roger"]
If you want to maintain the priority order than list1, list2, list3 does have a unique solution and this will alert you of a duplicate possibility with a console.warn looking for duplicates.