Given a bi-partite graph with node weights for both type A and type B nodes as shown below:
I want to output an ordered list of type B nodes as defined by the following heuristic:
The figure below shows an example:
For this example, the output set will be: (Y, Z, X)
The naive process will be to simply walk through this algorithm but assuming the bi-partite graph is huge, I am looking for the most efficient way to find the output set. Note, I just need the ordered list of type B nodes as output without the intermediate calculated values (eg. 50, 15, 2)
This is a further refinement of the algo suggested by Dave in a comment. It minimizes the number of times a node value needs to be recalculated.
I have implemented this algorithm in C++ based on my PathFinder graph class. The code, running on a 1 million node graph with half a and half b nodes, each b node connected to two random a nodes, requires 1 second.
Here is the code
void cPathFinder::karup()
{
raven::set::cRunWatch aWatcher("karup");
std::cout << "karup on " << nodeCount() << " node graph\n";
std::vector<int> output;
// calculate initial values of B nodes
std::multimap<int, int> mapValueNode;
for (auto &b : nodes())
{
if (b.second.myName[0] != 'b')
continue;
int value = 0;
for (auto a : b.second.myLink)
{
value += node(a.first).myCost;
}
value *= b.second.myCost;
mapValueNode.insert(std::make_pair(value, b.first));
}
// while not all B nodes output
while (mapValueNode.size())
{
raven::set::cRunWatch aWatcher("select");
// select node with highest value
auto remove_it = --mapValueNode.end();
int remove = remove_it->second;
if (!remove_it->first)
{
/** all remaining nodes have zero value
* all the links from B nodes to A nodes have been removed
* output remaining nodes in order of decreasing node weight
*/
raven::set::cRunWatch aWatcher("Bunlinked");
std::multimap<int, int> mapNodeValueNode;
for (auto &nv : mapValueNode)
{
mapNodeValueNode.insert(
std::make_pair(
node(nv.second).myCost,
nv.second ));
}
for( auto& nv : mapNodeValueNode )
{
myPath.push_back( nv.second );
}
break;
}
bool OK = true;
int value = 0;
{
raven::set::cRunWatch aWatcher("check");
// check that no nodes providing value have been removed
// std::cout << "checking neighbors of " << name(remove) << "\n";
auto &vl = node(remove).myLink;
for (auto it = vl.begin(); it != vl.end();)
{
if (!myG.count(it->first))
{
// A neighbour has been removed
OK = false;
it = vl.erase(it);
}
else
{
// A neighbour remains
value += node(it->first).myCost;
it++;
}
}
}
if (OK)
{
raven::set::cRunWatch aWatcher("store");
// we have a node whose values is highest and valid
// store result
output.push_back(remove);
// remove neighbour A nodes
auto &ls = node(remove).myLink;
for (auto &l : ls)
{
myG.erase(l.first);
}
// remove the B node
// std::cout << "remove " << name( remove ) << "\n";
mapValueNode.erase(remove_it);
}
else
{
// replace old value with new
raven::set::cRunWatch aWatcher("replace");
value *= node(remove).myCost;
mapValueNode.erase(remove_it);
mapValueNode.insert(std::make_pair(value, remove));
}
}
}
Here are the timing results
karup on 1000000 node graph
raven::set::cRunWatch code timing profile
Calls Mean (secs) Total Scope
1 1.16767 1.16767 karup
581457 1.37921e-06 0.801951 select
581456 4.71585e-07 0.274206 check
564546 3.04042e-07 0.171646 replace
1 0.153269 0.153269 Bunlinked
16910 8.10422e-06 0.137042 store