I was working on Datalog interpreter for my CS class and I ran into a strange problem where my Rule evaluations took too many passes to complete. After looking at my code I made two modifications in the following that fixed my evaluations to execute in the correct number of passes:
//original form
bool addedFacts = false;
for (X x: xs) {
addedFacts = addedFacts || set.insert(x).second;
}
//modified form
bool addedFacts = false;
for (X x: xs) {
if (set.insert(x).second) {
addedFacts = true;
}
}
To me these two code structures appear logically equivalent. Is there a reason why one executes correctly and one execute incorrectly/inefficiently? Here is a buildable example of the problem that was occurring:
#include <iostream>
#include <set>
#include <vector>
using std::set;
using std::vector;
using std::cout;
using std::endl;
const int CAP = 100;
class Rule {
public:
int factor;
Rule(int factor) {
this->factor = factor;
}
bool evaluateInefficient(set<int>& facts) {
vector<int> data;
bool addedFacts = false;
for (int fact : facts) {
data.push_back(fact);
}
for (int datum : data) {
int newFact = datum * factor;
if (newFact < CAP) {
addedFacts = addedFacts || facts.insert(newFact).second;
}
}
return addedFacts;
}
bool evaluate(set<int>& facts) {
vector<int> data;
bool addedFacts = false;
for (int fact : facts) {
data.push_back(fact);
}
for (int datum : data) {
int newFact = datum * factor;
if (newFact < CAP) {
if (facts.insert(newFact).second) {
addedFacts = true;
}
}
}
return addedFacts;
}
};
int doublyInefficient(vector<Rule>& rules) {
set<int> facts;
facts.insert(1);
bool addedFacts = true;
int passes = 0;
while (addedFacts) {
passes++;
addedFacts = false;
for (Rule rule : rules) {
addedFacts = addedFacts || rule.evaluateInefficient(facts);
}
}
return passes;
}
int singlyInefficient(vector<Rule>& rules) {
set<int> facts;
facts.insert(1);
bool addedFacts = true;
int passes = 0;
while (addedFacts) {
passes++;
addedFacts = false;
for (Rule rule : rules) {
addedFacts = addedFacts || rule.evaluate(facts);
}
}
return passes;
}
int efficient(vector<Rule>& rules) {
set<int> facts;
facts.insert(1);
bool addedFacts = true;
int passes = 0;
while (addedFacts) {
passes++;
addedFacts = false;
for (Rule rule : rules) {
if (rule.evaluate(facts)) {
addedFacts = true;
}
}
}
return passes;
}
int main(int argc, char* argv[]) {
//build the rules
vector<Rule> rules;
rules.push_back(Rule(2));
rules.push_back(Rule(3));
rules.push_back(Rule(5));
rules.push_back(Rule(7));
rules.push_back(Rule(11));
rules.push_back(Rule(13));
//Show three different codes that should (in my mind) take the same amount of passes over the rules but don't
cout << "Facts populated after " << doublyInefficient(rules) << " passes through the Rules." << endl;
cout << "Facts populated after " << singlyInefficient(rules) << " passes through the Rules." << endl;
cout << "Facts populated after " << efficient(rules) << " passes through the Rules." << endl;
getchar();
}
I get the following output when running in debug and release mode (32-bit) on visual studio 2017. To my knowledge the code is not optimized.
Facts populated after 61 passes through the Rules.
Facts populated after 17 passes through the Rules.
Facts populated after 7 passes through the Rules.
A difference arises because of short-circuit evaluation:
Consider an expression of the form (expr1 || expr2)
. Short-circuit means that if expr1
evaluates to true
, expression expr2
will not get evaluated at all (cf. this online c++ standard draft, ephasis are mine):
5.15 Logical OR operator
The || operator groups left-to-right. The operands are both contextually converted to bool (Clause [conv]). It returns true if either of its operands is true, and false otherwise. Unlike |, || guarantees left-to-right evaluation; moreover, the second operand is not evaluated if the first operand evaluates to true.
Hence, in your expression addedFacts || set.insert(x).second
, from the point where addedFacts
becomes true
the first time, expression set.insert(x).second
will not get executed any more. I suppose that this is the "wrong" behaviour, as your set
will not contain the respective x
es then.