I'm trying to calculate P50 and P95 for a streaming list of integers. Here's some example code (Live On Compiler Explorer):
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>
#include <boost/accumulators/statistics/extended_p_square_quantile.hpp>
namespace ba = boost::accumulators;
int main() {
std::vector<int> values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
std::vector<double> probs = {0.5, 0.95};
ba::accumulator_set<int, ba::stats<ba::tag::extended_p_square_quantile>> //
acc(ba::extended_p_square_probabilities = probs);
for (auto val : values) {
acc(val);
}
std::cout << "P: " << ba::quantile(acc, ba::quantile_probability = 0.5) << " "
<< ba::quantile(acc, ba::quantile_probability = 0.95) << std::endl;
}
But this is returning unexpected values:
P: 3 7
I expected the answer to be 5 9. How do I get what I want?
The statistic is an estimate. Not only should you feed it more data, you may also need to expect different values. Perhaps the 95th-percentile estimate should be closer to 10.
You can see it at work with this expansion of the test program:
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>
#include <boost/accumulators/statistics/extended_p_square_quantile.hpp>
namespace ba = boost::accumulators;
namespace tag = ba::tag;
// Accumulator set over int samples that maintains the statistics named by Stats.
template <class... Stats>
using Accum = ba::accumulator_set<int, ba::stats<Stats...>>;

// Builds an Accum<Stats...>, always passing both the quantile probabilities
// {0.5, 0.95} and a cell count of 11 for the P-square cumulative distribution.
template <class... Stats>
Accum<Stats...> make() {
    std::vector<double> quantile_probs = {0.5, 0.95};
    // Both options are supplied unconditionally; unused arguments are ignored.
    return Accum<Stats...>(
        ba::extended_p_square_probabilities = quantile_probs,
        tag::p_square_cumulative_distribution::num_cells = 11);
}
// Replays the same ten values 100 times through three accumulators and, after
// each pass, prints the 0.50 and 0.95 quantile estimates alongside the two
// median estimates.
int main() {
    std::vector<int> values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

    auto quantiles = make<tag::extended_p_square_quantile_quadratic>();
    auto p_sq_est = make<tag::with_p_square_cumulative_distribution_median>();
    auto simple = make<tag::median>();

    for (int pass = 0; pass < 100; ++pass) {
        // Every accumulator sees every sample of this pass.
        for (int sample : values) {
            quantiles(sample);
            p_sq_est(sample);
            simple(sample);
        }

        // Read the current estimates, then report them on one line.
        auto p50 = ba::quantile(quantiles, ba::quantile_probability = 0.50);
        auto p95 = ba::quantile(quantiles, ba::quantile_probability = 0.95);
        auto simple_median = ba::median(simple);
        auto p_sq_median = ba::median(p_sq_est);
        printf("P: %6.3f %6.3f simple: %6.3f p_sq_est: %6.3f\n",
               p50, p95, simple_median, p_sq_median);
    }
}
Printing:
P: 3.000 7.000 simple: 5.000 p_sq_est: 6.000
P: 5.500 9.000 simple: 5.231 p_sq_est: 6.281
P: 5.286 9.433 simple: 5.448 p_sq_est: 6.004
P: 5.460 9.688 simple: 5.346 p_sq_est: 6.136
P: 5.396 9.835 simple: 5.285 p_sq_est: 6.184
P: 5.353 9.835 simple: 5.240 p_sq_est: 6.201
P: 5.321 9.910 simple: 5.204 p_sq_est: 6.166
P: 5.297 9.951 simple: 5.175 p_sq_est: 6.263
// ...
P: 5.130 10.000 simple: 5.011 p_sq_est: 6.523
P: 5.129 10.000 simple: 5.011 p_sq_est: 6.511
P: 5.129 10.000 simple: 5.011 p_sq_est: 6.511
P: 5.129 10.000 simple: 5.011 p_sq_est: 6.517
P: 5.129 10.000 simple: 5.011 p_sq_est: 6.511