Search code examples
c++c++11randomintegernormal-distribution

How to generate integers between min and max using normal distribution?


I learnt that we use

 // Seed the engine with the system clock's tick count since its epoch.
 unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
 std::default_random_engine generator (seed);
 // NOTE: the second constructor argument of std::normal_distribution is the
 // standard deviation, not the variance, despite the name used here.
 std::normal_distribution<double> distribution (mean_value,variance_value);

to generate real numbers. But I don't know how to restrict the generated values to a range (min and max), or how to generate only integers in this scenario. In the case of a uniform distribution, it is straightforward. Can anyone help, please? Thanks!


Solution

  • Well, you could compute probabilities from normal distribution at given points, and use them for discrete sampling.

    Along the lines

    #include <cmath>
    #include <iostream>
    #include <random>
    #include <stdexcept>
    #include <vector>
    
    // Pi written with more digits than a double can hold, so the compiler
    // rounds it to the nearest representable value.
    constexpr double PI = 3.14159265358979323846;
    
    // Returns x multiplied by itself.
    static inline double squared(const double x) {
        const double product = x * x;
        return product;
    }
    
    // Probability density of the normal distribution at x:
    //     exp(-((x - mu) / sigma)^2 / 2) / (sigma * sqrt(2 * pi))
    //
    // x     - point at which the density is evaluated
    // mu    - mean of the distribution
    // sigma - standard deviation; not validated here, the formula divides by it
    double GaussPDF(const double x,
                    const double mu,
                    const double sigma) {
        // Standardized distance of x from the mean.
        const double z = (x - mu) / sigma;
        // std::-qualified calls: <cmath> is only guaranteed to declare the
        // math functions inside namespace std.
        return std::exp(-0.5 * squared(z)) / (std::sqrt(2.0 * PI) * sigma);
    }
    
    // Draws one integer from [xmin, xmax], where each integer k is chosen with
    // probability proportional to the Gaussian density GaussPDF(k, mu, sigma).
    //
    // xmin, xmax - inclusive bounds of the result; xmin must not exceed xmax
    // mu         - mean of the underlying Gaussian (must be finite)
    // sigma      - standard deviation of the underlying Gaussian (finite, > 0)
    // rng        - random engine; its state is advanced by the draw
    //
    // Throws std::invalid_argument when the bounds or parameters are invalid.
    // The weight table is rebuilt on every call, so the cost is linear in the
    // width of the range.
    int SampleTruncIntGauss(const int xmin, const int xmax, const double mu, const double sigma, std::mt19937_64& rng) {
        if (xmin > xmax)
            throw std::invalid_argument("SampleTruncIntGauss: xmin must not exceed xmax");
        if (!std::isfinite(mu) || !std::isfinite(sigma) || sigma <= 0.0)
            throw std::invalid_argument("SampleTruncIntGauss: mu must be finite and sigma must be finite and positive");

        // Widen before subtracting so that extreme bounds cannot overflow int.
        const long long count = static_cast<long long>(xmax) - xmin + 1;
        std::vector<double> p(static_cast<std::vector<double>::size_type>(count));

        // Fill the weights for xmin, xmin + 1, ..., xmax.
        bool has_mass = false;
        double x = static_cast<double>(xmin);
        for (double& w : p) {
            w = GaussPDF(x, mu, sigma);
            has_mass = has_mass || w > 0.0;
            x += 1.0;
        }

        if (!has_mass) {
            // Every weight underflowed to zero (mean far outside the range, or a
            // very small sigma). std::discrete_distribution requires a positive
            // weight sum, so return the in-range integer closest to the mean,
            // which is where the probability mass concentrates in this case.
            const double lo = static_cast<double>(xmin);
            const double hi = static_cast<double>(xmax);
            const double nearest = mu < lo ? lo : (mu > hi ? hi : mu);
            return static_cast<int>(std::lround(nearest));
        }

        // discrete_distribution normalizes the weights internally.
        std::discrete_distribution<int> igauss{ p.begin(), p.end() };

        return xmin + igauss(rng);
    }
    
    // Demo driver: draws 10000 samples from the truncated integer Gaussian on
    // [-3, 5] and prints one "value   count" line per integer in the range.
    int main() {
        const int lo = -3;
        const int hi = 5;
        const int bins = hi - lo + 1;

        const double mean = 1.2;
        const double stddev = 2.3;

        // Fixed seed, so every run prints the same histogram.
        std::mt19937_64 rng{ 98761728941ULL };

        // histogram[i] counts how often the value lo + i was drawn.
        std::vector<int> histogram(bins, 0);

        int remaining = 10000;
        while (remaining-- > 0) {
            const int drawn = SampleTruncIntGauss(lo, hi, mean, stddev, rng);
            ++histogram[drawn - lo];
        }

        for (int bin = 0; bin != bins; ++bin) {
            std::cout << (lo + bin) << "   " << histogram[bin] << '\n';
        }

        return 0;
    }
    

    You could make the code more efficient (I rebuild the probabilities array every time we sample), but it demonstrates the gist of the idea.

    UPDATE

    You could also use non-point probabilities for sampling, i.e. treat the probability at an integer point x as the probability that the value falls in the range [x-0.5, x+0.5]. This is easily expressed via the Gaussian CDF.

    // 1 / sqrt(2), written with more digits than a double can hold, so the
    // compiler rounds it to the nearest representable value.
    constexpr double INV_SQRT2 = 0.70710678118654752440;
    
    // Cumulative distribution function of the normal distribution at x, i.e.
    // the probability that a normal variable with the given mean and standard
    // deviation is <= x.
    //
    // x     - point at which the CDF is evaluated
    // mu    - mean of the distribution
    // sigma - standard deviation; not validated here, the formula divides by it
    double GaussCDF(const double x,
                    const double mu,
                    const double sigma) {
        const double v = INV_SQRT2 * (x - mu) / sigma;
        // 0.5 * (1 + erf(v)) and 0.5 * erfc(-v) are the same function, but the
        // erfc form does not cancel to zero when v is strongly negative, so the
        // lower tail keeps its precision.
        return 0.5 * std::erfc(-v);
    }
    
    // Probability mass assigned to the integer x: the Gaussian CDF evaluated at
    // x + 0.5 minus the Gaussian CDF evaluated at x - 0.5.
    double ProbCDF(const int    x,
                   const double mu,
                   const double sigma) {
        const double center = static_cast<double>(x);
        const double upper = GaussCDF(center + 0.5, mu, sigma);
        const double lower = GaussCDF(center - 0.5, mu, sigma);
        return upper - lower;
    }
    

    and code for probabilities would be

    // Weight of each integer xmin + k is the CDF mass of its half-open
    // neighbourhood rather than the point density.
    for (int k = 0; k != n; ++k) {
        p[k] = ProbCDF(xmin + k, mu, sigma);
    }
    

    The result is slightly different, but still resembles a Gaussian.