Search code examples
python pandas numpy random

Mean, variance of normal distribution for each seed in Python


I am using numpy.random.seed() to generate a different normally distributed sample for each seed, but I want the sample mean and variance to stay the same for every seed. How can I achieve this? The current and expected outputs are shown below.

from scipy.stats import truncnorm
import numpy as np
import os
import csv 
import pandas as pd
import random
import matplotlib.pyplot as plt

# For each seed (0 and 1): draw `Nodes` normal samples, build two rearranged
# copies, print the sample mean/variance and save a histogram of the sample.
for i in range(0,2): 
    
    # Reseed NumPy's global RNG, so every iteration draws a different sample.
    np.random.seed(i)
    mu, sigma = 49.925, 0.996 # mean and standard deviation
    Nodes=220
    # Raw draws are scaled by 1e-6; the prints below undo this scaling.
    r = (1e-6)*np.random.normal(mu, sigma, Nodes)

    # r1: the sample sorted in descending order, shaped (1, Nodes).
    # It is not used again in this snippet.
    sort_r = np.sort(r)
    r1=sort_r[::-1]
    r1=r1.reshape(1,Nodes)
    # r2: a copy of the sample whose elements after the first are shuffled.
    # NOTE(review): the shuffle acts in place on a slice of r2.ravel();
    # presumably that slice aliases r2 here -- confirm the view semantics.
    # The shuffle also consumes global RNG state after the draw above.
    r2 = r.copy()
    np.random.shuffle(r2.ravel()[1:])
    r2=r2.reshape(1,Nodes)
    maximum = r2.max()
    # Index arrays of every position where r2 equals its maximum.
    indice1 = np.where(r2 == maximum)

    # Swap: the old first value goes where the maximum was, and the maximum
    # goes to position [0][0].
    r2[indice1] = r2[0][0]
    r2[0][0] = maximum

    # The last entry is overwritten with the maximum as well; the value that
    # was stored there is discarded.
    r2[0][Nodes-1] = maximum 

    # Statistics of the drawn sample `r` (not of r1/r2). Nothing above forces
    # them to equal mu or sigma**2, which is why the printed values vary from
    # seed to seed.
    mean=np.mean(r)
    print("Mean =",mean*1e6)
    var=np.var(r)
    # The 1e-6 scale factor enters the variance squared, hence 1e12.
    print("var =",var*1e12)

    # Rebind r to a pandas Series in the rescaled units for plotting.
    r=pd.Series(r*1e6)
    # NOTE(review): no new figure is created per iteration, so the histograms
    # presumably accumulate on the same axes -- confirm this is intended.
    r.hist()
    plt.xlabel("Pore throat size [\u03BCm]",size=15)
    plt.ylabel("Count",size=15)
    plt.title(rf"var={round(var*1e12,1)}",size=25)
    # The path contains no per-iteration placeholder, so each iteration saves
    # to the same file name.
    plt.savefig(rf"C:\Users\USER\OneDrive - Technion\Research_Technion\Python_PNM\Surfactant A-D\220 nodes_1\var_1\Histogram_0.png")

The current output is

Mean = 49.99989488440971
var = 1.0009790234390512
Mean = 50.00911322245587
var = 0.8709906144821978

The expected output is

Mean = 50.000
var = 1.000
Mean = 50.000
var = 1.000

Solution

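  • IIUC, you can standardize each sample (z-score) and rescale it to the target mean and standard deviation. Note that the value printed as "var" below is the standard deviation (a.std()), not the variance; for the question's target of variance 1, set t_std = 1: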

    # Sampling parameters and rescaling targets never change between seeds,
    # so they are defined once before the loop.
    draw_mean, draw_std = 49.925, 2.996
    n_nodes = 220
    target_mean = 50
    target_std = 3

    for seed in range(2):
        # Reseed NumPy's global RNG so every pass draws its own sample.
        np.random.seed(seed)
        r = (1e-6) * np.random.normal(loc=draw_mean, scale=draw_std, size=n_nodes)

        # Standard scaler: centre the sample on its own mean, divide by its
        # own spread relative to the target spread, then shift to the target
        # mean and apply the 1e-6 unit factor again.
        centred = r - r.mean()
        spread_ratio = r.std() / target_std
        a = (1e-6) * (target_mean + centred / spread_ratio)

        rescaled_mean = a.mean() * 1e6
        rescaled_std = a.std() * 1e6
        print("Mean =", rescaled_mean)
        # NOTE: the value printed under the "var" label is a.std(), i.e. the
        # standard deviation of the rescaled sample, not its variance.
        print("var =", rescaled_std)
    

    Output:

    Mean = 50.000000000000014
    var = 2.999999999999999
    Mean = 49.99999999999999
    var = 3.000000000000001