Search code examples
pythonnumpymatplotlibscipygaussian

How to implement a 2D Gaussian on a 2D numpy array


I have a 2D NumPy array of size 10 by 10, in which I am trying to implement a 2D Gaussian distribution on it so that I can use the new column as a feature in my ML model. The center (the peak of the Gaussian distribution) should be at (3, 5) of the 2D NumPy array. Is there any way to do this in Python? I have also included a heatmap of my np array.

Here is my data:

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import multivariate_normal
    my_np_list = [310.90634 , 287.137   , 271.87973 , 266.6     , 271.87973 ,
           287.137   , 310.90634 , 341.41458 , 377.02936 , 416.44254 ,
           266.6     , 238.4543  , 219.844   , 213.28001 , 219.844   ,
           238.4543  , 266.6     , 301.62347 , 341.41458 , 384.496   ,
           226.2176  , 192.248   , 168.61266 , 159.96    , 168.61266 ,
           192.248   , 226.2176  , 266.6     , 310.90634 , 357.68146 ,
           192.248   , 150.81174 , 119.22715 , 106.64001 , 119.22715 ,
           150.81174 , 192.248   , 238.4543  , 287.137   , 337.2253  ,
           168.61266 , 119.22715 ,  75.40587 ,  53.320004,  75.40587 ,
           119.22715 , 168.61266 , 219.844   , 271.87973 , 324.33292 ,
           159.96    , 106.64001 ,  53.320004,   0.      ,  53.320004,
           106.64001 , 159.96    , 213.28001 , 266.6     , 319.92    ,
           168.61266 , 119.22715 ,  75.40587 ,  53.320004,  75.40587 ,
           119.22715 , 168.61266 , 219.844   , 271.87973 , 324.33292 ,
           192.248   , 150.81174 , 119.22715 , 106.64001 , 119.22715 ,
           150.81174 , 192.248   , 238.4543  , 287.137   , 337.2253  ,
           226.2176  , 192.248   , 168.61266 , 159.96    , 168.61266 ,
           192.248   , 226.2176  , 266.6     , 310.90634 , 357.68146 ,
           266.6     , 238.4543  , 219.844   , 213.28001 , 219.844   ,
           238.4543  , 266.6     , 301.62347 , 341.41458 , 384.496   ]
    
    my_np_array = np.array(my_np_list).reshape(10, 10)
    plt.imshow(my_np_array, interpolation='none')
    plt.show()
    
    
    size = 100
    store_center = (3, 5)
    i_center = 3
    j_center = 5

enter image description here

I tried the scipy.stats.multivariate_normal.pdf on my array, but it didn't work.

    import scipy
    from scipy import stats
    
    my_np_array = my_np_array.reshape(-1)
    y = scipy.stats.multivariate_normal.pdf(my_np_array, mean=2, cov=0.5)
    
    y = y.reshape(10,10)
    
    yy = np.dot(y.T,y)

Solution

  • Here is a 2-Gaussian of best fit.

    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.optimize
    
    my_np_list = [
        310.90634 , 287.137   , 271.87973 , 266.6     , 271.87973 ,
        287.137   , 310.90634 , 341.41458 , 377.02936 , 416.44254 ,
        266.6     , 238.4543  , 219.844   , 213.28001 , 219.844   ,
        238.4543  , 266.6     , 301.62347 , 341.41458 , 384.496   ,
        226.2176  , 192.248   , 168.61266 , 159.96    , 168.61266 ,
        192.248   , 226.2176  , 266.6     , 310.90634 , 357.68146 ,
        192.248   , 150.81174 , 119.22715 , 106.64001 , 119.22715 ,
        150.81174 , 192.248   , 238.4543  , 287.137   , 337.2253  ,
        168.61266 , 119.22715 ,  75.40587 ,  53.320004,  75.40587 ,
        119.22715 , 168.61266 , 219.844   , 271.87973 , 324.33292 ,
        159.96    , 106.64001 ,  53.320004,   0.      ,  53.320004,
        106.64001 , 159.96    , 213.28001 , 266.6     , 319.92    ,
        168.61266 , 119.22715 ,  75.40587 ,  53.320004,  75.40587 ,
        119.22715 , 168.61266 , 219.844   , 271.87973 , 324.33292 ,
        192.248   , 150.81174 , 119.22715 , 106.64001 , 119.22715 ,
        150.81174 , 192.248   , 238.4543  , 287.137   , 337.2253  ,
        226.2176  , 192.248   , 168.61266 , 159.96    , 168.61266 ,
        192.248   , 226.2176  , 266.6     , 310.90634 , 357.68146 ,
        266.6     , 238.4543  , 219.844   , 213.28001 , 219.844   ,
        238.4543  , 266.6     , 301.62347 , 341.41458 , 384.496   ,
    ]
    
    my_np_array = np.array(my_np_list).reshape(10, -1)
    
    
    def gaussian2(xy: np.ndarray, a: float, b: float, c: float, d: float, e: float, f: float) -> np.ndarray:
        x, y = xy
    
        z = (
            a - b
            * np.exp(-((x - c)/d)**2)
            * np.exp(-((y - e)/f)**2)
        )
        return z
    
    
    xy = np.stack(
        np.meshgrid(
            np.arange(my_np_array.shape[1]),
            np.arange(my_np_array.shape[0]),
        )
    ).reshape((2, -1))
    
    param, _ = scipy.optimize.curve_fit(
        f=gaussian2,
        xdata=xy,
        ydata=my_np_array.ravel(),
        p0=(400, 400,
            3, 1,
            5, 1)
    )
    print(param)
    zfit = gaussian2(xy, *param).reshape(my_np_array.shape)
    
    fig, (ax_act, ax_fit) = plt.subplots(nrows=1, ncols=2)
    ax_act.imshow(my_np_array)
    ax_fit.imshow(zfit)
    plt.show()
    
    [447.47305265 394.42329346   3.02857599   5.53214092   4.98984104
       5.56048623]
    

    best fit

    It's too broad, so if you want a better fit you need to use something that isn't Gaussian. For instance, modified exponents of about 1.7 and 1.8 provide for an excellent fit - discounting your peak "0" which looks fake to me.

    def gaussian2(xy: np.ndarray, a: float, b: float, c: float, d: float, e: float, f: float, g: float, h: float) -> np.ndarray:
        x, y = xy
    
        z = (
            a - b
            * np.exp(-np.abs((x - c)/d)**e)
            * np.exp(-np.abs((y - f)/g)**h)
        )
        return z
    
    
    param, _ = scipy.optimize.curve_fit(
        f=gaussian2,
        xdata=xy,
        ydata=my_np_array.ravel(),
        p0=(400, 400,
            3, 5, 2,
            5, 5, 2)
    )
    
    [482.96976151 441.22504655   3.01091214   6.11061124   1.79338408
       5.00625763   6.27235212   1.69061652]
    

    non-gaussian fit

    This will improve even further if you exclude the fake peak from the fit:

    z_flat = my_np_array.ravel()
    not_zero, = np.nonzero(z_flat)
    z_flat = z_flat[not_zero]
    xy = xy[:, not_zero]
    # ...
    
    zfit = np.zeros_like(my_np_array)
    x, y = xy
    zfit[y, x] = gaussian2(xy, *param)