Search code examples
pythonnumpymatplotlibhistogramcurve-fitting

Plotting a histogram with Matplotlib on logscale


I am trying to plot the following sample data where a plane is worked on for (c_num days (x)) and how much someone works on that plane a day (MTC_Daily_Lbr_percent (y)).I created the below coding where I tried to do curve fit but I am very new to curve fit and Matplotlib. Is there any way to take this data and plot a histogram? I know that its supposed to be a left skewed bell (with maybe another slight upwards motion towards the end date)Also, is there some type of way to have an equation produced? That way I can forecast?

The data was too large so I can putting it in the comments below


Solution

  • Since you have two axes (days and Lbr), you can either plot a histogram of each variable, or a single 2D histogram that looks over both variables simultaneously. The figures below demonstrate this.

    The first figure is an amended version of the original, where all samples are used for fitting (I think that's what you were trying to do, but since the data's unsorted, x[[0, -1]] doesn't necessarily correspond to the smallest and largest values).

    enter image description here

    You could use this data to make predictions/forecasts for future days. A simple ARMA-type model would be a place to start, and depending on how it performs, you could try other techniques. I can speak to this more depending on what the objective is.

    Reproducible example

    Included data is from OP.

    import pandas as pd
    import numpy as np
    import datetime as dt
    from datetime import datetime, timedelta
    
    import matplotlib.pyplot as plt
    from scipy.optimize import curve_fit
    
    data={
        "ID": ["ID42645674424.4224","  ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID42645674424.4224","   ID42645674424.4224","   ID20635322422.8366","   ID20635322422.8366","   ID20635322422.8366","   ID20635322422.8366","   ID20635322422.8366","   ID30380414661.1556","   ID30380414661.1556","   ID30380414661.1556","   ID30380414661.1556","   ID37511661093.7047","   ID37511661093.7047","   ID37511661093.7047","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID18689129034.7377","   ID18689129034.7377","   ID18689129034.7377","   ID18689129034.7377","   ID18689129034.7377","   ID4830501589.4234","    ID4830501589.4234","    ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID24100797148.6495","   ID24100797148.6495","   ID24100797148.6495","   ID18093459046.6984","   ID18093459046.6984","   ID18093459046.6984","ID18093459046.6984"],
        "MODEL_CD": [  "A320","    A320"," A320"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," A350"," A350"," B717"," B717"," B717"," B717"," B717"," B737"," B737"," B737"," B737"," B767"," B767"," B767"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A320"," A320"," A320"," A320"," A320"," A350"," A350"," A350"," A350"," B717"," B717"," B717"," A320"," A320"," A320"," A320"," A320"," A320"," A320"],
        "Check_Type": ["IFC"," IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  PTS","  PTS","  PTS","  PTS","  PTS","  PTS","  PTS","  PTS","  PTS","  PTS","  PTS","  PTS","  SV","   SV","   SV","   SV","   SV","   SV","   SV","   SV","   121","  121","  REL","  REL","  REL","  REL","  REL","  IFC","  IFC","  IFC","  IFC","  FDA","  FDA","  FDA","  REL","  REL","  REL","  REL","  REL","  REL","  REL","  REL","  REL","  REL","  REL","  C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    C","    IFC","  IFC","  IFC","  IFC","  IFC","  SV","   SV","   SV","   SV","   REL","  REL","  REL","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC","  IFC"],
        "Start_Date": [ "2023-01-12","  2023-01-12","   2023-01-12","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-04","   2023-01-04","   2023-01-18","   2023-01-18","   2023-01-18","   2023-01-18","   2023-01-18","   2023-01-04","   2023-01-04","   2023-01-04","   2023-01-04","   2023-02-19","   2023-02-19","   2023-02-19","   2023-01-03","   2023-01-03","   2023-01-03","   2023-01-03","   2023-01-03","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-25","   2023-01-25","   2023-01-25","   2023-01-25","   2023-01-25","   2023-01-16","   2023-01-16","   2023-01-19","   2023-01-19","   2023-01-27","   2023-01-27","   2023-01-27","   2023-01-12","   2023-01-12","   2023-01-12","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15"],
        "End_Date": ["2023-01-15"," 2023-01-15","   2023-01-15","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-05","   2023-01-05","   2023-01-22","   2023-01-22","   2023-01-22","   2023-01-22","   2023-01-22","   2023-01-10","   2023-01-10","   2023-01-10","   2023-01-10","   2023-02-21","   2023-02-21","   2023-02-21","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-12","   2023-01-12","   2023-01-12","   2023-01-12","   2023-01-12","   2023-01-12","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-01-29","   2023-01-29","   2023-01-29","   2023-01-29","   2023-01-29","   2023-01-17","   2023-01-17","   2023-01-20","   2023-01-20","   2023-01-30","   2023-01-30","   2023-01-30","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-20","   2023-01-20","   2023-01-20","   2023-01-20"],
        "Tot_Lbr_Hrs": ["56.61","   56.61","    56.61","    182.36","   182.36","   182.36","   182.36","   182.36","   182.36","   182.36","   182.36","   182.36","   192.8","    192.8","    192.8","    192.8","    192.8","    192.8","    192.8","    1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  234.16","   234.16","   234.16","   234.16","   234.16","   234.16","   234.16","   234.16","   23.3"," 23.3"," 418.43","   418.43","   418.43","   418.43","   418.43","   91.62","    91.62","    91.62","    91.62","    59.67","    59.67","    59.67","    871.52","   871.52","   871.52","   871.52","   871.52","   764.58","   764.58","   764.58","   764.58","   764.58","   764.58","   4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  103.47","   103.47","   103.47","   103.47","   103.47","   56.47","    56.47","    25.21","    25.21","    327.29","   327.29","   327.29","   61","   61","   61","   59.07","    59.07","    59.07","    59.07"],
        "Daily_Tot_Lbr_Hrs": [ "29.87","   0.25"," 26.49","    6.24"," 60.93","    48.08","    7.33"," 1.38"," 10.74","    7.48"," 33.55","    6.63"," 40.88","    42.75","    23.44","    40.04","    12.69","    32.67","    0.33"," 5.88"," 99.49","    153.22","   177.87","   159.05","   163.57","   143.58","   224.45","   230.6","    161.06","   34.14","    5.66"," 17.75","    41.77","    4.84"," 7.16"," 53.12","    47.16","    34.13","    28.23","    1.99"," 21.31","    44.26","    178.55","   121.66","   73.63","    0.33"," 64.73","    6.15"," 2.69"," 18.05","    1.47"," 30.43","    27.77","    255.82","   306.65","   198.57","   91.9"," 18.58","    65.61","    98.63","    205.67","   222.87","   151.92","   19.88","    151","  245.66","   239.89","   307.71","   286.46","   301.59","   368.02","   451.74","   300.24","   369.01","   442.61","   341.18","   328.63","   187.87","   153.8","    113.14","   49.89","    32.6"," 10.13","    23.8"," 36.36","    22.01","    11.17","    44.52","    11.95","    2.21"," 23","   125.27","   127.97","   74.05","    18.75","    27.53","    14.72","    17.55","    6.72"," 17.13","    17.4"],
        "Day_Counter": ["1","   2","    3","    1","    2","    3","    4","    5","    6","    7","    8","    9","    1","    2","    3","    4","    5","    6","    7","    1","    2","    3","    4","    5","    6","    7","    8","    9","    10","   11","   12","   1","    2","    3","    4","    5","    6","    7","    8","    1","    2","    1","    2","    3","    4","    5","    1","    2","    3","    4","    1","    2","    3","    1","    2","    3","    4","    5","    1","    2","    3","    4","    5","    6","    1","    2","    3","    4","    5","    6","    7","    8","    9","    10","   11","   12","   13","   14","   15","   16","   17","   18","   1","    2","    3","    4","    5","    1","    2","    1","    2","    1","    2","    3","    1","    2","    3","    1","    2","    3","    4"],
        "MTC_Daily_Lbr_percent": ["0.53","    0","    0.47"," 0.03"," 0.33"," 0.26"," 0.04"," 0.01"," 0.06"," 0.04"," 0.18"," 0.04"," 0.21"," 0.22"," 0.12"," 0.21"," 0.07"," 0.17"," 0","    0","    0.06"," 0.1","  0.11"," 0.1","  0.1","  0.09"," 0.14"," 0.15"," 0.1","  0.02"," 0","    0.08"," 0.18"," 0.02"," 0.03"," 0.23"," 0.2","  0.15"," 0.12"," 0.09"," 0.91"," 0.11"," 0.43"," 0.29"," 0.18"," 0","    0.71"," 0.07"," 0.03"," 0.2","  0.02"," 0.51"," 0.47"," 0.29"," 0.35"," 0.23"," 0.11"," 0.02"," 0.09"," 0.13"," 0.27"," 0.29"," 0.2","  0.03"," 0.03"," 0.05"," 0.05"," 0.07"," 0.06"," 0.06"," 0.08"," 0.1","  0.06"," 0.08"," 0.09"," 0.07"," 0.07"," 0.04"," 0.03"," 0.02"," 0.01"," 0.01"," 0.1","  0.23"," 0.35"," 0.21"," 0.11"," 0.79"," 0.21"," 0.09"," 0.91"," 0.38"," 0.39"," 0.23"," 0.31"," 0.45"," 0.24"," 0.3","  0.11"," 0.29"," 0.29"]
    }
    
    df_orig = pd.DataFrame(data)
    df = df_orig.copy()
    
    #Convert to appropriate dtypes
    df = df.astype({
        'Tot_Lbr_Hrs': float,
        'Daily_Tot_Lbr_Hrs': float,
        'Day_Counter': int,
        'MTC_Daily_Lbr_percent': float
    })
    
    #columns to DateTime
    for col in ['Start_Date', 'End_Date']:
        df[col] = pd.to_datetime(df[col].str.strip())
    
    # c_days: the number of days the plane was getting fixed. removes days from cdays.
    # divide daycounter by the number in cdays
    df["c_days"] = (df["End_Date"] - df["Start_Date"] + timedelta(days=1)).dt.days
    df['day_normalized'] = df['Day_Counter'] / df['c_days']
    
    #There are a select few row where the start/end cdays does not equal the labor days.
    # We are removing those from further analyses
    optimal = df.loc[(df['day_normalized'] <= 1)]
    
    #
    #Select and view data
    #
    x_col = 'day_normalized'
    y_col = 'MTC_Daily_Lbr_percent'
    
    x = optimal[x_col].values
    y = optimal[y_col].values
    
    f, axs = plt.subplots(nrows=4, figsize=(6, 8), layout='tight')
    ax = axs[0]
    ax.scatter(x, y, marker='x', s=25, color='tab:brown', label='data')
    ax.set(xlabel=x_col, ylabel=y_col)
    
    #
    # Fit a polynomial
    #
    from numpy.polynomial import Polynomial as P
    p = P.fit(x, y, deg=3)
    
    # calculate new x's and y's
    x_new = np.linspace(x.min(), x.max(), num=100)
    y_new = p(x_new)
    
    #Overlay fit
    ax.plot(x_new, y_new, color='tab:green', linewidth=3, label=f'poly fit (deg={p.degree()})')
    ax.legend(ncol=2, loc='upper left', fontsize=8.5)
    
    #
    # Histograms of x, and of y
    #
    
    #Histogram of the x values
    ax = axs[1]
    ax.hist(x, bins=20, color='tab:brown')
    ax.set(xlabel=x_col, ylabel='counts', title='histogram of ' + x_col)
    
    ax = axs[2]
    ax.hist(y, bins=20, color='tab:brown')
    ax.set(xlabel=y_col, ylabel='counts', title='histogram of ' + y_col)
    
    #
    # Joint histogram of x and y (2D hist)
    #
    H_xy, x_edges, y_edges = np.histogram2d(x, y, bins=[15, 5])
    ax = axs[3]
    
    cmap = plt.get_cmap('Greens', np.unique(H_xy).size)
    im = ax.pcolormesh(x_edges, y_edges, H_xy.T, cmap=cmap)
    ax.set(xlabel=x_col, ylabel=y_col, title='2D histogram')
    f.colorbar(im, label='counts', aspect=5)
    
    for ax in axs[:-1]: ax.spines[['right', 'top']].set_visible(False)