Search code examples
matplotlibhistogrambinning

How can a bin width be made consistent between multiple matplotlib histograms?


I have a small function that is intended to take two lists of numbers and to compare them using their respective histograms overlaid and using a ratio plot. The bin width of the ratio plot is inconsistent with the bin width of the overlaid histograms. How could the ratio plot be made to have bin widths identical to those of the overlaid histograms?

import numpy
import matplotlib.pyplot
import datavision # sudo pip install datavision
import shijian    # sudo pip install shijian

def main():

    a = numpy.random.normal(2, 2, size = 120)
    b = numpy.random.normal(2, 2, size = 120)

    save_histogram_comparison_matplotlib(
        values_1      = a,
        values_2      = b,
        label_1       = "a",
        label_2       = "b",
        normalize     = True,
        label_ratio_x = "frequency",
        label_y       = "",
        title         = "comparison of a and b",
        filename      = "test.png"
    )

def save_histogram_comparison_matplotlib(
    values_1       = None,
    values_2       = None,
    filename       = None,
    number_of_bins = None,
    normalize      = True,
    label_x        = "",
    label_y        = None,
    label_ratio_x  = "frequency",
    label_ratio_y  = "ratio",
    title          = None,
    label_1        = "1",
    label_2        = "2",
    overwrite      = True,
    LaTeX          = False
    ):

    matplotlib.pyplot.ioff()
    if LaTeX is True:
        matplotlib.pyplot.rc("text", usetex = True)
        matplotlib.pyplot.rc("font", family = "serif")
    if number_of_bins is None:
        number_of_bins_1 = datavision.propose_number_of_bins(values_1)
        number_of_bins_2 = datavision.propose_number_of_bins(values_2)
        number_of_bins   = int((number_of_bins_1 + number_of_bins_2) / 2)
    if filename is None:
        filename = shijian.propose_filename(
            filename  = title.replace(" ", "_") + ".png",
            overwrite = overwrite
        )

    values = []
    values.append(values_1)
    values.append(values_2)
    figure, (axis_1, axis_2) = matplotlib.pyplot.subplots(nrows = 2)
    ns, bins, patches = axis_1.hist(
        values,
        normed   = normalize,
        histtype = "stepfilled",
        bins     = number_of_bins,
        alpha    = 0.5,
        label    = [label_1, label_2]
    )
    axis_1.legend()
    axis_2.bar(
        bins[:-1],
        ns[0] / ns[1],
        alpha = 1,
    )
    axis_1.set_xlabel(label_x)
    axis_1.set_ylabel(label_y)
    axis_2.set_xlabel(label_ratio_x)
    axis_2.set_ylabel(label_ratio_y)
    matplotlib.pyplot.title(title)
    matplotlib.pyplot.savefig(filename)
    matplotlib.pyplot.close()

if __name__ == "__main__":
    main()

EDIT: temporary scratchboard because coding in comments isn't reasonable

import numpy
import matplotlib.pyplot
import datavision
import shijian

def main():

    a = numpy.random.normal(2, 2, size = 120)
    b = numpy.random.normal(2, 2, size = 120)

    save_histogram_comparison_matplotlib(
        values_1      = a,
        values_2      = b,
        label_1       = "a",
        label_2       = "b",
        normalize     = True,
        label_ratio_x = "frequency",
        label_y       = "",
        title         = "comparison of a and b",
        filename      = "test.png"
    )

def save_histogram_comparison_matplotlib(
    values_1       = None,
    values_2       = None,
    filename       = None,
    number_of_bins = None,
    normalize      = True,
    label_x        = "",
    label_y        = None,
    label_ratio_x  = "frequency",
    label_ratio_y  = "ratio",
    title          = None,
    label_1        = "1",
    label_2        = "2",
    overwrite      = True,
    LaTeX          = False
    ):

    matplotlib.pyplot.ioff()
    if LaTeX is True:
        matplotlib.pyplot.rc("text", usetex = True)
        matplotlib.pyplot.rc("font", family = "serif")
    if number_of_bins is None:
        number_of_bins_1 = datavision.propose_number_of_bins(values_1)
        number_of_bins_2 = datavision.propose_number_of_bins(values_2)
        number_of_bins   = int((number_of_bins_1 + number_of_bins_2) / 2)
    if filename is None:
        filename = shijian.propose_filename(
            filename  = title.replace(" ", "_") + ".png",
            overwrite = overwrite
        )

    bar_width = 1
    values = []
    values.append(values_1)
    values.append(values_2)
    figure, (axis_1, axis_2) = matplotlib.pyplot.subplots(nrows = 2)
    ns, bins, patches = axis_1.hist(
        values,
        normed   = normalize,
        histtype = "stepfilled",
        bins     = number_of_bins,
        alpha    = 0.5,
        label    = [label_1, label_2],
        rwidth   = bar_width
    )
    axis_1.legend()
    axis_2.bar(
        bins[:-1],
        ns[0] / ns[1],
        alpha = 1,
        width = bar_width
    )
    axis_1.set_xlabel(label_x)
    axis_1.set_ylabel(label_y)
    axis_2.set_xlabel(label_ratio_x)
    axis_2.set_ylabel(label_ratio_y)
    matplotlib.pyplot.title(title)
    matplotlib.pyplot.savefig(filename)
    matplotlib.pyplot.close()

if __name__ == "__main__":
    main()

Solution

  • You need the rwidth parameter in your axis_1.hist(..) call

    You can adjust rwidth and bins to match your axis_2.bar(...) call (default width in bar is 0.8).

    e.g.

    matplotlib.pyplot.hist(a,bins=6,rwidth=0.8)
    

    enter image description here