How to scale data to make area under the graph equal to 1

I made a function which can plot statistics for large arrays (10**8 elements) in less than 2 seconds. How can I scale the Y-axis to make the area under the graph equal to 1?

def dis(inp):
  """Plot a 64-bin histogram outline of `inp` and print summary statistics.

  `inp` is flattened to 1-D; when it exposes a `.numpy()` method, that method
  is called first. The y-axis shows raw per-bin counts (no normalisation).
  Green vertical lines mark `mean + k * std` for k = -3..3, with the mean line
  drawn thicker and slightly taller than the others. After the plot is shown,
  the element count (prefixed by the shape when `inp.ndim > 1`), mean, std,
  min and max are printed. Returns None.
  """
  import numpy as np
  import vaex
  import matplotlib.pyplot as plt

  # Objects that expose `.numpy()` are converted through it before flattening.
  has_numpy_method = getattr(inp, "numpy", None) is not None
  inp1d = np.reshape(inp.numpy() if has_numpy_method else inp, [-1])

  bin_count = 64
  df = vaex.from_arrays(x=inp1d)
  x_min, x_max = df.minmax(df.x)

  # NOTE(review): with edges=True the result has extra slots around the
  # regular bins (per the vaex docs: two leading, one trailing for
  # out-of-range values) - confirm against the vaex version in use.
  counts = df.count(binby=df.x, shape=bin_count, limits='minmax', edges=True)
  counts[-2] += counts[-1]   # fold the trailing slot into the last regular bin
  counts[-1] = counts[-2]    # repeat the last bin so the 'post' step reaches the final edge
  heights = counts[2:]       # bin_count + 1 values, one per edge
  hist_height = np.max(heights)
  edges = np.linspace(x_min, x_max, bin_count+1)
  mean = df.mean(df.x)
  std = df.std(df.x)

  # One marker per standard deviation from -3 to +3; offset 0 is the mean itself.
  for sigma in range(-3, 4):
    position = sigma * std + mean
    at_mean = sigma == 0
    top = hist_height * (1.02 if at_mean else 0.97)
    width = 1 if at_mean else 0.5
    plt.plot([position, position], [0, top], color='#34A853', linewidth=width)

  plt.step(edges, heights, where='post', color='#4285F4', linewidth=1)
  plt.show()
  print(f'{str(inp.shape) + " " if getattr(inp, "shape", None) is not None and inp.ndim > 1 else ""}{len(inp1d):,}\nmean: {mean}\nstd: {std}\nmin: {x_min}\nmax: {x_max}')

# `dis` imports numpy only inside its own body, so this module-level example
# needs its own import before it can use `np`.
import numpy as np

x = np.random.normal(0, 1, (10**8, ))
dis(x)

Complete answer, in case somebody wants to know how to plot statistics for big data:

def dis(inp):
  """Plot a histogram of `inp` scaled so its area is 1, and print summary statistics.

  `inp` is flattened to 1-D; when it exposes a `.numpy()` method, that method
  is called first. The data is counted into 64 equal-width bins between its
  minimum and maximum, and each count is divided by `total_count * bin_width`
  so that the area under the plotted outline equals 1. Green vertical lines
  mark `mean + k * std` for k = -3..3, with the mean line drawn thicker and
  slightly taller than the others. After the plot is shown, the element count
  (prefixed by the shape when `inp.ndim > 1`), mean, std, min and max are
  printed. Returns None.
  """
  import numpy as np
  import vaex
  import matplotlib.pyplot as plt

  if getattr(inp, "numpy", None) is not None:
    inp1d = np.reshape(inp.numpy(), [-1])
  else:
    inp1d = np.reshape(inp, [-1])

  bin_count = 64
  df = vaex.from_arrays(x=inp1d)
  x_min, x_max = df.minmax(df.x)
  # NOTE(review): with edges=True the result has extra slots around the
  # regular bins (per the vaex docs: two leading, one trailing for
  # out-of-range values) - confirm against the vaex version in use.
  bins = df.count(binby=df.x, shape=bin_count, limits='minmax', edges=True)
  bins[-2] += bins[-1]  # fold the trailing slot into the last regular bin
  bins = bins[2:-1]     # keep exactly bin_count per-bin counts
  mean = df.mean(df.x)
  std = df.std(df.x)

  # Scale AUC to 1.
  # This has to happen while `bins` still holds one count per bin: once every
  # value is duplicated for the outline below, np.sum(bins) would be twice the
  # population and the plotted area would come out as 0.5.
  step = (x_max-x_min)/bin_count
  population = np.sum(bins)
  surface = population*step
  bins = bins/surface
  hist_height = np.max(bins)

  # Build the outline: each bin contributes its (left, right) edge pair with
  # the same height at both ends, giving 2 * bin_count points.
  edges = np.linspace(x_min, x_max, bin_count+1)
  left, right = edges[:-1], edges[1:]
  edges = np.reshape(np.array([left,right]).T, [-1])
  bins = np.reshape(np.array([bins,bins]).T, [-1])

  # One marker per standard deviation from -3 to +3; index 3 is the mean itself.
  for i, v in enumerate([x * std + mean for x in range(-3, 4)]):
    if i == 3:
      plt.plot([v, v], [0, hist_height * 1.02], color='#34A853', linewidth=1)
    else:
      plt.plot([v, v], [0, hist_height * 0.97], color='#34A853', linewidth=0.5)

  plt.fill_between(edges, bins, step="pre", alpha=0.3)
  plt.plot(edges, bins, color='#4285F4', linewidth=1)
  plt.show()
  print(f'{str(inp.shape) + " " if getattr(inp, "shape", None) is not None and inp.ndim > 1 else ""}{len(inp1d):,}\nmean: {mean}\nstd: {std}\nmin: {x_min}\nmax: {x_max}')

To moderators: this site doesn't allow me to post code even if it is the answer: "It looks like your post is mostly code; please add some more details."

Solution

The idea is to normalise your data set, i.e. to divide the height of each column by the AUC (area under the curve) of your histogram.

Before "plt.step(...)" write:

# At this point in the question's function `bins` has bin_count + 1 entries:
# the last one is a copy of the final bin, added so the 'post' step plot has a
# value at the last edge. Leave it out of the total so that bin is not counted
# twice and the area comes out as exactly 1.
step = (x_max-x_min)/bin_count
population = np.sum(bins[:-1])
surface = population*step
bins = bins/surface

Hope that helps.