Search code examples
python, python-3.x, pandas, histogram, plotly

How to bin all outliers into one bin using Histogram in Plotly?


So the question is:

Can I plot a histogram in Plotly, where all values that are bigger than some threshold will be grouped into one bin?

The desired output:

enter image description here

But using the standard Plotly Histogram class, I was only able to get this output:

import pandas as pd

from plotly import graph_objs as go
from plotly.offline import init_notebook_mode, iplot

# Switch Plotly's offline module into notebook mode before any plotting.
init_notebook_mode()

# Sample data: a dense run of small values followed by a few large ones.
regular_values = (
    [1] * 10 + [2] * 9
    + [3.1] * 4 + [3.6] * 4
    + [4] * 7 + [5] * 6 + [6] * 5 + [7] * 4 + [8] * 3
    + [9] * 2 + [10] * 1
)
outlier_values = [111.2] * 2 + [222.3] * 2 + [333.4] * 1  # <- I want to group them into one bin "> 10"
test_df = pd.DataFrame({'values': regular_values + outlier_values})

# Explicit 1-wide bins from 0 to 11, with automatic binning switched off.
bin_spec = {'start': 0, 'end': 11, 'size': 1}
histogram_trace = go.Histogram(x=test_df['values'], xbins=bin_spec, autobinx=False)
data = [histogram_trace]

layout = go.Layout(title='values')
fig = go.Figure(data=data, layout=layout)

iplot(fig, filename='basic histogram')

enter image description here


Solution

  • So after spending some time I found a solution myself, using numpy.histogram and a Plotly Bar chart.

    Leaving it here in case anyone faces the same problem.

    def plot_bar_with_outliers(series, name, end):
        """Plot a histogram-style bar chart with one trailing overflow bar.

        Values are counted in 1-wide bins running from the floored minimum
        of ``series`` up to ``end``. Everything beyond ``end`` is pooled into
        a single final bar labelled ``'> <end>'``. Because ``np.histogram``
        closes only its last bin on the right, a value exactly equal to
        ``end`` is also counted in that final bar.

        Args:
            series: Numeric data exposing ``.min()`` and ``.max()``
                (the example passes a pandas Series).
            name: Title shown above the chart.
            end: Integer threshold; the last regular bin ends here.

        Returns:
            None. The figure is rendered through ``iplot``.
        """
        # Local import: the snippet's import block never brings numpy into
        # scope, so a bare ``np`` would raise NameError.
        import numpy as np

        size = 1

        # Floor rather than int(): int() truncates toward zero, so a negative
        # fractional minimum (e.g. -1.5 -> -1) would fall below the first bin
        # edge and be silently dropped. Clamping to ``end`` keeps the edge
        # list valid when every value lies above the threshold.
        start = min(int(np.floor(series.min())), end)

        # Regular edges up to ``end``, plus one closing edge for the overflow
        # bin: the data maximum if it exceeds ``end``, else one step further.
        largest_value = series.max()
        regular_edges = list(range(start, end + size, size))
        overflow_edge = largest_value if largest_value > end else end + size
        counts, edges = np.histogram(series, bins=regular_edges + [overflow_edge])

        # One label per bin: 'low - high' for regular bins, '> low' for the
        # overflow bin (the only one whose right edge lies past ``end``).
        labels = []
        for low, high in zip(edges[:-1], edges[1:]):
            if high <= end:
                labels.append('{} - {}'.format(low, high))
            else:
                labels.append('> {}'.format(low))

        # Plotting the graph
        data = [go.Bar(x=labels,
                       y=counts)]

        layout = go.Layout(
            title=name
        )
        fig = go.Figure(data=data, layout=layout)

        iplot(fig, filename='basic histogram')
    
    
    # Example: chart the sample data, pooling everything past the 11 edge into the final bar.
    plot_bar_with_outliers(test_df['values'], 'values', end=11)
    

    enter image description here