Search code examples
python matplotlib bar-chart

Bar chart with slanted lines instead of horizontal lines


I wish to display a bar chart over a time-series canvas, where each bar's width matches its duration and its top edge connects the first value to the last value. In other words, how can I draw bars with slanted tops that follow the data?

I know how to make barcharts using either the last value (example 1) or the first value (example 2), but what I'm looking for are polygons that would follow the black line shown.

Example 1

enter image description here

Example 2

enter image description here

Code:

    import pandas as pd
    from pandas import Timestamp
    import datetime
    import matplotlib.pyplot as plt
    import numpy as np  # np.nan

    # Sample data, one entry per bar, keyed by row number (0, 1, 2).
    # 'Duration' is a day count computed as years * 365 - 1 (leap days are ignored).
    dd = {
        'Name': dict(enumerate(['A', 'B', 'C'])),
        'Start': dict(enumerate([
            Timestamp('1800-01-01 00:00:00'),
            Timestamp('1850-01-01 00:00:00'),
            Timestamp('1950-01-01 00:00:00'),
        ])),
        'End': dict(enumerate([
            Timestamp('1849-12-31 00:00:00'),
            Timestamp('1949-12-31 00:00:00'),
            Timestamp('1979-12-31 00:00:00'),
        ])),
        'Team': dict(enumerate(['Red', 'Blue', 'Red'])),
        'Duration': dict(enumerate(years * 365 - 1 for years in (50, 100, 30))),
        'First': dict(enumerate([5, 10, 8])),
        'Last': dict(enumerate([10, 8, 12])),
    }
    d = pd.DataFrame(dd)
    # Bare expressions: these only display something in a notebook/REPL cell.
    d.dtypes
    d

    # Fill colour for each team.
    colors = {'Red': '#E81B23', 'Blue': '#00AEF3'}

    # Turn the wide frame (Start/First, End/Last) into a long one with a single Date column.
    def reshape(data):
        """Return *data* in long form: one row per bar endpoint, ordered by date.

        Each input row yields two output rows: (Start, First) and (End, Last).
        The date lands in a 'Date' column and the number in a 'value' column;
        'Name', 'Team' and 'Duration' are carried along unchanged. The result
        is sorted by 'Date' and given a fresh 0..n-1 index.
        """
        shared = ['Name', 'Team', 'Duration']
        endpoint_pairs = (('Start', 'First'), ('End', 'Last'))
        halves = [
            data[[date_col, *shared, value_col]].rename(
                columns={date_col: 'Date', value_col: 'value'}
            )
            for date_col, value_col in endpoint_pairs
        ]
        stacked = pd.concat(halves)
        ordered = stacked.sort_values(by='Date')
        return ordered.reset_index(drop=True)
    df = reshape(d)
    # Bare expressions: these only display something in a notebook/REPL cell.
    df.dtypes
    df

    # One colour per bar, looked up from each row's team.
    bar_colors = [colors[team] for team in d['Team']]

    # Each example anchors the bars on one edge and extends them by 'Duration'
    # toward the other: (anchor column, height column, sign applied to the width).
    examples = (
        ('Start', 'Last', +1),  # example 1: anchored at Start, height = Last value
        ('End', 'First', -1),   # example 2: anchored at End, height = First value
    )
    for anchor, height_col, direction in examples:
        # Black reference line through every (Date, value) endpoint.
        plt.plot(df['Date'], df['value'], color='black')
        plt.bar(
            d[anchor],
            height=d[height_col],
            align='edge',
            width=list(direction * d['Duration']),
            edgecolor='white',
            linewidth=2,
            color=bar_colors,
        )
        plt.show()
            

Solution

  • You can use Matplotlib's Axes.fill_between to generate this type of chart. Importantly, this accurately represents the gaps between your rows where they exist, whereas the bar-based approach makes those gaps appear wider than they truly are unless you set the linewidth of the bars to 0.

    Additionally, your data transformation can be done with pandas.lreshape, which is similar to performing multiple melt operations at the same time.

    import pandas as pd
    from pandas import Timestamp
    import matplotlib.pyplot as plt
    
    # Sample data in wide form: one row per bar.
    # 'Duration' is a day count computed as years * 365 - 1 (leap days are ignored).
    dd = pd.DataFrame({
        'Name': ['A', 'B', 'C'],
        'Start': pd.to_datetime(['1800-01-01', '1850-01-01', '1950-01-01']),
        'End': pd.to_datetime(['1849-12-31', '1949-12-31', '1979-12-31']),
        'Team': ['Red', 'Blue', 'Red'],
        'Duration': [years * 365 - 1 for years in (50, 100, 30)],
        'First': [5, 10, 8],
        'Last': [10, 8, 12],
    })

    # Stack Start/End into 'Date' and First/Last into 'Value' in a single pass,
    # then order the endpoints chronologically.
    endpoint_groups = {'Date': ['Start', 'End'], 'Value': ['First', 'Last']}
    df = pd.lreshape(dd, groups=endpoint_groups).sort_values('Date')

    # Fill colour for each team.
    colors = {'Red': '#E81B23', 'Blue': '#00AEF3'}
    
    
    fig, ax = plt.subplots()
    dates, values, teams = df['Date'], df['Value'], df['Team']
    for team in teams.unique():
        # Restrict the fill to this team's endpoints. fill_between only fills
        # between neighbouring samples that are both selected, so two consecutive
        # rows of the same team would be joined into one polygon.
        team_mask = teams == team
        ax.fill_between(
            dates,
            values,
            where=team_mask,
            color=colors[team],
            linewidth=0,  # no outline, so gaps between rows are not visually widened
        )
    # Anchor the filled areas on y = 0, like bars.
    ax.set_ylim(bottom=0)

    plt.show()
    

    enter image description here