Search code examples
python pandas plotly plotly-python

Plot year by year in the same plot (plotly)


I am trying to plot a time series that spans many years so that every year shares the same start and end day-month on the x axis.

For example, I need the data starting at 01/01 for 2018, 2019, etc. drawn in the same plot, so that the different years can be compared with each other.

The code I wrote plots the data year by year in subplots, but I would like a single Plotly plot.

Link to the data, which can be downloaded to run the script

Subplots from each year

from datetime import date, datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the per-municipality records; "view_date" becomes the index.
df_decremento_municipio = pd.read_csv(
    'app/data/decremento_municipio_202305291512.csv', index_col="view_date"
)

# Plotly layout template: trims the modebar, sets the number separators and
# forces the legend on. It is defined here but not applied to any figure in
# this script.
template_graph = {
    "layout": {
        "modebar": {
            "remove": [
                "zoom",
                "pan",
                "select",
                "zoomIn",
                "zoomOut",
                "lasso2d",
                "autoscale",
            ]
        },
        "separators": ".",
        "showlegend": True,
    }
}

# Parse the index as datetimes so the year can be read from it.
df_decremento_municipio.index = pd.to_datetime(df_decremento_municipio.index)
df_decremento_municipio["year"] = df_decremento_municipio.index.year

min_date = df_decremento_municipio.index.date.min()
max_date = df_decremento_municipio.index.date.max()

# Build one cumulative "area_ha" series per year and stitch them together.
# The years 2015 and 2016 are skipped because their data is very poor.
anos = df_decremento_municipio.index.year.unique()
acumulados = []
for ano in anos[anos > 2016]:
    df_por_ano = df_decremento_municipio[df_decremento_municipio["year"] == ano]
    # Sum rows that share a timestamp, then accumulate within the year so
    # every year restarts from zero.
    dff_acumulacao = df_por_ano["area_ha"].groupby(df_por_ano.index).sum().cumsum()
    acumulados.append(dff_acumulacao)

# Series.append was removed in pandas 2.0; concatenating once at the end is
# the supported replacement and avoids re-copying the data on every pass.
if acumulados:
    df = pd.concat(acumulados)
else:
    # Keep a datetime index even when no year qualifies, so later
    # `df.index.year` lookups still work.
    df = pd.Series(name="area_ha", dtype="float64", index=pd.DatetimeIndex([]))

fig = go.Figure()

# Draw one line per calendar year. Each trace is selected and labelled by the
# year found in the index itself: pairing hand-written first/last dates with a
# separately sorted year list attaches the wrong legend name to the data
# whenever the two orders differ, and an inverted date range silently yields
# an empty trace.
years = df.index.year.unique()
years = years[years > 2016].sort_values()

for year in years:
    tmp = df[df.index.year == year]
    fig.add_trace(
        go.Scatter(
            x=tmp.index,
            y=tmp,
            name=str(year),
            mode='lines',
        )
    )

# Show ticks as day-month on a date axis.
fig.update_layout(height=600, xaxis_tickformat='%d-%m')
fig.update_xaxes(type='date')

fig.show()

Solution

  • Here is what I did; I believe it is more reasonable than the suggestion in the answer given in "A simple way to plot day and month only on the x axis to compare years?".

    I created a column holding the time elapsed since the start of each year and put it on the x axis.

    Plotly doesn't work well with columns of timedelta type, but I think this is the best I can do.

    import pandas as pd
    import plotly.express as px
    
    # Load the per-municipality records; "view_date" becomes the index.
    df_decremento_municipio = pd.read_csv(
        'app/graficos_dev/data/decremento_municipio_202305291512.csv',
        index_col="view_date",
    )

    # Plotly layout template: trims the modebar, sets the number separators
    # and forces the legend on. It is defined here but not applied to the
    # figure below.
    template_graph = {
        "layout": {
            "modebar": {
                "remove": [
                    "zoom",
                    "pan",
                    "select",
                    "zoomIn",
                    "zoomOut",
                    "lasso2d",
                    "autoscale",
                ]
            },
            "separators": ".",
            "showlegend": True,
        }
    }

    df_decremento_municipio.index = pd.to_datetime(df_decremento_municipio.index)

    # Sum "area_ha" over rows that share the same timestamp.
    df_decremento_municipio = df_decremento_municipio[["area_ha"]].groupby(df_decremento_municipio.index).sum()

    # Every year is drawn against the time elapsed since its own 1 January.
    # The elapsed time is anchored at the Unix epoch so the column holds real
    # datetimes that a Plotly date axis renders directly, instead of scaled
    # timedeltas whose raw integers merely happen to be read as milliseconds.
    # NOTE: the offset is a day count, so in a leap year the dates after
    # 29 February are displayed one day later than their calendar day-month.
    epoch = pd.Timestamp("1970-01-01")

    anos = df_decremento_municipio.index.year.unique()
    partes = []
    for ano in anos[anos > 2017]:
        # Work on a copy: adding columns to a filtered view raises
        # SettingWithCopyWarning and may not write where expected.
        df = df_decremento_municipio[df_decremento_municipio.index.year == ano].copy()
        df["year"] = df.index.year
        inicio_do_ano = pd.Timestamp(year=int(ano), month=1, day=1)
        df["timedelta"] = epoch + (df.index - inicio_do_ano)
        # Accumulate within the year so each line restarts from zero.
        df["cumsum"] = df["area_ha"].cumsum()

        partes.append(df[["year", "timedelta", "cumsum"]])

    # Concatenate once instead of growing the frame on every iteration.
    if partes:
        t1 = pd.concat(partes)
    else:
        t1 = pd.DataFrame(columns=["year", "timedelta", "cumsum"])

    fig = px.line(t1, x="timedelta", y="cumsum", color='year')

    fig.update_layout(
        title="Desflorestamento por Tempo",
        xaxis={"title": "Data"},
        yaxis={"title": "Área (ha)"},
        xaxis_tickformat='%d-%m',
    )

    fig.update_xaxes(type='date')

    fig.show()
    

    The result of figure: enter image description here