Search code examples
vega-lite rolling-computation vega-embed

Vega-Lite: percent of a count over a rolling year


This is what I am trying to accomplish codelink

I want a rolling percent for a rolling year that looks back either 365 days or 11 months; I am just not sure how the frames work. If I hover over a point, let's say July 2023, the tooltip should show the percent of count over total for the previous year, i.e. from July 01, 2022 to Jun 30, 2023.
This is the reproducible example that comes closest to my own code. Some days have rain and others do not; I want the percentage of rainy days relative to all days with precipitation (i.e. not sunny) over the duration of the graph. Also, can you help me understand how frames work? I have tried to find more information than just the standard documentation, but I have not found anything. When I apply this to my own data it gives me really jagged lines, i.e. multiple points for every x rather than a single value for each point on the x-axis.

{
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "data": {
    "url": "data/seattle-weather.csv"
  },
  "mark": "bar",
  "transform": [
    {
      "as": "rolling_month_year",
      "calculate": " timeFormat(datetime(year(datum.date), month(datum.date), -364), '%d %b %Y') + ' - ' +    timeFormat(datetime(year(datum.date), month(datum.date), day('01'),-1), '%d %b %Y')"
    },
    {
      "as": "SunnyDay",
      "calculate": "datum.weather=='sun' ? 'Sunny':'Not Sunny'"
    },
    {
      "sort": [
        {
          "field": "date"
        }
      ],
      "frame": [-11, 0],
      "window": [
        {
          "op": "count",
          "field": "weather",
          "if": "datum.weather=='rain'",
          "as": "rolling_count"
        }
      ]
    },
    {
      "sort": [
        {
          "field": "date"
        }
      ],
      "frame": [-11, 0],
      "window": [
        {
          "op": "count",
          "field": "weather",
          "filter": "datum.weather !='sun'",
          "as": "rolling_total"
        }
      ]
    },
    {
      "as": "percentRain",
      "calculate": "datum.rolling_count*100/datum.rolling_total"
    }
  ],
  "encoding": {
    "x": {
      "title": "Date",
      "type": "temporal",
      "timeUnit": "monthyear",
      "field": "date"
    },
    "y": {
      "axis": {
        "title": "Max Temperature and Rolling Mean"
      },
      "type": "quantitative"
    }
  },
  "layer": [
    {
      "mark": {
        "type": "point",
        "opacity": 0.3
      },
      "encoding": {
        "y": {
          "title": "Max Temperature",
          "field": "temp_max"
        }
      }
    },
    {
      "mark": {
        "type": "line",
        "size": 3,
        "color": "red"
      },
      "encoding": {
        "tooltip": [
          {
            "field": "rolling_month_year"
          }
        ],
        "y2": {
          "title": "Rolling Mean of Max Temperature",
          "field": "percentRain"
        }
      }
    }
  ]
}

Thank you for any insight that you can provide

Loopy

I have tried ChatGPT, searching the internet, changing the frames, tweaking the filters, and changing the timeUnit on the calculation and/or the x-axis. Any information you can provide would be great.


Solution

  • Try something like this. Some solid testing is required to validate the running average.

    enter image description here

    {
      "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
      "description": "Rolling 365-day percentage of non-sunny days (one value per month) with daily max temperatures in the background.",
      "width": 800,
      "height": 300,
      "data": {"url": "data/seattle-weather.csv"},
      "transform": [
        {"calculate": "datum.weather=='sun' ? 0:1", "as": "NoSun"},
        {"calculate": "utcFormat(datum.date,'%Y-%B')", "as": "YearMonthDate"},
        {
          "window": [
            {"field": "NoSun", "op": "sum", "as": "rolling_sum"},
            {"op": "count", "as": "rolling_count"}
          ],
          "frame": [-364, 0],
          "sort": [{"field": "date", "order": "ascending"}]
        },
        {
          "calculate": "round((datum.rolling_sum/datum.rolling_count)*10000)/100",
          "as": "rolling_avg_temp"
        },
        {
          "joinaggregate": [
            {"op": "max", "field": "rolling_avg_temp", "as": "rolling_avg"}
          ],
          "groupby": ["YearMonthDate"]
        }
      ],
      "encoding": {
        "x": {
          "field": "date",
          "type": "temporal",
          "timeUnit": "yearmonth",
          "title": "Date"
        },
        "y": {
          "type": "quantitative",
          "axis": {"title": "Max Temperature and Rolling Mean"}
        },
        "tooltip": [
          {"field": "weather"},
          {"field": "temp_max", "title": "Max Temperature"},
          {
            "field": "rolling_avg",
            "title": "% Rainy days last 365D",
            "format": ",.1f"
          }
        ]
      },
      "layer": [
        {
          "mark": {"type": "circle", "size": 60, "opacity": 0.5},
          "encoding": {
            "y": {"field": "temp_max", "title": "Max Temperature"},
            "color": {
              "field": "NoSun",
              "scale": {"domain": [0, 1], "range": ["#FACA03", "#76746D"]}
            }
          }
        },
        {
          "mark": {
            "type": "line",
            "color": "gray",
            "size": 2,
            "interpolate": "monotone"
          },
          "encoding": {
            "y": {
              "field": "rolling_avg",
              "title": "% Non-sunny days (rolling 365D)"
            }
          }
        }
      ]
    }
    

    Another design:

    enter image description here

    {
      "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
      "description": "Normalized monthly weather-type shares with the rolling 365-day percentage of non-sunny days overlaid as a line.",
      "width": 800,
      "height": 300,
      "data": {"url": "data/seattle-weather.csv"},
      "transform": [
        {"calculate": "datum.weather=='sun' ? 0:1", "as": "NoSun"},
        {"calculate": "utcFormat(datum.date,'%Y-%B')", "as": "YearMonthDate"},
        {
          "window": [
            {"field": "NoSun", "op": "sum", "as": "rolling_sum"},
            {"op": "count", "as": "rolling_count"}
          ],
          "frame": [-364, 0],
          "sort": [{"field": "date", "order": "ascending"}]
        },
        {
          "calculate": "round((datum.rolling_sum/datum.rolling_count)*10000)/100",
          "as": "rolling_avg_temp"
        },
        {
          "joinaggregate": [
            {"op": "max", "field": "rolling_avg_temp", "as": "rolling_avg"}
          ],
          "groupby": ["YearMonthDate"]
        }
      ],
      "encoding": {
        "x": {
          "field": "date",
          "type": "temporal",
          "timeUnit": "yearmonth",
          "title": "Date"
        }
      },
      "layer": [
        {
          "mark": {"type": "area", "opacity": 0.5, "interpolate": "monotone"},
          "encoding": {
            "y": {
              "field": "weather",
              "aggregate": "count",
              "stack": "normalize",
              "title": "% Weather type per month"
            },
            "color": {
              "field": "weather",
              "scale": {
                "domain": ["sun", "fog", "drizzle", "rain", "snow"],
                "range": ["#e7ba52", "#c7c7c7", "#aec7e8", "#1f77b4", "#9467bd"]
              }
            }
          }
        },
        {
          "mark": {
            "type": "line",
            "color": "gray",
            "size": 4,
            "interpolate": "monotone"
          },
          "encoding": {
            "y": {
              "field": "rolling_avg",
              "title": "% Rainy Days",
              "sort": "descending"
            },
            "tooltip": [
              {
                "field": "rolling_avg",
                "title": "% Rainy days last 365D",
                "format": ",.1f"
              }
            ]
          }
        }
      ]
    }