Search code examples
r ggplot2 heatmap geom-tile

Tile length in geom_tile heatmap incorrect


I'm trying to visualize aggregated values for weekdays and hours of the day with a heatmap in R, using ggplot2's geom_tile. With my test data the method works just fine. However, when I try an excerpt of another test dataset, the length of the tiles is suddenly incorrect.

Working test:

# Reproducible 100-row sample: weekday, hour of day, year and a signed value
  set.seed(123)
  weekday_names <- c("Monday", "Tuesday", "Wednesday", "Thursday",
                     "Friday", "Saturday", "Sunday")
  testframe <- cbind.data.frame(
    # factor levels are the weekdays in reverse order (Sunday first)
    day = factor(sample(weekday_names, 100, replace = TRUE),
                 levels = rev(weekday_names)),
    hour = sample(0:23, 100, replace = TRUE),
    year = sample(c(2018, 2019, 2020), 100, replace = TRUE),
    value = sample(seq(-312, 324, 1), 100, replace = TRUE)
  )

  # Fill limits are symmetric around zero: +/- the largest absolute value
  UpperLim <- max(abs(range(testframe$value)))
  LowerLim <- -UpperLim

  # Heatmap: one tile per (hour, day) observation, coloured by value
  ggplot(testframe, aes(x = hour, y = day)) +
    geom_tile(aes(fill = value), colour = "black") +
    scale_fill_distiller(palette = "RdYlGn",
                         direction = 1,
                         limits = c(LowerLim, UpperLim)) +
    # breaks sit at -0.5, 0.5, ..., 23.5 and are labelled "00:00" ... "24:00"
    scale_x_continuous(breaks = seq(-0.5, 23.5, 1),
                       limits = c(-0.5, 23.5),
                       labels = sprintf("%02d:00", 0:24)) +
    # keep weekdays without observations on the axis
    scale_y_discrete(drop = FALSE) +
    coord_equal() +
    labs(title = "Value by Weekday and Hour", x = "", y = "") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1),
          axis.ticks.x = element_blank(),
          legend.position = "bottom",
          legend.key.width = unit(2, "cm"),
          panel.grid = element_blank())

Correct Result: Testplot

This is exactly the plot I want to have. However, when I try the same code with an excerpt from another test dataset, it doesn't work in this way:

Other Test dataset:

# dput() output of a three-row grouped tibble (class "grouped_df").
# Columns: day (factor, levels Sunday ... Monday), hour (integer), year,
# affect, PosAffect, NegAffect. The rows are Monday 12h, Monday 23h and
# Tuesday 0h; the "groups" attribute records grouping by day and hour.
helperframe <- structure(list(day = structure(c(7L, 7L, 6L), .Label = c("Sunday", 
"Saturday", "Friday", "Thursday", "Wednesday", "Tuesday", "Monday"
), class = "factor"), hour = c(12L, 23L, 0L), year = c(2018, 
2018, 2018), affect = c(0, 286.11, 44.44), PosAffect = c(0, 286.11, 
44.44), NegAffect = c(0, 0, 0)), row.names = c(NA, -3L), groups = structure(list(
    day = structure(c(6L, 7L, 7L), .Label = c("Sunday", "Saturday", 
    "Friday", "Thursday", "Wednesday", "Tuesday", "Monday"), class = "factor"), 
    hour = c(0L, 12L, 23L), .rows = structure(list(3L, 1L, 2L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

plotting helperframe

  # Fill limits are symmetric around zero: +/- the largest absolute affect
  UpperLim <- max(abs(range(helperframe$affect)))
  LowerLim <- -UpperLim

  # Heatmap of affect by hour (x) and weekday (y).
  # NOTE: `starttime` and `endtime` are used in the subtitle but are not
  # defined in this snippet; they must exist before this code is run.
  out <- ggplot(helperframe, aes(hour, day)) +
    # width/height are fixed to one hour x one weekday. When they are left
    # unset, geom_tile() derives the tile size from the resolution of the
    # data, which is unreliable for sparse data such as hours 0, 12 and 23.
    geom_tile(aes(fill = affect), colour = "black", width = 1, height = 1) +
    labs(title = "Reported Affect by Weekday and Hour",
         subtitle = paste(starttime, " - ", endtime),
         x = "",
         y = "") +
    scale_fill_distiller(palette = "RdYlGn", direction = 1, limits = c(LowerLim, UpperLim)) +
    # keep weekdays without observations on the axis
    scale_y_discrete(drop = FALSE) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1),
          axis.ticks.x = element_blank(),
          legend.position = "bottom",
          legend.key.width = unit(2, "cm"),
          panel.grid = element_blank()) +
    coord_equal() +
    # was misspelled `scale_x_contiunous`, which is not a function;
    # breaks sit at -0.5, 0.5, ..., 23.5 and are labelled "00:00" ... "24:00"
    scale_x_continuous(breaks = seq(-0.5, 23.5, 1),
                       limits = c(-0.5, 23.5),
                       labels = sprintf("%02d:00", 0:24))

This gives me an incorrect plot: the tiles have the wrong length, and their positions do not match the data.

Incorrect plot

When I switch out scale_x_continuous for scale_x_discrete, I do get the correct tiles, but now the x-axis disappears...

Axis disappears

Are there any suggestions for getting the correct tile length and position while not losing the x-axis?


Solution

  • Try these changes on your code:

    First, format the x-axis variable:

    library(ggplot2)
    # Recode hour (0-24) as an ordered factor labelled "00:00" ... "24:00",
    # so that the x-axis can be handled as a discrete scale.
    helperframe$hour <- factor(helperframe$hour,
                               levels = 0:24,
                               labels = sprintf("%02d:00", 0:24),
                               # TRUE rather than T: T is an ordinary variable
                               # that can be reassigned
                               ordered = TRUE)
    

    Now, the plot:

    # Heatmap of affect by hour (x) and weekday (y); the x-axis is discrete
    # with limits "00:00" ... "24:00"
    outplot <- ggplot(helperframe, aes(x = hour, y = day)) +
      geom_tile(aes(fill = affect), colour = "black") +
      scale_fill_distiller(palette = "RdYlGn",
                           direction = 1,
                           limits = c(LowerLim, UpperLim)) +
      scale_x_discrete(limits = sprintf("%02d:00", 0:24)) +
      # keep weekdays without observations on the axis
      scale_y_discrete(drop = FALSE) +
      coord_equal() +
      labs(title = "Reported Affect by Weekday and Hour",
           # literal placeholder text, not the values of any variables
           subtitle = paste("starttime", " - ", "endtime"),
           x = "",
           y = "") +
      theme_minimal() +
      theme(axis.text.x = element_text(angle = 90, hjust = 1),
            axis.ticks.x = element_blank(),
            legend.position = "bottom",
            legend.key.width = unit(2, "cm"),
            panel.grid = element_blank())
    

    Output:

    enter image description here