How to add background colours with geom_gitter and geom based on x and y axis (following question from another)

This is the following question from another question: How to add links to similar jobs on ggplot points?

==============================

Hi,

I've created the visualization with the help of Kat from the previous question above. What I want to do is to add background colors to this visualization below.

# Create nodes dataframe with x and y coordinates
nodes <- data.frame(
  name = all_occupations,
  x = factor(jobType$Job_type[match(all_occupations, jobType$Occupation)], levels = all_job_types),
  y = factor(experience$Strata.Level[match(all_occupations, experience$Occupation)], levels = experience_levels)
)

# Remove rows with missing x or y values
nodes <- nodes[complete.cases(nodes$x, nodes$y), ]

# Remove rows with identical Occupation1 and Occupation2
filtered_data <- filtered_data[!(filtered_data$Occ1 == filtered_data$Occ2), ]

gg <- ggplot(nodes, aes(x = x, y = y, text = paste0("Selected Jobs: ", name))) +
  geom_jitter(width = 0.2, height = 0.2, size = 1, color = "steelblue") +
  labs(x = "Job Type", y = "Experience Level") +
  theme_minimal() +
  theme(panel.grid = element_blank()) +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(20, 20, 20, 20))

p <- ggplotly(gg) %>% config(doubleClickDelay = 1000) 

# capture jitter data
df3 <- data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, 
                  nm = nodes$name, x1 = nodes$x, y1 = nodes$y)


 xx <- lapply(1:nrow(filtered_data), function(j) { filter(nodes, nodes$name == filtered_data[j, ]$Occ1) %>% select(x, y)}) %>% bind_rows(); fd2 <- cbind(filtered_data, xx) %>% as.data.frame()

fd2 <- cbind(filtered_data, xx) %>% as.data.frame()

# create a simulation of jobs that match
invisible(lapply(1:nrow(df3), function(j) {
  dt <- df3[j, ]                          # point the lines will originate from
  mtch <- fd2 %>% 
    filter(x == dt$x1, y == dt$y1, Occ1 == dt$nm) %>%  # matching occ2
    select(Occ2) %>% unlist(use.names = F)
  nodes4 <- df3[df3$nm %in% mtch, ]       # extract matched x, y positions
  if(nrow(nodes4) < 1) {
    p <<- p %>%                           # create trace so indices remain correct!
      add_lines(x = rep(df3[j, ]$x, 2), y = rep(df3[j, ]$y, 2), visible = F)                      # create lines
    return()                              # if no similar occupations
  }
  # create segment vectors for x and y
  xs <- lapply(1:nrow(nodes4), function(m) {c(dt$x, nodes4[m, ]$x, NA)}) %>% unlist()
  ys <- lapply(1:nrow(nodes4), function(m) {c(dt$y, nodes4[m, ]$y, NA)}) %>% unlist()
  
  # get row numbers of connected data
  vect <- which(df3$x %in% nodes4$x & df3$y %in% nodes4$y)
  cdt[[j]] <<- vect - 1 # 0 ind in JS, so subtract one from every value
  p <<- p %>% 
    add_lines(x = xs, y = ys, visible = F)                # create lines
}))

p


p %>% htmlwidgets::onRender(
  "function(el, x) {
    nms = ['curveNumber', 'pointNumber'];
    coll = [];                                      
    giveMe = [];                                 
    oArr = el.data[0];                 
    redu = function(val, arr) {                 
      return arr.reduce((these, those) => {
        return Math.abs(those - val) < Math.abs(these - val) ? those : these;
      });
    }
    closest = function(xval, yval) { /* p.xvals/yvals from pt data; arr is x/y data obj */
      /* id nearest x and nearest y, make sure they match, if no match, take larger index */
      xpt = redu(xval, oArr.x);           /* get closest data point for x axis*/
      ypt = redu(yval, oArr.y);           /* get closest data point for y axis*/
      xi = oArr.x.indexOf(xpt);           /* get index value for x data point */
      yi = oArr.x.indexOf(ypt);           /* get index value for x data point */
      return xi > yi ? xi : yi;          /* if the indices != return larger # */
    }
    el.on('plotly_hover', function(p) {
      pt = p;                                   /* global: for use in unhover */
    })
    el.on('plotly_unhover', function(p) {       /* create persistent tooltips */
      if(coll.length > 0){           /* if click occurred else no persistence */
        if(giveMe.length < 1) return;   /* are there lines connecting points? */
        if(!Array.isArray(giveMe)) giveMe = [giveMe]; /* make sure its an array */
        whatNow = closest(pt.xvals[0], pt.yvals[0]);  /* mouse on connected point? */
        if(giveMe.includes(whatNow)) {    /* if hover pointIndex is connected */
          coll[1] = whatNow;         /* add connected point to array for tips */
          hvr = [];                     /* clear array for curve & point list */
          for(ea in coll) {                       /* create list for hovering */ 
            var oj = {}; oj[nms[0]] = 0; 
            oj[nms[1]] = coll[ea]; 
            hvr.push(oj);
          }
        } else {
          hvr = [{'curveNumber': 0, 'pointNumber': coll[0]}]; /* if coll, create tooltip */
        }
        Plotly.Fx.hover(el, hvr);                      /* persistent tooltips */
      } 
    })
    el.on('plotly_click', function(p) {     /* create persistent lines upon click */
                                          /* if any lines already vis-- hide them */
      Plotly.restyle(el, {'visible': false}, pt.xaxes[0]._traceIndices.slice(1,));
      giveIt = p.points[0].pointIndex;  /* capture scatter index for curve number */
      if(p.points[0].customdata) {
        giveMe = p.points[0].customdata;       /* get point's array of customdata */
      } else {giveMe = []}
      coll[0] = giveIt;                   /* collect index for persistent tooltip */
      Plotly.restyle(el, {'visible': true}, [giveIt + 1]);
    })
    el.on('plotly_doubleclick', function(p) { /* remove lines & pers tooltips */
      Plotly.restyle(el, {'visible': false}, pt.xaxes[0]._traceIndices.slice(1,));
      coll = [];      /* reset arrays, until next double click */
      giveMe = [];
    }) 
  }")

I've tried this method, but there are some issues with the colors.

# Create a new data frame for the background
background_data <- expand.grid(x = levels(nodes$x), y = levels(nodes$y))
background_data$color_group <- interaction(background_data$x, background_data$y)

# Define colors vector
colors <- c("#FFFFE0", "#FFEF00", "#FFDb66", "#e4d99d", "#FFFF33", "#FFEF55", "#FFDc99", "#e4d99e", "#FFFF99", "#FFEE55", "#FFDd99", "#e4d99d", "#FFFF66", "#FFEA00", "#FFDF00", "#e4d00a", "#FFFF33", "#FFEB00", "#FFDa66", "#e4d55b")

gg <- ggplot(nodes, aes(x = x, y = y, text = paste0("Selected Jobs: ", name))) +
  labs(x = "Job Type", y = "Experience Level") +
  theme_minimal() +
  theme(panel.grid = element_blank()) +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(20, 20, 20, 20))

# Create the plot with geom_tile for background colors
gg_with_background <- gg +
  geom_tile(data = nodes, aes(x = x, y = y, fill = paste(x, y)), alpha = 0.5, color = "white", show.legend = FALSE) +
  scale_fill_manual(values = colors) +  # Use the manual colors here
  guides(fill = FALSE)

# Add the points with geom_jitter()
gg_with_jitters <- gg_with_background +
  geom_jitter(width = 0.2, height = 0.2, size = 1, color = "steelblue") 
  
# Print the final combined plot
print(gg_with_jitters)


p <- ggplotly(gg_with_jitters) %>% config(doubleClickDelay = 1000) 

# capture jitter data
df3 <- data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, 
                  nm = nodes$name, x1 = nodes$x, y1 = nodes$y)


 xx <- lapply(1:nrow(filtered_data), function(j) { filter(nodes, nodes$name == filtered_data[j, ]$Occ1) %>% select(x, y)}) %>% bind_rows(); fd2 <- cbind(filtered_data, xx) %>% as.data.frame()

fd2 <- cbind(filtered_data, xx) %>% as.data.frame()

# create a simulation of jobs that match
invisible(lapply(1:nrow(df3), function(j) {
  dt <- df3[j, ]                          # point the lines will originate from
  mtch <- fd2 %>% 
    filter(x == dt$x1, y == dt$y1, Occ1 == dt$nm) %>%  # matching occ2
    select(Occ2) %>% unlist(use.names = F)
  nodes4 <- df3[df3$nm %in% mtch, ]       # extract matched x, y positions
  if(nrow(nodes4) < 1) {
    p <<- p %>%                           # create trace so indices remain correct!
      add_lines(x = rep(df3[j, ]$x, 2), y = rep(df3[j, ]$y, 2), visible = F)                      # create lines
    return()                              # if no similar occupations
  }
  # create segment vectors for x and y
  xs <- lapply(1:nrow(nodes4), function(m) {c(dt$x, nodes4[m, ]$x, NA)}) %>% unlist()
  ys <- lapply(1:nrow(nodes4), function(m) {c(dt$y, nodes4[m, ]$y, NA)}) %>% unlist()
  
  # get row numbers of connected data
  vect <- which(df3$x %in% nodes4$x & df3$y %in% nodes4$y)
  cdt[[j]] <<- vect - 1 # 0 ind in JS, so subtract one from every value
  p <<- p %>% 
    add_lines(x = xs, y = ys, visible = F)                # create lines
}))

...same codes as the above...

There are two problems.

There are 5 white background colors that are not supposed to be there. I guess it is because there are not jitters. However, it showed me the correct colors with different codes.
When attempting to #capture jitter data with the following code:

df3 <- data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, nm = nodes$name, x1 = nodes$x, y1 = nodes$y) I encountered this error message:

"Error in data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, nm = nodes$name, : arguments imply differing numbers of rows: 5, 124."

Due to this error, I am unable to proceed with creating lines. I'm unsure if the other codes below the df3 would work or not, since df3 is not running.

Do you have any ideas?

P.S. I have an optional question. It is not necessary, but if you know the answer, please help me.

Is it possible to have animations when lines are appearing with clicking? For example, if there are 6 lines, I want the lines to appear not all together but one by one, and the lines should be connected smoothly, not suddenly.

What I mean by 'smooth' is something like this: Currently, the line connects two jobs right away: 0 --------- 0

What I want is connecting them like this(fast speed): 0 - 0 0 ---- 0 0 ------ 0 0 ---------0

Solution

Alright, this is what I propose.

Create ggplotly object with both the new plot (with geom_tile) and the original plot.

The plot with geom_tile will be used for extracting data only.

from the ggplot object, we'll capture color by panel (square) data
from the ggplotly object we'll capture the rgb for each hex
using these new objects, we'll create a list of Plotly shapes to replace what would be traces that represent geom_tile

For some reason, you have to use rgb in Plotly shapes, while this restriction doesn't appear to exist anywhere else in Plotly.

Using the original plot, create all of the other objects. After they are created and the customdata is added, you'll replace the existing shapes with those you created. (When you use ggplotly there is always a shapes object added--even if it represents nothing as in this plot...odd, I know.)

First, capturing the data from the new plot and creating the list of shapes.

library(tidyverse)
library(plotly)

# nodes data from original question
# colors, background_data from this question
# filtered_data from original question

# plot with geom_tile
gg1 <- ggplot() +          # retained for panel data and rgb
  geom_tile(data = background_data, 
            aes(x = x, y = y, fill = color_group), color = "white", alpha = .5) + 
  geom_jitter(data = nodes, aes(x = x, y = y, text = name), 
              width = 0.2, height = 0.2, size = 1, color = "steelblue")

p1 <- ggplotly(gg1) # create plotly so that rgb's are calculated for you
crgb <- invisible(lapply(1:length(p1$x$data), function(i) { # extract rgb (not hex)
  p1$x$data[[i]]$fillcolor
})) %>% unlist()
p1
# capture color assignments
gco <- ggplot_build(gg1)$data[[1]][, 1:3] # fill, x, y

# create background shapes for Plotly (instead of using geom_tile)
shp <- lapply(1:nrow(background_data), function(k) {
  list(type = "rect", fillcolor = crgb[k],      # in shapes you have to use rgb (why!?!??)
       xref = "paper", yref = "paper", layer = "below",
       opacity = .6, line = list(width = .001), # essentially, make line invisible
       x0 = 1/length(unique(background_data$x)) * (gco[k, ]$x - 1), # using paper space
       x1 = 1/length(unique(background_data$x)) * gco[k, ]$x,
       y0 = 1/length(unique(background_data$y)) * (gco[k, ]$y - 1),
       y1 = 1/length(unique(background_data$y)) * gco[k, ]$y)
})

Now, back to the plot created in your last question. (This is unchanged from your previous question and my answer to that question.)

gg <- ggplot(nodes, aes(x = x, y = y, text = paste0("Selected Jobs: ", name))) +
  geom_jitter(width = 0.2, height = 0.2, size = 1, color = "steelblue") +
  labs(x = "Job Type", y = "Experience Level") +
  theme_minimal() +
  theme(panel.grid = element_blank()) +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(20, 20, 20, 20))


p <- ggplotly(gg) %>% config(doubleClickDelay = 1000)
p

# capture jitter data once persistent
df3 <- data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, 
                  nm = nodes$name, x1 = nodes$x, y1 = nodes$y)

xx <- lapply(1:nrow(filtered_data), function(j) {  # match jobs
  filter(nodes, nodes$name == filtered_data[j, ]$Occ1) %>% 
    select(x, y)
  }) %>% bind_rows()
fd2 <- cbind(filtered_data, xx) %>% as.data.frame() # create matched jobs list w/ coord

cdt = list()  # for the customdata field

# retain order of points in lines' traces; this creates lines' traces
invisible(lapply(1:nrow(df3), function(j) {
  dt <- df3[j, ]                          # point the lines will originate from
  mtch <- fd2 %>% 
    filter(x == dt$x1, y == dt$y1, Occ1 == dt$nm) %>%  # matching occ2
    select(Occ2) %>% unlist(use.names = F)
  nodes4 <- df3[df3$nm %in% mtch, ]       # extract matched x, y positions
  if(nrow(nodes4) < 1) {
    p <<- p %>%                           # create trace so indices remain correct!
      add_lines(x = rep(df3[j, ]$x, 2), y = rep(df3[j, ]$y, 2), visible = F)                      # create lines
    return()                              # if no similar occupations
  }
  # create segment vectors for x and y
  xs <- lapply(1:nrow(nodes4), function(m) {c(dt$x, nodes4[m, ]$x, NA)}) %>% unlist()
  ys <- lapply(1:nrow(nodes4), function(m) {c(dt$y, nodes4[m, ]$y, NA)}) %>% unlist()
  
  # get row numbers of connected data
  vect <- which(df3$x %in% nodes4$x & df3$y %in% nodes4$y)
  cdt[[j]] <<- vect - 1 # 0 ind in JS, so subtract one from every value
  p <<- p %>% 
    add_lines(x = xs, y = ys, visible = F)                # create lines
}))

p <- plotly_build(p)

p$x$data[[1]]$customdata <- cdt   # add customdata vectors to plot

Now, we're at where we would next add the call for onRender, but before we do that we need to add shapes.

p$x$layout$shapes = shp           # this is NEW!!! adding shapes here!
p                                 # inspect for expected

Now you can run the p %>% htmlwidgets::onRender(... of your choice.

Using the most recent option: