Search code examples
rggplot2visualizationggplotly

How to add background colours with geom_gitter and geom based on x and y axis (following question from another)


This is the following question from another question: How to add links to similar jobs on ggplot points?

==============================

Hi,

I've created the visualization with the help of Kat from the previous question above. What I want to do is to add background colors to this visualization below.

# Create nodes dataframe with x and y coordinates
nodes <- data.frame(
  name = all_occupations,
  x = factor(jobType$Job_type[match(all_occupations, jobType$Occupation)], levels = all_job_types),
  y = factor(experience$Strata.Level[match(all_occupations, experience$Occupation)], levels = experience_levels)
)

# Remove rows with missing x or y values
nodes <- nodes[complete.cases(nodes$x, nodes$y), ]

# Remove rows with identical Occupation1 and Occupation2
filtered_data <- filtered_data[!(filtered_data$Occ1 == filtered_data$Occ2), ]

gg <- ggplot(nodes, aes(x = x, y = y, text = paste0("Selected Jobs: ", name))) +
  geom_jitter(width = 0.2, height = 0.2, size = 1, color = "steelblue") +
  labs(x = "Job Type", y = "Experience Level") +
  theme_minimal() +
  theme(panel.grid = element_blank()) +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(20, 20, 20, 20))

p <- ggplotly(gg) %>% config(doubleClickDelay = 1000) 

# capture jitter data
df3 <- data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, 
                  nm = nodes$name, x1 = nodes$x, y1 = nodes$y)


 xx <- lapply(1:nrow(filtered_data), function(j) { filter(nodes, nodes$name == filtered_data[j, ]$Occ1) %>% select(x, y)}) %>% bind_rows(); fd2 <- cbind(filtered_data, xx) %>% as.data.frame()

fd2 <- cbind(filtered_data, xx) %>% as.data.frame()

# create a simulation of jobs that match
invisible(lapply(1:nrow(df3), function(j) {
  dt <- df3[j, ]                          # point the lines will originate from
  mtch <- fd2 %>% 
    filter(x == dt$x1, y == dt$y1, Occ1 == dt$nm) %>%  # matching occ2
    select(Occ2) %>% unlist(use.names = F)
  nodes4 <- df3[df3$nm %in% mtch, ]       # extract matched x, y positions
  if(nrow(nodes4) < 1) {
    p <<- p %>%                           # create trace so indices remain correct!
      add_lines(x = rep(df3[j, ]$x, 2), y = rep(df3[j, ]$y, 2), visible = F)                      # create lines
    return()                              # if no similar occupations
  }
  # create segment vectors for x and y
  xs <- lapply(1:nrow(nodes4), function(m) {c(dt$x, nodes4[m, ]$x, NA)}) %>% unlist()
  ys <- lapply(1:nrow(nodes4), function(m) {c(dt$y, nodes4[m, ]$y, NA)}) %>% unlist()
  
  # get row numbers of connected data
  vect <- which(df3$x %in% nodes4$x & df3$y %in% nodes4$y)
  cdt[[j]] <<- vect - 1 # 0 ind in JS, so subtract one from every value
  p <<- p %>% 
    add_lines(x = xs, y = ys, visible = F)                # create lines
}))

p


p %>% htmlwidgets::onRender(
  "function(el, x) {
    nms = ['curveNumber', 'pointNumber'];
    coll = [];                                      
    giveMe = [];                                 
    oArr = el.data[0];                 
    redu = function(val, arr) {                 
      return arr.reduce((these, those) => {
        return Math.abs(those - val) < Math.abs(these - val) ? those : these;
      });
    }
    closest = function(xval, yval) { /* p.xvals/yvals from pt data; arr is x/y data obj */
      /* id nearest x and nearest y, make sure they match, if no match, take larger index */
      xpt = redu(xval, oArr.x);           /* get closest data point for x axis*/
      ypt = redu(yval, oArr.y);           /* get closest data point for y axis*/
      xi = oArr.x.indexOf(xpt);           /* get index value for x data point */
      yi = oArr.x.indexOf(ypt);           /* get index value for x data point */
      return xi > yi ? xi : yi;          /* if the indices != return larger # */
    }
    el.on('plotly_hover', function(p) {
      pt = p;                                   /* global: for use in unhover */
    })
    el.on('plotly_unhover', function(p) {       /* create persistent tooltips */
      if(coll.length > 0){           /* if click occurred else no persistence */
        if(giveMe.length < 1) return;   /* are there lines connecting points? */
        if(!Array.isArray(giveMe)) giveMe = [giveMe]; /* make sure its an array */
        whatNow = closest(pt.xvals[0], pt.yvals[0]);  /* mouse on connected point? */
        if(giveMe.includes(whatNow)) {    /* if hover pointIndex is connected */
          coll[1] = whatNow;         /* add connected point to array for tips */
          hvr = [];                     /* clear array for curve & point list */
          for(ea in coll) {                       /* create list for hovering */ 
            var oj = {}; oj[nms[0]] = 0; 
            oj[nms[1]] = coll[ea]; 
            hvr.push(oj);
          }
        } else {
          hvr = [{'curveNumber': 0, 'pointNumber': coll[0]}]; /* if coll, create tooltip */
        }
        Plotly.Fx.hover(el, hvr);                      /* persistent tooltips */
      } 
    })
    el.on('plotly_click', function(p) {     /* create persistent lines upon click */
                                          /* if any lines already vis-- hide them */
      Plotly.restyle(el, {'visible': false}, pt.xaxes[0]._traceIndices.slice(1,));
      giveIt = p.points[0].pointIndex;  /* capture scatter index for curve number */
      if(p.points[0].customdata) {
        giveMe = p.points[0].customdata;       /* get point's array of customdata */
      } else {giveMe = []}
      coll[0] = giveIt;                   /* collect index for persistent tooltip */
      Plotly.restyle(el, {'visible': true}, [giveIt + 1]);
    })
    el.on('plotly_doubleclick', function(p) { /* remove lines & pers tooltips */
      Plotly.restyle(el, {'visible': false}, pt.xaxes[0]._traceIndices.slice(1,));
      coll = [];      /* reset arrays, until next double click */
      giveMe = [];
    }) 
  }")

plot1

I've tried this method, but there are some issues with the colors.

# Create a new data frame for the background
background_data <- expand.grid(x = levels(nodes$x), y = levels(nodes$y))
background_data$color_group <- interaction(background_data$x, background_data$y)

# Define colors vector
colors <- c("#FFFFE0", "#FFEF00", "#FFDb66", "#e4d99d", "#FFFF33", "#FFEF55", "#FFDc99", "#e4d99e", "#FFFF99", "#FFEE55", "#FFDd99", "#e4d99d", "#FFFF66", "#FFEA00", "#FFDF00", "#e4d00a", "#FFFF33", "#FFEB00", "#FFDa66", "#e4d55b")

gg <- ggplot(nodes, aes(x = x, y = y, text = paste0("Selected Jobs: ", name))) +
  labs(x = "Job Type", y = "Experience Level") +
  theme_minimal() +
  theme(panel.grid = element_blank()) +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(20, 20, 20, 20))

# Create the plot with geom_tile for background colors
gg_with_background <- gg +
  geom_tile(data = nodes, aes(x = x, y = y, fill = paste(x, y)), alpha = 0.5, color = "white", show.legend = FALSE) +
  scale_fill_manual(values = colors) +  # Use the manual colors here
  guides(fill = FALSE)

# Add the points with geom_jitter()
gg_with_jitters <- gg_with_background +
  geom_jitter(width = 0.2, height = 0.2, size = 1, color = "steelblue") 
  
# Print the final combined plot
print(gg_with_jitters)


p <- ggplotly(gg_with_jitters) %>% config(doubleClickDelay = 1000) 

# capture jitter data
df3 <- data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, 
                  nm = nodes$name, x1 = nodes$x, y1 = nodes$y)


 xx <- lapply(1:nrow(filtered_data), function(j) { filter(nodes, nodes$name == filtered_data[j, ]$Occ1) %>% select(x, y)}) %>% bind_rows(); fd2 <- cbind(filtered_data, xx) %>% as.data.frame()

fd2 <- cbind(filtered_data, xx) %>% as.data.frame()

# create a simulation of jobs that match
invisible(lapply(1:nrow(df3), function(j) {
  dt <- df3[j, ]                          # point the lines will originate from
  mtch <- fd2 %>% 
    filter(x == dt$x1, y == dt$y1, Occ1 == dt$nm) %>%  # matching occ2
    select(Occ2) %>% unlist(use.names = F)
  nodes4 <- df3[df3$nm %in% mtch, ]       # extract matched x, y positions
  if(nrow(nodes4) < 1) {
    p <<- p %>%                           # create trace so indices remain correct!
      add_lines(x = rep(df3[j, ]$x, 2), y = rep(df3[j, ]$y, 2), visible = F)                      # create lines
    return()                              # if no similar occupations
  }
  # create segment vectors for x and y
  xs <- lapply(1:nrow(nodes4), function(m) {c(dt$x, nodes4[m, ]$x, NA)}) %>% unlist()
  ys <- lapply(1:nrow(nodes4), function(m) {c(dt$y, nodes4[m, ]$y, NA)}) %>% unlist()
  
  # get row numbers of connected data
  vect <- which(df3$x %in% nodes4$x & df3$y %in% nodes4$y)
  cdt[[j]] <<- vect - 1 # 0 ind in JS, so subtract one from every value
  p <<- p %>% 
    add_lines(x = xs, y = ys, visible = F)                # create lines
}))

...same codes as the above...

Plot2

There are two problems.

  1. There are 5 white background colors that are not supposed to be there. I guess it is because there are not jitters. However, it showed me the correct colors with different codes.
  2. When attempting to #capture jitter data with the following code:

df3 <- data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, nm = nodes$name, x1 = nodes$x, y1 = nodes$y) I encountered this error message:

"Error in data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, nm = nodes$name, : arguments imply differing numbers of rows: 5, 124."

Due to this error, I am unable to proceed with creating lines. I'm unsure if the other codes below the df3 would work or not, since df3 is not running.

Do you have any ideas?

P.S. I have an optional question. It is not necessary, but if you know the answer, please help me.

Is it possible to have animations when lines are appearing with clicking? For example, if there are 6 lines, I want the lines to appear not all together but one by one, and the lines should be connected smoothly, not suddenly.

What I mean by 'smooth' is something like this: Currently, the line connects two jobs right away: 0 --------- 0

What I want is connecting them like this(fast speed): 0 - 0 0 ---- 0 0 ------ 0 0 ---------0


Solution

  • Alright, this is what I propose.

    Create ggplotly object with both the new plot (with geom_tile) and the original plot.

    The plot with geom_tile will be used for extracting data only.

    • from the ggplot object, we'll capture color by panel (square) data
    • from the ggplotly object we'll capture the rgb for each hex
    • using these new objects, we'll create a list of Plotly shapes to replace what would be traces that represent geom_tile

    For some reason, you have to use rgb in Plotly shapes, while this restriction doesn't appear to exist anywhere else in Plotly.

    Using the original plot, create all of the other objects. After they are created and the customdata is added, you'll replace the existing shapes with those you created. (When you use ggplotly there is always a shapes object added--even if it represents nothing as in this plot...odd, I know.)

    First, capturing the data from the new plot and creating the list of shapes.

    library(tidyverse)
    library(plotly)
    
    # nodes data from original question
    # colors, background_data from this question
    # filtered_data from original question
    
    # plot with geom_tile
    gg1 <- ggplot() +          # retained for panel data and rgb
      geom_tile(data = background_data, 
                aes(x = x, y = y, fill = color_group), color = "white", alpha = .5) + 
      geom_jitter(data = nodes, aes(x = x, y = y, text = name), 
                  width = 0.2, height = 0.2, size = 1, color = "steelblue")
    
    p1 <- ggplotly(gg1) # create plotly so that rgb's are calculated for you
    crgb <- invisible(lapply(1:length(p1$x$data), function(i) { # extract rgb (not hex)
      p1$x$data[[i]]$fillcolor
    })) %>% unlist()
    p1
    # capture color assignments
    gco <- ggplot_build(gg1)$data[[1]][, 1:3] # fill, x, y
    
    # create background shapes for Plotly (instead of using geom_tile)
    shp <- lapply(1:nrow(background_data), function(k) {
      list(type = "rect", fillcolor = crgb[k],      # in shapes you have to use rgb (why!?!??)
           xref = "paper", yref = "paper", layer = "below",
           opacity = .6, line = list(width = .001), # essentially, make line invisible
           x0 = 1/length(unique(background_data$x)) * (gco[k, ]$x - 1), # using paper space
           x1 = 1/length(unique(background_data$x)) * gco[k, ]$x,
           y0 = 1/length(unique(background_data$y)) * (gco[k, ]$y - 1),
           y1 = 1/length(unique(background_data$y)) * gco[k, ]$y)
    })
    

    Now, back to the plot created in your last question. (This is unchanged from your previous question and my answer to that question.)

    gg <- ggplot(nodes, aes(x = x, y = y, text = paste0("Selected Jobs: ", name))) +
      geom_jitter(width = 0.2, height = 0.2, size = 1, color = "steelblue") +
      labs(x = "Job Type", y = "Experience Level") +
      theme_minimal() +
      theme(panel.grid = element_blank()) +
      coord_cartesian(clip = "off") +
      theme(plot.margin = margin(20, 20, 20, 20))
    
    
    p <- ggplotly(gg) %>% config(doubleClickDelay = 1000)
    p
    
    # capture jitter data once persistent
    df3 <- data.frame(x = p$x$data[[1]]$x, y = p$x$data[[1]]$y, 
                      nm = nodes$name, x1 = nodes$x, y1 = nodes$y)
    
    xx <- lapply(1:nrow(filtered_data), function(j) {  # match jobs
      filter(nodes, nodes$name == filtered_data[j, ]$Occ1) %>% 
        select(x, y)
      }) %>% bind_rows()
    fd2 <- cbind(filtered_data, xx) %>% as.data.frame() # create matched jobs list w/ coord
    
    cdt = list()  # for the customdata field
    
    # retain order of points in lines' traces; this creates lines' traces
    invisible(lapply(1:nrow(df3), function(j) {
      dt <- df3[j, ]                          # point the lines will originate from
      mtch <- fd2 %>% 
        filter(x == dt$x1, y == dt$y1, Occ1 == dt$nm) %>%  # matching occ2
        select(Occ2) %>% unlist(use.names = F)
      nodes4 <- df3[df3$nm %in% mtch, ]       # extract matched x, y positions
      if(nrow(nodes4) < 1) {
        p <<- p %>%                           # create trace so indices remain correct!
          add_lines(x = rep(df3[j, ]$x, 2), y = rep(df3[j, ]$y, 2), visible = F)                      # create lines
        return()                              # if no similar occupations
      }
      # create segment vectors for x and y
      xs <- lapply(1:nrow(nodes4), function(m) {c(dt$x, nodes4[m, ]$x, NA)}) %>% unlist()
      ys <- lapply(1:nrow(nodes4), function(m) {c(dt$y, nodes4[m, ]$y, NA)}) %>% unlist()
      
      # get row numbers of connected data
      vect <- which(df3$x %in% nodes4$x & df3$y %in% nodes4$y)
      cdt[[j]] <<- vect - 1 # 0 ind in JS, so subtract one from every value
      p <<- p %>% 
        add_lines(x = xs, y = ys, visible = F)                # create lines
    }))
    
    p <- plotly_build(p)
    
    p$x$data[[1]]$customdata <- cdt   # add customdata vectors to plot
    

    Now, we're at where we would next add the call for onRender, but before we do that we need to add shapes.

    p$x$layout$shapes = shp           # this is NEW!!! adding shapes here!
    p                                 # inspect for expected
    

    Now you can run the p %>% htmlwidgets::onRender(... of your choice.

    Using the most recent option:

    enter image description here