Search code examples
rshinyk-meansdensity-plotgeom-vline

Adding vlines to a density plot in Shiny


I am trying to add vlines to a density plot in shiny for R. For reproducible purposes, I will use the iris data set. The data is clustered using kmeans from the cluster package. Outliers are located by measuring distances from the centers of clusters. Then I wish to create a density plot that has vlines and labels for each of the outliers.

The code works outside of shiny. Inside my shiny app, I get an error message: Error in [: invalid subscript type 'list'. I have tried unlist and making the outliers a variety of other forms and still get the list error. How do I add the vlines?

Global

library(tidyverse)
library(cluster)
library(shiny)

require(iris)

UI

ui <- fluidPage({
  pageWithSidebar(
    headerPanel('Iris k-means clustering'),
    sidebarPanel(
      numericInput('clusters', 'Cluster count', 3, min = 1, max = 9)
    ),
    mainPanel(
      plotOutput('plot1')
    )
  )
})

Server

server <- function(input, output){
  
  ClusterData <- reactive({
    iris[,1:4]
  })
  
  # need to keep row numbers for outlier labels
  ClusterData2 <- reactive({
    ClusterData2 <- data.frame(ClusterData())
    
    row.names(ClusterData2) <- 1:nrow(ClusterData2)
    
    return(ClusterData2)
  })
  

  # scale the iris data
  ScaledData <- reactive({
    scale(ClusterData2())
  })
  
  # kmeans clustering
  final <- reactive({
    kmeans(ScaledData(), 3, nstart = 25)
  })
  
  # find centers
  states.centers <- reactive({
    final()$centers[final()$cluster, ]
  })
  
  # find outliers
  distances <- reactive({
    sqrt(rowSums((ScaledData() - states.centers())^2))
  })
  
  # bind distances back to data
  outliers <- reactive({
    cbind(ClusterData(), Distance = distances())
  })
  
  # bind cluster number to data
  clusterMember <- reactive({
    cbind(outliers(), clusterNum = final()$cluster)
  })
  
  # turn into data frame
  clusterMember2 <- reactive({
    as.data.frame(clusterMember())
  })
  
  # find points that are their own cluster
  dist0 <- reactive({
    clusterMember() %>%
      filter(distances() == 0)
  })
  
  # arrange distances largest to smallest
  distArrange <- reactive({
    clusterMember() %>%
      arrange(desc(Distance))
  })
  
  # find top 5 outliers
  filtTop5 <- reactive({
    distArrange()[1:5,]
  })
  
  # bind outliers and single clusters together
  AllOutliers <- reactive({
    rbind(filtTop5(), dist0())
    
  })
  
  
  # for density plot: vlines and labels
  lines_amt <- reactive({
    data.frame(vlines = (clusterMember2()$Petal.Length[c(AllOutliers())]), labels = c(AllOutliers()))
  })
  
  
  
  ########## output
  output$plot1 <- renderPlot({
    ClusterData() %>%
      ggplot(aes(x = Petal.Length)) +
      geom_density(fill = "blue", alpha = 0.4) #+
    geom_vline(data = lines_amt(), aes(xintercept = get(vlines)))
  })
  
  
}

Run App

shinyApp(ui, server)

Thank you.

The plot should look like the link.

PetalLengthDensityPlot


Solution

  • I think I have it all sorted out. Essentially I removed your lines_amt() reactive, and replaced geom_vline(), while also adding an annotate() for the labels:

    library(tidyverse)
    library(cluster)
    library(shiny)
    
    ui <- fluidPage({
      pageWithSidebar(
        headerPanel('Iris k-means clustering'),
        sidebarPanel(
          numericInput('clusters', 'Cluster count', 3, min = 1, max = 9)
        ),
        mainPanel(
          plotOutput('plot1')
        )
      )
    })
    
    server <- function(input, output){
      
      ClusterData <- reactive({
        iris[,1:4]
      })
      
      # need to keep row numbers for outlier labels
      ClusterData2 <- reactive({
        ClusterData2 <- data.frame(ClusterData())
        
        row.names(ClusterData2) <- 1:nrow(ClusterData2)
        
        return(ClusterData2)
      })
      
      
      # scale the iris data
      ScaledData <- reactive({
        scale(ClusterData2())
      })
      
      # kmeans clustering
      final <- reactive({
        kmeans(ScaledData(), 3, nstart = 25)
      })
      
      # find centers
      states.centers <- reactive({
        final()$centers[final()$cluster, ]
      })
      
      # find outliers
      distances <- reactive({
        sqrt(rowSums((ScaledData() - states.centers())^2))
      })
      
      # bind distances back to data
      outliers <- reactive({
        cbind(ClusterData(), Distance = distances())
      })
      
      # bind cluster number to data
      clusterMember <- reactive({
        cbind(outliers(), clusterNum = final()$cluster)
      })
      
      # turn into data frame
      clusterMember2 <- reactive({
        as.data.frame(clusterMember())
      })
      
      # find points that are their own cluster
      dist0 <- reactive({
        clusterMember() %>%
          filter(distances() == 0)
      })
      
      # arrange distances largest to smallest
      distArrange <- reactive({
        clusterMember() %>%
          arrange(desc(Distance))
      })
      
      # find top 5 outliers
      filtTop5 <- reactive({
        distArrange()[1:5,]
      })
      
      # bind outliers and single clusters together
      AllOutliers <- reactive({
        rbind(filtTop5(), dist0())
        
      })
      
      
      # for density plot: vlines and labels #Made the lines and labels without this
      # lines_amt <- reactive({
      #   data.frame(vlines = (clusterMember2()$Petal.Length[c(AllOutliers()$Petal.Length)]), labels = c(AllOutliers()))
      # })
      
      
      
      ########## output
      output$plot1 <- renderPlot({
        ClusterData() %>%
          ggplot(aes(x = Petal.Length)) +
          geom_density(fill = "blue", alpha = 0.4) +
          # geom_vline(xintercept = lines_amt()$vlines)
          geom_vline(xintercept = AllOutliers()$Petal.Length)+ #Used this in place of your current geom_vline() 
          annotate("text", x = AllOutliers()$Petal.Length, #Added this to add the text
          y = 0,
          label = rownames(AllOutliers()),
          hjust = -1,
          vjust = -1)
        
        # geom_vline(data = lines_amt(), aes(xintercept = get(vlines)))
      })
      
      
    }
    
    shinyApp(ui, server)
    

    Hopefully this helps!