Search code examples
r, ggplot2, ggplotly

Graph for user login time in r


I have three columns: the user ID, the time the user logged on, and how long the user stayed logged on (in seconds). Here is a sample of the data (there are more than 4000 users in total):

# Sample of the login data as produced by dput(): one row per user
# (17 rows shown here).
structure(
  list(
    # User identifier.
    id = c(
      "id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
      "id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
    ),
    # Time at which the user logged on, stored as an hms/difftime value
    # expressed in seconds.
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
        38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"), units = "secs"
    ),
    # How long the user stayed logged on, in seconds (integer).
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
      3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  .Names = c("id", "time_start", "time_sec"),
  row.names = c(NA, -17L),
  # Tibble classes: the object is a tbl_df on top of a plain data.frame.
  class = c("tbl_df", "tbl", "data.frame")
)

I am trying to create a graph that shows how many users are logged in at any given time, sampled at a fixed interval (for example, every 5 minutes).
For example: at 9:00 there were N users logged in,
and at 9:05 there were M users logged in.
A histogram, or something similar, of how many users are connected at any given time would do.
My main problem is removing users from the count once they have logged off.

I feel the answer is within reach, but I can't find it and I have no idea what precisely to search for.


Solution

  • Here is a simple proposal:

    library(ggplot2)
    library(RColorBrewer)
    
    # Colour ramp interpolated from the 11-class "Spectral" ColorBrewer palette.
    spectral_ramp <- colorRampPalette(brewer.pal(11, "Spectral"))

    # Sample data: one row per user with the login time (hms, in seconds) and
    # the session length in seconds.
    login_start <- structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312,
        30849, 38466, 30683, 38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"),
      units = "secs"
    )
    login_length <- c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L,
      3350L, 3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
    )
    data <- structure(
      list(
        id = paste0("id_", seq_len(17L)),
        time_start = login_start,
        time_sec = login_length
      ),
      row.names = c(NA, -17L),
      class = c("tbl_df", "tbl", "data.frame")
    )

    # Turn the IDs into a factor whose levels follow the row order, so the
    # y axis lists the users in that order rather than alphabetically.
    id_order <- data$id
    data$id <- factor(id_order, levels = id_order)

    # One horizontal segment per user, running from login to logout and
    # coloured by session length.
    duration_colours <- rev(spectral_ramp(100))

    ggplot(data) +
      geom_segment(
        aes(
          x = time_start,
          xend = time_start + time_sec,
          y = id,
          yend = id,
          colour = time_sec
        ),
        size = 2
      ) +
      theme_bw() +
      scale_colour_gradientn("Duration", colours = duration_colours) +
      scale_y_discrete("Users") +
      scale_x_time("Connexion time")
    

    EDIT: After understanding your question, here is one simple way to do what I think you are looking for.

    # Count how many users are connected at each 5-minute mark.
    #
    # Work in plain seconds so the comparisons below do not rely on
    # hms/difftime arithmetic.
    login_sec <- as.numeric(data$time_start, units = "secs")
    logout_sec <- login_sec + data$time_sec

    # Sampling grid aligned on 5-minute boundaries, spanning the first login
    # to the last logout. Change `step_sec` to use another interval.
    step_sec <- 5 * 60
    grid_sec <- seq(
      from = floor(min(login_sec) / step_sec) * step_sec,
      to = ceiling(max(logout_sec) / step_sec) * step_sec,
      by = step_sec
    )

    # A user is connected at a given instant when login <= instant < logout:
    # counted from the moment they log on, dropped the moment they log off.
    # vapply() guarantees an integer vector with one count per grid point.
    n_connected <- vapply(
      grid_sec,
      function(instant) sum(login_sec <= instant & instant < logout_sec),
      integer(1)
    )

    # Column names `time` and `sum` are kept so existing uses of `data2`
    # keep working.
    data2 <- data.frame(time = grid_sec, sum = n_connected)

    ggplot(data2) +
      geom_line(aes(x = time, y = sum)) +
      scale_x_time() +
      labs(x = "Time of day", y = "Connected users") +
      theme_bw()