I have three columns: the user ID, the time the user logged on, and how long the user stayed logged on (in seconds). Here is a sample of the data (there are more than 4000 users in total):
# Sample of the log-on data described above (looks like dput() output):
# a tibble-classed data frame with 17 rows and three columns.
structure(
list(
# User identifier, one per row.
id = c(
"id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9",
"id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17"
),
# Time at which the user logged on, stored as an hms/difftime value whose
# unit is seconds (presumably seconds since midnight -- confirm).
time_start = structure(
c(
37176, 30846, 30972, 38432, 31016, 37846, 30588, 31303, 37312, 30849,
38466, 30683, 38412, 30643, 29865, 30056, 31727
),
class = c("hms", "difftime"), units = "secs"
),
# How long the user stayed logged on, in seconds (integer).
time_sec = c(
3987L, 2720L, 2812L, 1729L, 1851L, 3484L, 1881L, 2295L, 3770L, 3350L,
3626L, 2525L, 3570L, 2795L, 3606L, 4495L, 2517L
)
),
.Names = c("id", "time_start", "time_sec"),
# c(NA, -17L) is R's compact form for automatic row names 1..17.
row.names = c(NA, -17L),
class = c("tbl_df", "tbl", "data.frame")
)
I am trying to create a graph showing how many users are logged on at any given time, sampled at fixed intervals (for example, every 5 minutes).
For example: at 9:00 there were X users,
and at 9:05 there were Y users.
A histogram, or something similar, of how many users are connected at any given time.
My main problem is no longer counting users once they have logged off.
I feel the answer is within reach, but I can't find it and I have no idea what precisely to search for.
Here is a simple suggestion:
library(ggplot2)
library(RColorBrewer)
# Palette generator that interpolates the 11-colour ColorBrewer "Spectral"
# scheme; myPalette(n) returns a vector of n colours.
myPalette <- colorRampPalette(
  colors = brewer.pal(n = 11, name = "Spectral")
)
# Sample log-on data: one row per user, as a tibble-classed data frame.
#   id         - user identifier ("id_1" ... "id_17")
#   time_start - log-on time as an hms/difftime value, unit: seconds
#   time_sec   - length of the session in seconds (integer)
data <- structure(
  list(
    id = paste0("id_", seq_len(17L)),
    time_start = structure(
      c(
        37176, 30846, 30972, 38432, 31016, 37846,
        30588, 31303, 37312, 30849, 38466, 30683,
        38412, 30643, 29865, 30056, 31727
      ),
      class = c("hms", "difftime"),
      units = "secs"
    ),
    time_sec = c(
      3987L, 2720L, 2812L, 1729L, 1851L, 3484L,
      1881L, 2295L, 3770L, 3350L, 3626L, 2525L,
      3570L, 2795L, 3606L, 4495L, 2517L
    )
  ),
  row.names = c(NA, -17L),  # automatic row names 1..17
  class = c("tbl_df", "tbl", "data.frame")
)
# Turn `id` into a factor whose levels follow the order in which the IDs first
# appear, so the y axis keeps the rows in data order rather than the default
# sorted order. unique() keeps this working when an ID occurs on several rows:
# factor() raises an error if `levels` contains duplicates.
data$id <- factor(data$id, levels = unique(data$id))

# One horizontal segment per row, running from the log-on time to the log-off
# time (time_start + time_sec) and coloured by the session length.
ggplot(data) +
  geom_segment(
    aes(
      y = id, yend = id,
      x = time_start, xend = time_start + time_sec,
      colour = time_sec
    ),
    # `size` is deprecated for line widths since ggplot2 3.4.0.
    linewidth = 2
  ) +
  theme_bw() +
  # Reversed 100-step palette for the duration legend.
  scale_colour_gradientn("Duration", colours = rev(myPalette(100))) +
  scale_y_discrete("Users") +
  scale_x_time("Connexion time")
EDIT: After understanding your question, here is one simple way to do what I think you are looking for.
# Count how many users are logged on at each instant of a regular time grid and
# draw that count as a line over time.

# Log-on and log-off time of every row; computed once instead of once per grid
# point.
session_start <- data$time_start
session_end <- data$time_start + data$time_sec

# 1000 evenly spaced instants between the first log-on and the last log-off.
# The names `time_grid` / `n_online` avoid masking base::time() and base::sum().
time_grid <- seq(
  from = min(session_start),
  to = max(session_end),
  length.out = 1000
)

# A user counts as online from the instant of logging on (inclusive) until the
# instant of logging off (exclusive), i.e. start <= instant < end. vapply()
# guarantees an integer vector even for empty input.
n_online <- vapply(
  time_grid,
  FUN = function(instant) {
    sum(session_start <= instant & instant < session_end)
  },
  FUN.VALUE = integer(1)
)

# Column names `time` and `sum` are kept so the result has the same layout as
# before.
data2 <- data.frame(time = time_grid, sum = n_online)

ggplot(data2) +
  geom_line(aes(x = time, y = sum)) +
  scale_x_time() +
  theme_bw()