I have the following dataframe and I want to generate a scatter plot with points ordered by date.
# Example data as an R `structure()` literal (appears to be dput() output).
# It describes a grouped tibble (class "grouped_df") with 41 rows and four
# columns: cluster (character), name (character), date (class "Date", stored
# as day counts) and group_id (integer). The `groups` attribute records the
# grouping by `cluster`: 31 groups, with the row indices of each in `.rows`.
# NOTE(review): the literal is not assigned to a name here; presumably it is
# the `subset_so` object used by the plotting code - confirm.
structure(list(cluster = c("uid_1", "uid_2", "uid_3", "uid_4",
"uid_5", "uid_6", "Cluster_07", "Cluster_07", "Cluster_07", "Cluster_07",
"Cluster_07", "uid_12", "uid_13", "Cluster_07", "Cluster_07",
"uid_16", "Cluster_07", "Cluster_07", "uid_19", "uid_20", "uid_21",
"uid_22", "uid_23", "uid_24", "uid_25", "uid_26", "uid_27", "uid_28",
"uid_29", "Cluster_50", "uid_31", "uid_32", "uid_33", "uid_34",
"uid_35", "uid_36", "Cluster_50", "uid_38", "uid_39", "uid_40",
"Cluster_50"), name = c("E569", "E847", "E848", "E882", "E398",
"E443", "E462", "E970", "E1078", "E831", "E866", "E1047", "E964",
"E507", "E774", "E1106", "E943", "E1069", "E867", "E868", "E44",
"E60", "E869", "E482", "E125", "E126", "E114", "E123", "E81",
"E504", "E178", "E179", "E180", "E181", "E299", "E793", "E219",
"E182", "E183", "E184", "E650"), date = structure(c(18281, 18396,
18396, 18414, 18441, 18526, 18586, 18267, 18300, 18317, 18418,
18437, 18461, 18469, 18470, 18476, 18497, 18501, 18564, 18568,
18341, 18353, 18383, 18401, 18411, 18411, 18423, 18435, 18495,
18498, 18542, 18552, 18559, 18559, 18598, 18604, 18617, 18277,
18277, 18280, 18340), class = "Date"), group_id = c(3L, 8L, 19L,
28L, 30L, 31L, 1L, 1L, 1L, 1L, 1L, 4L, 5L, 1L, 1L, 6L, 1L, 1L,
7L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 2L, 20L,
21L, 22L, 23L, 24L, 25L, 2L, 26L, 27L, 29L, 2L)), row.names = c(NA,
-41L), groups = structure(list(cluster = c("Cluster_07", "Cluster_50",
"uid_1", "uid_12", "uid_13", "uid_16", "uid_19", "uid_2", "uid_20",
"uid_21", "uid_22", "uid_23", "uid_24", "uid_25", "uid_26", "uid_27",
"uid_28", "uid_29", "uid_3", "uid_31", "uid_32", "uid_33", "uid_34",
"uid_35", "uid_36", "uid_38", "uid_39", "uid_4", "uid_40", "uid_5",
"uid_6"), .rows = structure(list(c(7L, 8L, 9L, 10L, 11L, 14L,
15L, 17L, 18L), c(30L, 37L, 41L), 1L, 12L, 13L, 16L, 19L, 2L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 3L, 31L,
32L, 33L, 34L, 35L, 36L, 38L, 39L, 4L, 40L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -31L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
I was able to generate the plot using the following code.
# Current attempt: plot cluster against date, drawing a point per row and a
# black line joining the points that belong to the same cluster. Each axis
# variable is passed through reorder() using the other variable as the key.
ggplot(
  subset_so,
  aes(
    x = reorder(date, factor(cluster)),
    y = reorder(factor(cluster), date)
  )
) +
  geom_line(aes(group = cluster), color = "black") +
  geom_point(size = 3) +
  labs(x = "date", y = "cluster") +
  theme_light() +
  theme(
    # Text sizes
    text = element_text(size = 12),
    axis.title.x = element_text(vjust = 0, size = 11),
    axis.title.y = element_text(vjust = 2, size = 11),
    # Rotate the date labels so they do not overlap
    axis.text.x = element_text(angle = 90, hjust = 1, size = 9),
    # Legend styling
    legend.position = "bottom",
    legend.key = element_rect(fill = "gray96"),
    legend.title = element_text(size = 10),
    # Drop the vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
However, what I want is for the y-axis to be ordered by each cluster's earliest date, irrespective of its other points; for Cluster_07 and Cluster_50, that means ordering by their first (earliest) point.
For example,
Cluster_07 has the earliest date, so it should be at the very bottom of the y-axis. Similarly, Cluster_50 should be placed before uid_21 on the y-axis.
Is there a way to do this? Many thanks in advance!
It sounds like you want to sort clusters based on their minimum date. We can do this by ungrouping (so that the clusters can be compared with one another) and then reordering the factor; for that I like forcats::fct_reorder,
whose three main parameters are 1) the vector to be reordered, 2) the variable to sort by, and 3) the summary function to use.
library(dplyr); library(ggplot2)
# Order the clusters on the y-axis by their earliest date, then draw the same
# point-and-line plot with the real dates on the x-axis.
subset_so %>%
  # Remove the grouping so fct_reorder() sees all clusters at once
  ungroup() %>%
  # Turn cluster into a factor whose levels are sorted by min(date) per cluster
  mutate(cluster = forcats::fct_reorder(cluster, date, .fun = min)) %>%
  ggplot(aes(x = date, y = cluster)) +
  geom_line(aes(group = cluster), color = "black") +
  geom_point(size = 3) +
  labs(x = "date", y = "cluster") +
  theme_light() +
  theme(
    # Text sizes
    text = element_text(size = 12),
    axis.title.x = element_text(vjust = 0, size = 11),
    axis.title.y = element_text(vjust = 2, size = 11),
    # Rotate the date labels so they do not overlap
    axis.text.x = element_text(angle = 90, hjust = 1, size = 9),
    # Legend styling
    legend.position = "bottom",
    legend.key = element_rect(fill = "gray96"),
    legend.title = element_text(size = 10),
    # Drop the vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )