I am experiencing issues with the legend entries of a dendrogram. I am utilizing the code example provided here.
library(ggplot2)
library(ggdendro)
library(plyr)
library(zoo)
# Work on a copy: never modify a built-in dataset in place.
df <- USArrests
# New labels, one per row. Derived from the data so the label count always
# matches the number of rows instead of relying on a hard-coded 50.
labs <- paste0("sta_", seq_len(nrow(df)))
rownames(df) <- labs # set new row names
cut <- 4 # Number of clusters
# Hierarchical clustering on Euclidean distances with average linkage.
hc <- hclust(dist(df), method = "average")
# Extract the plotting data for the tree (used below via $segments / $labels).
dendr <- dendro_data(hc, type = "rectangle")
# Cluster membership of every observation when the tree is cut into `cut` groups.
clust <- cutree(tree = hc, k = cut)
# One row per observation: its label and the cluster it was assigned to.
clust.df <- data.frame(
  label = names(clust),
  cluster = clust
)
# Split dendrogram into upper grey section and lower coloured section
seg_y <- dendr$segments$y
seg_yend <- dendr$segments$yend
# Distinct segment start heights, tallest first.
height <- sort(unique(seg_y), decreasing = TRUE, na.last = TRUE)
# Cut halfway between the `cut`-th and the (`cut` - 1)-th tallest heights.
cut.height <- mean(height[c(cut, cut - 1)])
# line == 1: segment belongs to the upper section; line == 2: lower section.
ends_above_cut <- seg_yend > cut.height
flat_above_cut <- seg_y == seg_yend & seg_y > cut.height
dendr$segments$line <- ifelse(
  ends_above_cut,
  1,
  ifelse(flat_above_cut, 1, 2)
)
# Number the clusters
seg_line <- dendr$segments$line
# A step of +1 in `line` (1 -> 2) marks where a run of lower-section segments
# begins; the first element has no predecessor and is seeded with -1.
cluster_id <- c(-1, diff(seg_line))
change <- which(cluster_id == 1)
# Tag the start of each run with ids 2, 3, ..., cut + 1 (id 1 is the upper section).
for (i in seq_len(cut)) {
  cluster_id[change[i]] <- i + 1
}
# Unchanged steps become NA so the run's id can be carried forward over them;
# every upper-section segment is forced to id 1.
cluster_id[cluster_id == 0] <- NA
cluster_id[seg_line == 1] <- 1
dendr$segments$cluster <- na.locf(cluster_id)
# Consistent numbering between segment$cluster and label$cluster
# Ordering used to sort the observations. levels() returns NULL for a
# character column, which would turn every label into NA, so fall back to the
# row order of the label table when the column is not a factor.
# NOTE(review): the fallback assumes dendr$labels rows are in drawing order - confirm.
leaf_labels <- dendr$labels$label
leaf_order <- if (is.factor(leaf_labels)) {
  levels(leaf_labels)
} else {
  unique(as.character(leaf_labels))
}
clust.df$label <- factor(clust.df$label, levels = leaf_order)
clust.df <- arrange(clust.df, label)
# Relabel clusters in order of first appearance as 2, 3, ..., cut + 1 so they
# line up with the ids given to the segments (id 1 is the upper grey section).
clust.df$cluster <- factor(
  clust.df$cluster,
  levels = unique(clust.df$cluster),
  labels = seq_len(cut) + 1
)
# Attach the cluster id to every label row used for plotting.
dendr[["labels"]] <- merge(dendr[["labels"]], clust.df, by = "label")
# Plot the dendrogram
ggplot() +
  # Branches: coloured by cluster id, no legend keys of their own.
  geom_segment(
    data = segment(dendr),
    mapping = aes(
      x = x, y = y, xend = xend, yend = yend,
      size = factor(line), colour = factor(cluster)
    ),
    lineend = "square",
    show.legend = FALSE
  ) +
  # Leaf labels: coloured by cluster id, drawn as filled rectangles in the legend.
  geom_text(
    data = label(dendr),
    mapping = aes(x = x, y = y, label = label, colour = factor(cluster)),
    hjust = -0.2,
    size = 3,
    key_glyph = "rect"
  ) +
  # First colour is for the upper section (id 1), the rest for the clusters.
  scale_colour_manual(
    values = c("grey60", rainbow(cut)),
    labels = c("remove this entry", paste("cluster", (1:cut)))
  ) +
  # Both line classes are drawn with the same width.
  scale_size_manual(values = c(0.5, 0.5)) +
  scale_y_reverse(expand = c(0.2, 0)) +
  labs(x = NULL, y = "Height") +
  coord_flip() +
  theme(
    panel.background = element_rect(fill = "white"),
    panel.grid = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank()
  )
The legend shows one more entry than there are clusters: the grey colour also gets a key. The line scale_colour_manual(values = c("grey60", rainbow(cut)))
is only meant to colour the clusters.
Adding
# Attempted fix: keep all five colours but list only the cluster ids
# (2 .. cut + 1) as legend breaks, with one label per cluster.
# NOTE(review): c() applied to a factor may yield its integer codes (1 .. cut)
# instead of the level labels, depending on the R version - confirm.
scale_colour_manual(values = c("grey60", rainbow(cut)),
breaks = c(factor(1:cut+1)),
labels = c(paste("cluster", (1:cut))))
does not work as intended.
Is there a way to remove the grey entry?
Yes, just pass the values you want to keep to the `limits` argument of `scale_color_manual`:
# Dendrogram whose legend lists only the clusters.
ggplot() +
  # Branches: coloured by cluster id, no legend keys of their own.
  geom_segment(
    data = segment(dendr),
    mapping = aes(
      x = x, y = y, xend = xend, yend = yend,
      size = factor(line), colour = factor(cluster)
    ),
    lineend = "square",
    show.legend = FALSE
  ) +
  # Restrict the colour scale to the cluster ids (2 .. cut + 1). Id 1, the
  # upper section, falls outside the limits, so it gets no legend key and is
  # drawn with `na.value`. Limits and labels are derived from `cut` so they
  # stay in step with rainbow(cut).
  scale_colour_manual(
    "Cluster",
    limits = as.character(seq_len(cut) + 1),
    values = rainbow(cut),
    labels = paste("cluster", seq_len(cut)),
    na.value = "grey60"
  ) +
  # Both line classes are drawn with the same width.
  scale_size_manual(values = c(0.5, 0.5)) +
  # Leaf labels: coloured by cluster id, drawn as filled rectangles in the legend.
  geom_text(
    data = label(dendr),
    mapping = aes(x = x, y = y, label = label, colour = factor(cluster)),
    hjust = -0.2,
    size = 3,
    key_glyph = "rect"
  ) +
  scale_y_reverse(expand = c(0.2, 0)) +
  labs(x = NULL, y = "Height") +
  coord_flip() +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_rect(fill = "white"),
    panel.grid = element_blank()
  )