Search code examples
r, cluster-analysis, dendrogram, dendextend

dendextend get_subdendrograms in R: changing label names causes subdendrograms to be NULL


I am trying to create subdendrograms using dendextend. This works until I change the labels of the leaves in the dendrogram: I want each label to be 'Y' or 'B', based on a column in my data frame, but as soon as I set these labels several of the subdendrograms come back as NULL.

Here is some reproducible code

library(dplyr)
library(dendextend)

# Replacement leaf labels, "Y" or "B", listed in the original observation
# order; they are re-indexed with order.dendrogram() further down so that each
# label lands on the matching leaf.
new_labels <- c("Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "B", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "B", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "B", "Y", "Y", "Y", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "B", "Y", "Y", 
                "Y", "B", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "B", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "B", "Y", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
                "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y")

# Label colours, parallel to `new_labels`: 8 at the positions labelled "B",
# 1 everywhere else.
new_colors <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 
                1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 8, 
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                1, 1, 1, 1)

# Build the dendrogram: distance matrix on iris without its fifth column,
# hierarchical clustering, then conversion to a dendrogram object.
dend <- iris[, -5] %>%
  dist() %>%
  hclust() %>%
  as.dendrogram() %>%
  # colour the leaf labels; order.dendrogram(.) re-indexes the colours from
  # observation order into leaf order
  set('labels_colors', new_colors[order.dendrogram(.)]) %>% 
  # replace the leaf labels with 'Y' or 'B' (the labels are no longer unique
  # after this step); this is the line whose presence leads to NULL entries
  # in `dend_list` below
  set('labels', new_labels[order.dendrogram(.)]) %>% 
  # colour the branches according to a cut of the tree into 5 clusters
  color_branches(k = 5)

# Request the 5 sub-dendrograms corresponding to that cut.
dend_list <- get_subdendrograms(dend, 5)

# Plot the full tree followed by the sub-dendrograms on a 2 x 3 grid.
par(mfrow = c(2, 3))
plot(dend, main = "Original dendrogram")
# sapply() is used here only for the side effect of calling plot() on each
# element of `dend_list`.
sapply(dend_list, plot)

If I run this without the line

set('labels', new_labels[order.dendrogram(.)]) %>%

then it works fine, in that I get a plot with the main dendrogram and 5 subdendrograms, and the labels are colored (black or grey).

However, if I run with this line

set('labels', new_labels[order.dendrogram(.)]) %>%

then some of the entries in dend_list are NULL, and only 1 of the 5 subdendrograms is added to the plot. The one that does appear is formatted correctly, with 'Y' and 'B' labels.

I have tried lots of things but I'm not sure how to fix this. Does anybody know why this might be happening?


Solution

  • I now have the answer (with some help from Bing Chat)

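    The solution was to extract the subdendrograms first, while the leaves still have their original (unique) labels, and only afterwards change the labels of both the main dendrogram and each subdendrogram. get_subdendrograms() appears to locate the leaves of each cluster by their labels, so it cannot tell the leaves apart once they are all called 'Y' or 'B'. Updated code: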

    library(dplyr)
    library(dendextend)
    
    # Observations (rows of `iris`) that receive the "B" label; every other
    # observation is labelled "Y".
    b_rows <- c(10, 18, 33, 49, 53, 62, 126)
    new_labels <- rep("Y", nrow(iris))
    new_labels[b_rows] <- "B"

    # Label colours are derived from the labels so the two can never drift
    # apart: 8 (grey) for "B", 1 (black) for "Y".
    new_colors <- ifelse(new_labels == "B", 8, 1)

    # Number of clusters, used both for branch colouring and for extraction.
    n_clusters <- 5

    # Build the dendrogram. Only the label *colours* are set here; the leaves
    # keep their original, unique labels until the sub-dendrograms have been
    # extracted.
    dend <- iris[, -5] %>%
      dist() %>%
      hclust() %>%
      as.dendrogram() %>%
      # order.dendrogram(.) re-indexes from observation order into leaf order
      set("labels_colors", new_colors[order.dendrogram(.)]) %>%
      color_branches(k = n_clusters)

    # Extract the sub-dendrograms BEFORE relabelling: doing it after the
    # labels have been replaced by "Y"/"B" yields NULL entries.
    dend_list <- get_subdendrograms(dend, n_clusters)

    # Now it is safe to relabel the full tree ...
    labels(dend) <- new_labels[order.dendrogram(dend)]
    # ... and each sub-dendrogram. The leaves of a sub-dendrogram keep their
    # original observation indices, so order.dendrogram(x) still indexes
    # `new_labels` correctly.
    dend_list <- lapply(dend_list, function(x) {
      set(x, "labels", new_labels[order.dendrogram(x)])
    })

    # Plot the full tree and the sub-dendrograms on a 2 x 3 grid, then restore
    # the previous graphical parameters.
    old_par <- par(mfrow = c(2, 3))
    plot(dend, main = "Original dendrogram")
    # A plain loop: plot() is called only for its side effect, so there is no
    # result worth collecting (sapply() would print a list of NULLs).
    for (sub_dend in dend_list) {
      plot(sub_dend)
    }
    par(old_par)