Search code examples
javascript, r, sankey-diagram, htmlwidgets, networkd3

NetworkD3 sankey plot gives the wrong colors (user-defined colors)


I created a Sankey diagram with the function sankeyNetwork() from the package networkD3. I defined the color of each node manually with the argument "colourScale", but the plot shows the wrong color for the node named "Solicitação Reprovada pelo Banco Folha" (it is green, but it should be red). Does anyone know why this is happening?

Image of the Sankey Diagram I created with the code

Below is the code I used to create the Sankey diagram:

# Packages
library(data.table)
library(tidyverse)
library(networkD3)

# Load data ----
# Portability requests per reason ("Motivos"), with two 0/1 flag columns and
# one count column per year (2018-2022). Rebuilt from a dput() dump, so the
# object carries the "data.table"/"data.frame" classes and compact row names.
df_portabilidade <- structure(
  list(
    Motivos = c(
      "Aceite Compulsório",
      "Solicitação Aprovada pelo Banco Folha",
      "Solicitação Reprovada pelo Banco Folha",
      "CNPJ do Empregador Não Encontrado",
      "CPF Não encontrado",
      "CNPJ e CPF divergentes",
      "Desistência do Cliente",
      "Conta Informada não permite transferência",
      "Conta salário não aberta",
      "Falta de clareza na prestação da informação",
      "Conta pagamento benefício —INSS",
      "Beneficiário não solicitou a portabilidade",
      "Cancelado pela Instituição Destino",
      "Desistência do Cliente",
      "Por motivos internos da institução"
    ),
    # Rows 4-12 are flagged 1.
    Motivo_reprovacao = rep(c(0L, 1L, 0L), times = c(3L, 9L, 3L)),
    # Rows 14-15 are flagged 1.
    Motivo_cancelamento = rep(c(0L, 1L), times = c(13L, 2L)),
    `2018` = c(
      96768L, 458497L, 402252L, 161666L, 59793L,
      99905L, 45221L, 31754L, 2409L, 447L,
      136L, 921L, 4594L, 2707L, 1887L
    ),
    `2019` = c(
      193461L, 1350977L, 1270182L, 497962L, 244534L,
      354402L, 108767L, 31263L, 20647L, 8966L,
      1893L, 1748L, 23898L, 17247L, 6651L
    ),
    `2020` = c(
      303995L, 1461167L, 1742102L, 702771L, 423935L,
      423647L, 116190L, 38641L, 17981L, 14062L,
      3514L, 1361L, 33301L, 28975L, 4326L
    ),
    `2021` = c(
      140768L, 2481335L, 2570585L, 1185654L, 712178L,
      361361L, 158192L, 62442L, 43611L, 39232L,
      6275L, 1640L, 57684L, 51889L, 5795L
    ),
    `2022` = c(
      89485L, 781028L, 827931L, 366201L, 230655L,
      53996L, 69338L, 29363L, 57967L, 17735L,
      2188L, 488L, 17589L, 15698L, 1891L
    )
  ),
  row.names = c(NA, -15L),
  class = c("data.table", "data.frame")
)

# Build the "links" data frame ----
# One link per row of df_portabilidade: `value` is the 2018-2022 total, and
# `source`/`target` are zero-based node indices (node 0 is the root node that
# is prepended when the nodes table is built).
df_links <- df_portabilidade %>%
  mutate(
    value = `2018` + `2019` + `2020` + `2021` + `2022`,
    # Rows 1-3 and 13 start at node 0, rows 4-12 at node 3, rows 14-15 at
    # node 13.
    source = rep(c(0, 3, 0, 13), times = c(3, 9, 1, 2)),
    # Row i points at node i.
    target = as.numeric(1:15)
  ) %>%
  select(Motivos, value, source, target)

# Build the "nodes" data frame ----
# The node labels are the `Motivos` column with the root label inserted as the
# first row, so that it becomes node 0 of the zero-based link indices.
df_nodes <- df_portabilidade %>%
  select(Motivos) %>%
  add_row(Motivos = "Solicitações de Portabilidade", .before = 1)
  

# Prepare the colour scale: a JavaScript d3.scaleOrdinal() expression kept as
# an R string and handed to sankeyNetwork() through `colourScale`.
# The range lists 16 colours, intended as one specific colour per node.
# NOTE(review): a d3 ordinal scale presumably hands out range entries to
# distinct group values in order of first appearance, not by node position —
# confirm against the d3-scale documentation.
# Palette:
#   blue:  #069CCA
#   red:   #EB3A0E
#   green: #19B704

colors <- 'd3.scaleOrdinal([
"#069CCA",
"#19B704",
"#19B704",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E",
"#EB3A0E"])'



# Sankey diagram ----
# This is the call from the question that shows the miscoloured node; note
# that no NodeGroup argument is supplied here.
sankey <- sankeyNetwork(
  # Data frames
  Links = df_links,
  Nodes = df_nodes,
  # Column names inside those data frames
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "Motivos",
  # Appearance
  colourScale = colors,
  units = "TWh",
  fontSize = 12,
  nodeWidth = 30
)
sankey

Solution

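  • sankeyNetwork() colours nodes by group and, when NodeGroup is not given, uses the node name as the group. Everything after the first space in a group name is dropped before it is looked up in the colour scale, so "Solicitação Aprovada pelo Banco Folha" and "Solicitação Reprovada pelo Banco Folha" both become "Solicitação" and get the same colour. Add a new group-name column with no spaces and pass it via the NodeGroup parameter: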

    # Give every node a group label without spaces by swapping each literal
    # space in its name for an underscore.
    df_nodes$node_group <- gsub(" ", "_", df_nodes$Motivos, fixed = TRUE)

    # Sankey diagram, now coloured by the space-free group column
    sankey <- sankeyNetwork(
      # Data frames
      Links = df_links,
      Nodes = df_nodes,
      # Column names inside those data frames
      Source = "source",
      Target = "target",
      Value = "value",
      NodeID = "Motivos",
      NodeGroup = "node_group",
      # Appearance
      colourScale = colors,
      units = "TWh",
      fontSize = 12,
      nodeWidth = 30
    )
    sankey