Search code examples
r, datatable, data-manipulation, formattable

Create table via R (clusters)


I would like to make a table for my problem via R. The code is below:

# Sample data: 19 rows with Latitude, Longitude and Waste columns
df <- data.frame(
  Latitude = c(
    -23.8, -23.8, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9,
    -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9
  ),
  Longitude = c(
    -49.6, -49.6, -49.6, -49.6, -49.6, -49.6, -49.6, -49.6, -49.6, -49.6,
    -49.7, -49.7, -49.7, -49.7, -49.7, -49.6, -49.6, -49.6, -49.6
  ),
  Waste = c(
    526, 350, 526, 469, 285, 175, 175, 350, 350, 175,
    350, 175, 175, 364, 175, 175, 350, 45.5, 54.6
  )
)

# Keep only the rows whose Waste is strictly above the first quartile
Q1 <- matrix(quantile(df$Waste, probs = 0.25))
df_Q1 <- df[df$Waste > Q1[1], ]

# Hierarchical clustering with average linkage, cut into 4 groups
d <- dist(df_Q1)
fit.average <- hclust(d, method = "average")
clusters <- cutree(fit.average, k = 4)
df_Q1$cluster <- clusters

# Per-cluster total of Waste
dc <- setNames(
  aggregate(df_Q1[, "Waste"], list(cluster = clusters), sum),
  c("cluster", "Sum_Waste")
)

# Per-cluster mean of Waste
dd <- setNames(
  aggregate(df_Q1[, "Waste"], list(cluster = clusters), mean),
  c("cluster", "Mean_Waste")
)

Thanks!

New table

enter image description here


Solution

  • The Sum_Waste and Mean_Waste can be merged with your df_Q1 data frame for the final table.

    I was not sure what output you were considering, but one method with kableExtra will merge cells with identical values in designated columns. Hope this is helpful.

    Edit: Added properties to table. Rows are now sorted by cluster and properties.

    library(kableExtra)
    
    #database: 19 rows with Latitude, Longitude and Waste columns
    df <- structure(
      list(
        Latitude = c(
          -23.8, -23.8, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9,
          -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9, -23.9
        ),
        Longitude = c(
          -49.6, -49.6, -49.6, -49.6, -49.6, -49.6, -49.6, -49.6, -49.6, -49.6,
          -49.7, -49.7, -49.7, -49.7, -49.7, -49.6, -49.6, -49.6, -49.6
        ),
        Waste = c(
          526, 350, 526, 469, 285, 175, 175, 350, 350, 175,
          350, 175, 175, 364, 175, 175, 350, 45.5, 54.6
        )
      ),
      class = "data.frame",
      row.names = c(NA, -19L)
    )

    #keep only rows whose Waste is strictly above the first quartile
    Q1 <- matrix(quantile(df$Waste, probs = 0.25))
    df_Q1 <- subset(df, Waste > Q1[1])
    df_Q1

    #cluster: average-linkage hierarchical clustering, cut into 4 groups
    d <- dist(df_Q1)
    fit.average <- hclust(d, method = "average")
    clusters <- cutree(fit.average, k = 4)
    df_Q1$cluster <- clusters
    # names(clusters) carries the row labels of df_Q1 through dist()/hclust();
    # they are stored as a column so each property can be identified after the
    # merge below (merge() does not keep the original row names).
    df_Q1$properties <- names(clusters)

    #calculate sum waste
    dc <- aggregate(df_Q1[, "Waste"], list(cluster = clusters), sum)
    colnames(dc) <- c("cluster", "Sum_Waste")
    head(dc)

    #calculate mean waste
    dd <- aggregate(df_Q1[, "Waste"], list(cluster = clusters), mean)
    colnames(dd) <- c("cluster", "Mean_Waste")
    head(dd)

    #merge everything
    # Join on the cluster key explicitly: a bare merge() joins on every column
    # name the two inputs happen to share, which would silently change the
    # result if another common column were ever added.
    df_table <- Reduce(
      function(x, y) merge(x, y, by = "cluster"),
      list(df_Q1, dc, dd)
    )
    
    #make table
    # Pick the displayed columns by name instead of by position, so the table
    # stays correct even if the column order of df_table changes.
    table_cols <- c("properties", "Latitude", "Longitude", "Waste",
                    "cluster", "Sum_Waste", "Mean_Waste")
    # Rows sorted by cluster, then by property id (stored as text, hence as.numeric)
    row_order <- order(df_table$cluster, as.numeric(df_table$properties))
    # Positions, within the displayed table, of the per-cluster columns whose
    # repeated values are merged into a single cell.
    # NOTE(review): collapse_rows appears to merge adjacent equal values column by
    # column; two neighbouring clusters with identical sums would be merged too - confirm.
    collapse_cols <- match(c("cluster", "Sum_Waste", "Mean_Waste"), table_cols)
    kable(df_table[row_order, table_cols], align = "c", row.names = FALSE) %>%
      kable_styling(full_width = FALSE) %>%
      column_spec(1, bold = TRUE) %>%
      collapse_rows(columns = collapse_cols, valign = "middle")
    

    Edit (4/18/20)

    To aggregate properties in a single row, with comma separation, try:

    #sort properties
    # Order rows by cluster, then by property id (stored as text, hence as.numeric),
    # so the comma-separated lists built below come out in ascending order.
    df_table <- df_table[order(df_table$cluster, as.numeric(df_table$properties)), ]

    #second table aggregated properties
    # One row per (cluster, Sum_Waste, Mean_Waste) combination; the properties of
    # each group are joined into a single "a, b, c" string by toString().
    # The aggregated column is named explicitly instead of selecting columns by
    # position, so this does not depend on the column order of df_table.
    df_table2 <- aggregate(
      properties ~ cluster + Sum_Waste + Mean_Waste,
      data = df_table,
      FUN = toString
    )
    
    #make table with df_table2
    # Columns are selected by name (properties first) rather than by position,
    # so the layout does not depend on the column order produced by aggregate().
    kable(
      df_table2[order(df_table2$cluster),
                c("properties", "cluster", "Sum_Waste", "Mean_Waste")],
      align = "c",
      row.names = FALSE
    ) %>%
      kable_styling(full_width = FALSE)