Search code examples
r json sparse-matrix cosine-similarity tibble

Converting a Cosine Similarity Matrix to JSON in R


I have a Cosine similarity matrix (csm), which looks like this:

 # 5 x 5 cosine-similarity matrix, filled column by column; the same labels
 # are used for both rows and columns.
 ids <- c("AAB", "AAC", "AAD", "AAE", "AAF")
 csm <- matrix(
   c(
     1,   0,   0.4, 0.6, 0,
     0,   1,   0.1, 0.4, 0.7,
     0.4, 0.1, 1,   0.9, 0.5,
     0.6, 0.4, 0.9, 1,   0.6,
     0,   0.7, 0.5, 0.6, 1
   ),
   nrow = 5,
   ncol = 5,
   dimnames = list(ids, ids)
 )

csm
    AAB   AAC   AAD   AAE    AAF
AAB 1     0     0.4   0.6    0
AAC 0     1     0.1   0.4    0.7
AAD 0.4   0.1   1     0.9    0.5
AAE 0.6   0.4   0.9   1      0.6
AAF 0     0.7   0.5   0.6    1

My goal is to build a JSON file that looks like this:

    {"AAB":[{"ID":"AAB","value":1},{"ID":"AAC","value":0},{"ID":"AAD","value":0.4},{"ID":"AAE","value":0.6},{"ID":"AAF","value":0}],
     "AAC":[{"ID":"AAB","value":0},{"ID":"AAC","value":1},{"ID":"AAD","value":0.1},{"ID":"AAE","value":0.4},{"ID":"AAF","value":0.7}],
     "AAD":[{"ID":"AAB","value":0.4},{"ID":"AAC","value":0.1},{"ID":"AAD","value":1},{"ID":"AAE","value":0.9},{"ID":"AAF","value":0.5}],
     "AAE":[{"ID":"AAB","value":0.6},{"ID":"AAC","value":0.4},{"ID":"AAD","value":0.9},{"ID":"AAE","value":1},{"ID":"AAF","value":0.6}],
     "AAF":[{"ID":"AAB","value":0},{"ID":"AAC","value":0.7},{"ID":"AAD","value":0.5},{"ID":"AAE","value":0.6},{"ID":"AAF","value":1}]}

What I have done is this:

# Attempted conversion: produces one flat array of {ID, value} records rather
# than the nested, per-label structure wanted.
csm %>% as_tibble() %>% 
    # Melt every column into ID/value pairs; no row-identifier column is kept,
    # so the row each value came from is lost at this point.
    gather( ID, value) %>% 
    # Expand to every combination of the ID and value entries that occur.
    complete(ID, value)%>%
    # Serialise the resulting flat table.
    jsonlite::toJSON()

What I am getting is:

[{"ID":"AAB","value":0},{"ID":"AAB","value":0},{"ID":"AAB","value":0.1},{"ID":"AAB","value":0.3},{"ID":"AAB","value":0.4},...

Could someone help? Thank you!


Solution

  • Here is a base R solution using stack

    library(jsonlite)

    # stack() melts the columns of `df` into long form: `values` holds the
    # cells and `ind` the name of the column each cell came from. The row
    # names are recycled once per stacked column to label each cell.
    long <- data.frame(ID = rownames(df), stack(df))
    # stack() calls the cell column "values"; the requested JSON key is "value".
    names(long)[names(long) == "values"] <- "value"
    # One JSON array per label; keep only the ID/value pair in each record.
    # Grouping is by source column, which matches grouping by row here
    # because a cosine-similarity matrix is symmetric.
    toJSON(lapply(split(long, long$ind), function(x) x[, c("ID", "value")]))
    #{"AAB":[{"ID":"AAB","value":1},{"ID":"AAC","value":0},{"ID":"AAD","value":0.4},{"ID":"AAE","value":0.6},{"ID":"AAF","value":0}],"AAC":[{"ID":"AAB","value":0},{"ID":"AAC","value":1},{"ID":"AAD","value":0.1},{"ID":"AAE","value":0.4},{"ID":"AAF","value":0.7}],"AAD":[{"ID":"AAB","value":0.4},{"ID":"AAC","value":0.1},{"ID":"AAD","value":1},{"ID":"AAE","value":0.9},{"ID":"AAF","value":0.5}],"AAE":[{"ID":"AAB","value":0.6},{"ID":"AAC","value":0.4},{"ID":"AAD","value":0.9},{"ID":"AAE","value":1},{"ID":"AAF","value":0.6}],"AAF":[{"ID":"AAB","value":0},{"ID":"AAC","value":0.7},{"ID":"AAD","value":0.5},{"ID":"AAE","value":0.6},{"ID":"AAF","value":1}]}
    

    Or the tidyverse way

    # Provides rownames_to_column(), pivot_longer(), map() and the %>% pipe;
    # toJSON() comes from jsonlite.
    library(tidyverse)

    df %>%
        # Keep the row labels as an ordinary column so they survive reshaping.
        rownames_to_column("row") %>%
        # Long form: one (row, ID, value) record per matrix cell.
        # pivot_longer() replaces the superseded gather().
        pivot_longer(-row, names_to = "ID", values_to = "value") %>%
        # One data frame per row label; the labels become the JSON keys.
        split(.$row) %>%
        # Drop the grouping column so each record holds only ID and value.
        map(~ .x[, c("ID", "value")]) %>%
        toJSON()
    #{"AAB":[{"ID":"AAB","value":1},{"ID":"AAC","value":0},{"ID":"AAD","value":0.4},{"ID":"AAE","value":0.6},{"ID":"AAF","value":0}],"AAC":[{"ID":"AAB","value":0},{"ID":"AAC","value":1},{"ID":"AAD","value":0.1},{"ID":"AAE","value":0.4},{"ID":"AAF","value":0.7}],"AAD":[{"ID":"AAB","value":0.4},{"ID":"AAC","value":0.1},{"ID":"AAD","value":1},{"ID":"AAE","value":0.9},{"ID":"AAF","value":0.5}],"AAE":[{"ID":"AAB","value":0.6},{"ID":"AAC","value":0.4},{"ID":"AAD","value":0.9},{"ID":"AAE","value":1},{"ID":"AAF","value":0.6}],"AAF":[{"ID":"AAB","value":0},{"ID":"AAC","value":0.7},{"ID":"AAD","value":0.5},{"ID":"AAE","value":0.6},{"ID":"AAF","value":1}]}
    

    Sample data

    # The similarity matrix as a data frame. The header line has one field
    # fewer than the data lines, so read.table() uses the first column of each
    # data line as the row names.
    sample_text <-
        "    AAB   AAC   AAD   AAE    AAF
    AAB 1     0     0.4   0.6    0
    AAC 0     1     0.1   0.4    0.7
    AAD 0.4   0.1   1     0.9    0.5
    AAE 0.6   0.4   0.9   1      0.6
    AAF 0     0.7   0.5   0.6    1"
    df <- read.table(text = sample_text)