Search code examples
r, loops, lapply, frequency-distribution

Is there a method to get frequency distributions for different columns in R?


The data are:

# Example data: ten rows with three parallel sets of columns
# (Car1-Car3, Group1-Group3, Code1-Code3), parsed from the
# whitespace-separated text below using its first line as the header.
df1<-read.table(text=" Car1	Car2	Car3	Group1	Group2	Group3	Code1	Code2	Code3
N	M	M	A	A	A	B	B	B
Q	M	M	B	B	A	A	A	B
Q	N	Q	A	A	B	A	B	B
N	P	P	A	A	A	A	B	A
N	M	Q	A	B	A	B	A	A
M	Q	P	B	A	A	B	B	A
N	M	N	B	A	A	A	B	A
N	N	M	B	B	B	A	B	A
Q	Q	P	A	B	B	B	A	A
N	Q	M	A	B	A	B	A	A
",header=TRUE)

I want to get tables that show Group1 and Code1 against Car1, Group2 and Code2 against Car2, and Group3 and Code3 against Car3. For example, for Group1 and Code1 against Car1, I would get the following table for Car1:

Car1 Car_A Car_B Group_A Group_B
M    M     0     1       0       1
N    N     4     2       3       3
Q    Q     2     1       2       1

I want to loop over the columns to get the 3 tables, for example using lapply.

I have tried the following, but it failed to produce the tables:

# NOTE: this attempt does not work. paste0() only builds a character string
# such as "Car1 ~ Group1", so table() is handed that single string rather than
# columns of df1; in addition, `data = df1` is written as an argument to
# as.data.frame.matrix(), not to table().
df2<-lapply(1:3, function(i) as.data.frame.matrix(table(paste0('Car', i, ' ~ ', 'Group', i)), data = df1))


Solution

  • You can use lapply as you have already shown in your question.

    # For each suffix 1..3, cross-tabulate Car<k> against Group<k> and against
    # Code<k>, then put the two count tables side by side.
    lapply(1:3, function(k) {
        car <- paste0("Car", k)
        by_group <- table(df1[, c(car, paste0("Group", k))])
        by_code <- table(df1[, c(car, paste0("Code", k))])
        cbind(by_group, by_code)
    })
    #[[1]]
    #  A B A B
    #M 0 1 0 1
    #N 4 2 3 3
    #Q 2 1 2 1
    #
    #[[2]]
    #  A B A B
    #M 2 2 2 2
    #N 1 1 0 2
    #P 1 0 0 1
    #Q 1 2 2 1
    #
    #[[3]]
    #  A B A B
    #M 3 1 2 2
    #N 1 0 1 0
    #P 2 1 3 0
    #Q 1 1 1 1
    

    And with row and column names that include the variable name:

    # Same tables as before, but every row/column label is rewritten as
    # "<variable>_<level>" (e.g. "Car1_M", "Group1_A") before binding.
    lapply(1:3, function(k) {
        # Replace the labels of each dimension of `tab` with
        # "<dimension name>_<label>" and return the relabelled table.
        tag_levels <- function(tab) {
            dn <- dimnames(tab)
            dimnames(tab) <- lapply(seq_along(dn), function(d) {
                paste(names(dn)[d], dn[[d]], sep="_")
            })
            tab
        }
        car <- paste0('Car', k)
        by_group <- tag_levels(table(df1[, c(car, paste0('Group', k))]))
        by_code <- tag_levels(table(df1[, c(car, paste0('Code', k))]))
        cbind(by_group, by_code)
    })
    #[[1]]
    #       Group1_A Group1_B Code1_A Code1_B
    #Car1_M        0        1       0       1
    #Car1_N        4        2       3       3
    #Car1_Q        2        1       2       1
    #
    #[[2]]
    #       Group2_A Group2_B Code2_A Code2_B
    #Car2_M        2        2       2       2
    #Car2_N        1        1       0       2
    #Car2_P        1        0       0       1
    #Car2_Q        1        2       2       1
    #
    #[[3]]
    #       Group3_A Group3_B Code3_A Code3_B
    #Car3_M        3        1       2       2
    #Car3_N        1        0       1       0
    #Car3_P        2        1       3       0
    #Car3_Q        1        1       1       1
    

    Or a solution that first changes df1 itself, prefixing each value with its column name:

    # Prefix every value with the name of its column (e.g. "N" in Car1 becomes
    # "Car1_N") so that the tables built below get self-describing labels.
    # Assigning into df1[] keeps df1 a data frame with its original column
    # names; as.data.frame(sapply(...)) would instead collapse a one-row df1
    # into a single column, because sapply() simplifies length-1 results to a
    # plain vector.
    df1[] <- lapply(names(df1), function(nm) paste(nm, df1[[nm]], sep = "_"))
    # For each suffix 1..3, tabulate Car<i> by Group<i> and Car<i> by Code<i>,
    # then bind the two count tables column-wise.
    lapply(1:3, function(i) {
        car <- paste0("Car", i)
        cbind(
            table(df1[, c(car, paste0("Group", i))]),
            table(df1[, c(car, paste0("Code", i))])
        )
    })
    #[[1]]
    #       Group1_A Group1_B Code1_A Code1_B
    #Car1_M        0        1       0       1
    #Car1_N        4        2       3       3
    #Car1_Q        2        1       2       1
    #
    #[[2]]
    #       Group2_A Group2_B Code2_A Code2_B
    #Car2_M        2        2       2       2
    #Car2_N        1        1       0       2
    #Car2_P        1        0       0       1
    #Car2_Q        1        2       2       1
    #
    #[[3]]
    #       Group3_A Group3_B Code3_A Code3_B
    #Car3_M        3        1       2       2
    #Car3_N        1        0       1       0
    #Car3_P        2        1       3       0
    #Car3_Q        1        1       1       1