df1 <- data.frame(id=c("632592651","633322173","634703802","634927873","635812953","636004739","636101211","636157799","636263106","636752420"),text=c("asdf","cat","dog","mouse","elephant","goose","rat","mice","kitty","kitten"),response=c("y","y","y","n","n","y","y","n","n","y"))
overview of df1
id text response
1 632592651 asdf y
2 633322173 cat y
3 634703802 dog y
4 634927873 mouse n
5 635812953 elephant n
6 636004739 goose y
7 636101211 rat y
8 636157799 mice n
9 636263106 kitty n
10 636752420 kitten y
df2 <- data.frame(id=c("632592651","633322173","634703802","634927873","635812953","636004739","636101211","636157799","636263106","636752420","636809222","2004722036","2004894388","2005045755","2005535472","2005630542","2005788781","2005809679","2005838317","2005866692"),
text=c("asdf_xyz","cat","dog","mouse","elephant","goose","rat","mice","kitty","kitten","tiger_xyz","lion","leopard","ostrich","kangaroo","platypus","fish","reptile","mammals","amphibians_xyz"),
volume=c("1234","432","324","333","2223","412346","7456","3456","2345","2345","6","345","23","2","4778","234","8675","3459","8","9"))
overview of df2
id text volume
1 632592651 asdf_xyz 1234
2 633322173 cat 432
3 634703802 dog 324
4 634927873 mouse 333
5 635812953 elephant 2223
6 636004739 goose 412346
7 636101211 rat 7456
8 636157799 mice 3456
9 636263106 kitty 2345
10 636752420 kitten 2345
11 636809222 tiger_xyz 6
12 2004722036 lion 345
13 2004894388 leopard 23
14 2005045755 ostrich 2
15 2005535472 kangaroo 4778
16 2005630542 platypus 234
17 2005788781 fish 8675
18 2005809679 reptile 3459
19 2005838317 mammals 8
20 2005866692 amphibians_xyz 9
Above are my two dataframes and I would like to have a new df3 that gives
df3 <- data.frame(id=c("633322173","634703802","634927873","635812953","636004739","636101211","636157799","636263106","636752420"),
text=c("cat","dog","mouse","elephant","goose","rat","mice","kitty","kitten"),
response=c("y","y","n","n","y","y","n","n","y"),
volume=c("432","324","333","2223","412346","7456","3456","2345","2345"))
overview of df3
id text response volume
1 633322173 cat y 432
2 634703802 dog y 324
3 634927873 mouse n 333
4 635812953 elephant n 2223
5 636004739 goose y 412346
6 636101211 rat y 7456
7 636157799 mice n 3456
8 636263106 kitty n 2345
9 636752420 kitten y 2345
and would like to obtain the differences between df1 and df2. Can someone give an advise how to approach this? Thanks.
You may try using inner_join
and anti_join
library(dplyr)
df1 %>%
inner_join(df2, by = c("id", "text"))
id text response volume
1 633322173 cat y 432
2 634703802 dog y 324
3 634927873 mouse n 333
4 635812953 elephant n 2223
5 636004739 goose y 412346
6 636101211 rat y 7456
7 636157799 mice n 3456
8 636263106 kitty n 2345
9 636752420 kitten y 2345
And
df1 %>%
anti_join(df2, by = c("id", "text"))
id text response
1 632592651 asdf y
df2 %>%
anti_join(df1, by = c("id", "text"))
id text volume
1 632592651 asdfxyz 1234
2 636809222 tiger_xyz 6
3 2004722036 lion 345
4 2004894388 leopard 23
5 2005045755 ostrich 2
6 2005535472 kangaroo 4778
7 2005630542 platypus 234
8 2005788781 fish 8675
9 2005809679 reptile 3459
10 2005838317 mammals 8
11 2005866692 amphibians_xyz 9