I have a number of data.frames that each have a factor. I want to make sure that they all use the same levels. What is the proper way to do this?
In the code below you'll see that I reassign the factor for each case, using levels from the overall set of levels, with a small convenience function, changeFactor. I would expect that there is a better way to do this, though.
# ggplot2 provides ggplot(), aes(), geom_bar(), facet_grid() and ggtitle().
library(ggplot2)

# Three reproducible pools of values, rounded to two decimals.
set.seed(1234)
b <- round(runif(100, 1, 10), digits = 2)
set.seed(2345)
b2 <- round(runif(100, 11, 20), digits = 2)
set.seed(3456)
b3 <- round(runif(50, 15, 18), digits = 2)

#.. all potential levels
bt <- factor(sort(c(b, b2, b3)))
lvls <- levels(bt)

# One frequency table (columns Var1, Freq) per group, each drawn from a pool.
t1 <- as.data.frame(table(sample(b, 5)))
t2 <- as.data.frame(table(sample(b, 1)))
t3 <- as.data.frame(table(sample(b, 1)))
t4 <- as.data.frame(table(sample(b, 8)))
t5 <- as.data.frame(table(sample(b2, 20)))
t6 <- as.data.frame(table(sample(b3, 18)))

# Tag every table with its group label in column p.
t1 <- cbind(t1, p = "A")
t2 <- cbind(t2, p = "B")
t3 <- cbind(t3, p = "C")
t4 <- cbind(t4, p = "D")
t5 <- cbind(t5, p = "E")
t6 <- cbind(t6, p = "F")

# Stack the tables. Each table's Var1 still carries only its own levels, and
# rbind() merges them in the order the tables are supplied, so the combined
# levels are not in numeric order.
d <- rbind(t2, t3, t6, t4, t5, t1)

#.. out of order bins
ggplot(d, aes(x = factor(Var1), fill = factor(p))) +
  geom_bar(aes(weight = Freq)) +
  facet_grid(p ~ ., margins = TRUE) +
  ggtitle("out of order bins")
# Re-encode `t` as a factor over the shared level set `lvls`.
#
# t:    factor (or other vector) whose labels parse as numbers.
# lvls: the complete set of levels the returned factor should carry.
#
# The labels are converted to numbers before factor() matches them against
# `lvls`; values with no match in `lvls` come back as NA.
changeFactor <- function(t, lvls) {
  factor(as.numeric(as.character(t)), levels = lvls)
}
# Re-encode each table's Var1 against the shared, sorted level set so that all
# six factors carry identical levels.
t1$Var1 <- changeFactor(t1$Var1, lvls)
t2$Var1 <- changeFactor(t2$Var1, lvls)
t3$Var1 <- changeFactor(t3$Var1, lvls)
t4$Var1 <- changeFactor(t4$Var1, lvls)
t5$Var1 <- changeFactor(t5$Var1, lvls)
t6$Var1 <- changeFactor(t6$Var1, lvls)

# Same stacking order as before; the levels now agree across tables, so the
# combined factor keeps the order of lvls.
d <- rbind(t2, t3, t6, t4, t5, t1)

#.. in order bins
ggplot(d, aes(x = factor(Var1), fill = factor(p))) +
  geom_bar(aes(weight = Freq)) +
  facet_grid(p ~ ., margins = TRUE) +
  ggtitle("in order bins")
Short answer: keep your data in lists and learn the *apply family (lapply, Map, rapply, ...).
# Three reproducible pools of values, rounded to two decimals.
set.seed(1234)
b <- round(runif(n = 100, min = 1, max = 10), digits = 2)

set.seed(2345)
b2 <- round(runif(n = 100, min = 11, max = 20), digits = 2)

set.seed(3456)
b3 <- round(runif(n = 50, min = 15, max = 18), digits = 2)

#.. all potential levels
bt <- factor(sort(c(b, b2, b3)))
lvls <- levels(bt)
# Keep strings as character instead of factor (the default since R 4.0).
options(stringsAsFactors = FALSE)

# Draw `y` values from `x` without replacement, tabulate them, and tag the
# resulting frequency table with the group label `z`.
#
# x: vector of candidate values to draw from.
# y: number of values to draw.
# z: group label stored in column `p`.
#
# Returns a data.frame with columns Var1 (factor of the drawn values),
# Freq (integer counts) and p (the label, kept as character).
f <- function(x, y, z) {
  # Index through sample.int() rather than calling sample(x, y): for a single
  # number x >= 1, sample(x, y) draws from 1:x instead of from x itself.
  # For longer vectors both forms produce the same draws.
  counts <- as.data.frame(table(x[sample.int(length(x), y)]))
  # stringsAsFactors is spelled out so `p` stays character whatever the
  # global option is; Var1 is already a factor and is left untouched.
  data.frame(counts, p = z, stringsAsFactors = FALSE, check.names = FALSE)
}
# Build one tagged frequency table per group: pool, draw count and label are
# paired up position by position.
datl <- Map(
  f,
  list(b, b, b, b, b2, b3),
  c(5, 1, 1, 8, 20, 18),
  LETTERS[1:6]
)
# Re-encode `t` as a factor over the shared level set `lvls`.
#
# t:    factor (or other vector) whose labels parse as numbers.
# lvls: the complete set of levels the returned factor should carry.
#
# The labels are converted to numbers before factor() matches them against
# `lvls`; values with no match in `lvls` come back as NA.
changeFactor <- function(t, lvls) {
  factor(as.numeric(as.character(t)), levels = lvls)
}
# Apply changeFactor() to every factor element inside datl, replacing it in
# place, then pass each list element through data.frame().
# NOTE(review): the data.frame() pass presumably restores the data.frame class
# on the elements returned by rapply() -- confirm for the R version in use.
datl <- lapply(
  rapply(datl, f = function(x) changeFactor(x, lvls),
         classes = "factor", how = "replace"),
  data.frame
)

# Stack the tables in the order B, C, F, D, E, A.
d <- do.call(rbind, datl[c(2, 3, 6, 4, 5, 1)])

# ggplot2 provides ggplot(), aes(), geom_bar(), facet_grid() and ggtitle().
library(ggplot2)

#.. in order bins
ggplot(d, aes(x = factor(Var1), fill = factor(p))) +
  geom_bar(aes(weight = Freq)) +
  facet_grid(p ~ ., margins = TRUE) +
  ggtitle("in order bins")
long answer:
# Three reproducible pools of values, rounded to two decimals.
set.seed(1234)
b <- round(runif(n = 100, min = 1, max = 10), digits = 2)

set.seed(2345)
b2 <- round(runif(n = 100, min = 11, max = 20), digits = 2)

set.seed(3456)
b3 <- round(runif(n = 50, min = 15, max = 18), digits = 2)

#.. all potential levels
bt <- factor(sort(c(b, b2, b3)))
lvls <- levels(bt)
First, I don't want any unexpected factors popping up, so I set options(stringsAsFactors = FALSE).
Then I write a function, f, that builds one table, and check that it works:
# Keep strings as character instead of factor (the default since R 4.0).
options(stringsAsFactors = FALSE)

# Draw `y` values from `x` without replacement, tabulate them, and tag the
# resulting frequency table with the group label `z`.
#
# x: vector of candidate values to draw from.
# y: number of values to draw.
# z: group label stored in column `p`.
#
# Returns a data.frame with columns Var1 (factor of the drawn values),
# Freq (integer counts) and p (the label, kept as character).
f <- function(x, y, z) {
  # Index through sample.int() rather than calling sample(x, y): for a single
  # number x >= 1, sample(x, y) draws from 1:x instead of from x itself.
  # For longer vectors both forms produce the same draws.
  counts <- as.data.frame(table(x[sample.int(length(x), y)]))
  # stringsAsFactors is spelled out so `p` stays character whatever the
  # global option is; Var1 is already a factor and is left untouched.
  data.frame(counts, p = z, stringsAsFactors = FALSE, check.names = FALSE)
}
# Spot-check f() on a single group: five draws from pool b, labelled "A".
# The listing below is sample output; the exact values depend on the RNG state.
f(x = b, y = 5, z = "A")
# Var1 Freq p
# 1 1.13 1 A
# 2 1.46 1 A
# 3 2.09 1 A
# 4 2.5 1 A
# 5 7.02 1 A
That seems to work, so just Map it over lists of arguments and check the output:
# Build one tagged frequency table per group: pool, draw count and label are
# paired up position by position.
datl <- Map(f, list(b, b, b, b, b2, b3), c(5, 1, 1, 8, 20, 18), LETTERS[1:6])

# Inspect the result; at this point each Var1 factor has only its own levels.
str(datl)
# List of 6
# $ :'data.frame': 5 obs. of 3 variables:
# ..$ Var1: Factor w/ 5 levels "2.02","3.09",..: 1 2 3 4 5
# ..$ Freq: int [1:5] 1 1 1 1 1
# ..$ p : chr [1:5] "A" "A" "A" "A" ...
# $ :'data.frame': 1 obs. of 3 variables:
# ..$ Var1: Factor w/ 1 level "1.63": 1
# ..$ Freq: int 1
# ..$ p : chr "B"
# ...
so combine everything to use with ggplot
# ggplot2 provides ggplot(), aes(), geom_bar(), facet_grid() and ggtitle().
library(ggplot2)

# Stack the tables in the order B, C, F, D, E, A. Each Var1 still has only its
# own levels here, so the combined levels follow the stacking order.
d <- do.call(rbind, datl[c(2, 3, 6, 4, 5, 1)])

#.. out of order bins
ggplot(d, aes(x = factor(Var1), fill = factor(p))) +
  geom_bar(aes(weight = Freq)) +
  facet_grid(p ~ ., margins = TRUE) +
  ggtitle("out of order bins")
# Re-encode `t` as a factor over the shared level set `lvls`.
#
# t:    factor (or other vector) whose labels parse as numbers.
# lvls: the complete set of levels the returned factor should carry.
#
# The labels are converted to numbers before factor() matches them against
# `lvls`; values with no match in `lvls` come back as NA.
changeFactor <- function(t, lvls) {
  factor(as.numeric(as.character(t)), levels = lvls)
}
again making sure the function does what it is supposed to do on one data frame
# Spot-check changeFactor() on the first table: the values are unchanged but
# the factor now carries the full shared level set.
changeFactor(datl[[1]][["Var1"]], lvls)
# [1] 2.02 3.09 3.79 3.89 8.3
# 234 Levels: 1.09 1.12 1.13 1.24 1.36 1.38 1.41 1.46 1.63 1.66 1.81 1.95 ... 19.86
so apply it again to them all at once and check the output
# Apply changeFactor() to every factor element inside datl, replacing it in
# place, then pass each list element through data.frame() and inspect the
# resulting structure.
datl <- lapply(
  rapply(
    datl,
    f = function(x) changeFactor(x, lvls),
    classes = "factor",
    how = "replace"
  ),
  data.frame
)
str(datl)
# List of 6
# $ :'data.frame': 5 obs. of 3 variables:
# ..$ Var1: Factor w/ 234 levels "1.09","1.12",..: 13 28 41 45 81
# ..$ Freq: int [1:5] 1 1 1 1 1
# ..$ p : chr [1:5] "A" "A" "A" "A" ...
# $ :'data.frame': 1 obs. of 3 variables:
# ..$ Var1: Factor w/ 234 levels "1.09","1.12",..: 9
# ..$ Freq: int 1
# ..$ p : chr "B"
# ...
combine again and plot
# Stack the tables in the order B, C, F, D, E, A.
d <- do.call(rbind, datl[c(2, 3, 6, 4, 5, 1)])

#.. in order bins
ggplot(d, aes(x = factor(Var1), fill = factor(p))) +
  geom_bar(aes(weight = Freq)) +
  facet_grid(p ~ ., margins = TRUE) +
  ggtitle("in order bins")