My problem is the following. I have this data frame:
ID <- c(1,2,NA,3,NA,4,NA,NA,5,NA,NA,NA)
Col_1 <- c(NA,45,NA,23,1,2,8,NA,78,12,NA,19)
Objekt.Nr. <- c(1,1,2,1,2,1,2,3,1,2,3,4)
Fahrzeugart <- c("E-Bike", "Fahrrad", "Fahrrad", "Fahrrad", "Bus", "Bus", "Fahrrad", "Auto", "E-Bike", "Fahrrad", "Fahrrad", "Fahrrad")
Col_2 <- c(1,2,3,4,NA,5,6,7,NA,89,10,12)
df <- data.frame(ID,Col_1, Objekt.Nr., Fahrzeugart, Col_2)
I need to transform it so that there is only one row for every ID, not several like there are now. For that, I need to pivot the data frame so that every object Objekt.Nr will correspond to a new column with the Fahrzeugart.
My goal is that the data frame will look like this:
ID <- c(1,2,3,4,5)
Fahrzeug_1 <- c("E-Bike","Fahrrad","Fahrrad","Bus","E-Bike")
Fahrzeug_2 <- c(NA, "Fahrrad", "Bus", "Fahrrad", "Fahrrad")
Fahrzeug_3 <- c(NA,NA,NA, "Auto", "Fahrrad")
Fahrzeug_4 <- c(NA,NA,NA,NA, "Fahrrad")
col_1 <- c(1,(2,3)...) #merged for every ID
same for Col_2
df_wanted <- data.frame(ID,Fahrzeug_1,Fahrzeug_2,Fahrzeug_3,Fahrzeug_4)
I tried using this code, but it will only return binary values for "Fahrzeugart":
df_melted <- melt(df, id.vars = c("ID"), measure.vars = c("Fahrzeugart"))
df_wanted <- dcast(df_melted, ID ~ Objekt.Nr., value.var = "Fahrzeugart")
Thank you very much!
You can use fill()
from the tidyr
package to fill in the missing ID values and then pivot_wider()
also from the tidyr
package to change from long to wide-form.
library(dplyr)
library(tidyr)
ID <- c(1,2,NA,3,NA,4,NA,NA,5,NA,NA,NA)
Objekt.Nr. <- c(1,1,2,1,2,1,2,3,1,2,3,4)
Fahrzeugart <- c("E-Bike", "Fahrrad", "Fahrrad", "Fahrrad", "Bus", "Bus", "Fahrrad", "Auto", "E-Bike", "Fahrrad", "Fahrrad", "Fahrrad")
df <- data.frame(ID, Objekt.Nr., Fahrzeugart)
df %>%
fill(ID, .direction="down") %>%
pivot_wider(names_from="Objekt.Nr.", values_from = "Fahrzeugart", names_prefix="Fahrzeugart_")
#> # A tibble: 5 × 5
#> ID Fahrzeugart_1 Fahrzeugart_2 Fahrzeugart_3 Fahrzeugart_4
#> <dbl> <chr> <chr> <chr> <chr>
#> 1 1 E-Bike <NA> <NA> <NA>
#> 2 2 Fahrrad Fahrrad <NA> <NA>
#> 3 3 Fahrrad Bus <NA> <NA>
#> 4 4 Bus Fahrrad Auto <NA>
#> 5 5 E-Bike Fahrrad Fahrrad Fahrrad
Created on 2023-02-14 by the reprex package (v2.0.1)
If you're alright having some list columns in your data, you could do the following:
library(dplyr)
library(tidyr)
ID <- c(1,2,NA,3,NA,4,NA,NA,5,NA,NA,NA)
Col_1 <- c(NA,45,NA,23,1,2,8,NA,78,12,NA,19)
Objekt.Nr. <- c(1,1,2,1,2,1,2,3,1,2,3,4)
Fahrzeugart <- c("E-Bike", "Fahrrad", "Fahrrad", "Fahrrad", "Bus", "Bus", "Fahrrad", "Auto", "E-Bike", "Fahrrad", "Fahrrad", "Fahrrad")
Col_2 <- c(1,2,3,4,NA,5,6,7,NA,89,10,12)
df <- data.frame(ID,Col_1, Objekt.Nr., Fahrzeugart, Col_2)
df %>%
fill(ID, .direction="down") %>%
pivot_wider(id_cols=ID,
names_from="Objekt.Nr.",
values_from = "Fahrzeugart",
names_prefix="Fahrzeugart_",
unused_fn = list)
#> # A tibble: 5 × 7
#> ID Fahrzeugart_1 Fahrzeugart_2 Fahrzeugart_3 Fahrzeugart_4 Col_1 Col_2
#> <dbl> <chr> <chr> <chr> <chr> <list> <list>
#> 1 1 E-Bike <NA> <NA> <NA> <dbl [1]> <dbl>
#> 2 2 Fahrrad Fahrrad <NA> <NA> <dbl [2]> <dbl>
#> 3 3 Fahrrad Bus <NA> <NA> <dbl [2]> <dbl>
#> 4 4 Bus Fahrrad Auto <NA> <dbl [3]> <dbl>
#> 5 5 E-Bike Fahrrad Fahrrad Fahrrad <dbl [4]> <dbl>
Created on 2023-02-14 by the reprex package (v2.0.1)