I'm using the dataframe sheep.df from dmm library in R.
sheep.df <- data.frame(
Id = factor(c(
"9a4003", "0a4721", "1a4123", "1a4371", "2a4127", "9a4071",
"9a4123", "9a4062", "0a4712", "0a4781", "0a4441", "1a4128", "1a4127",
"1a4129", "2a4242", "2a4243", "2a4244", "2a4245", "2a4246", "3a4127",
"3a4128", "3a4129", "3a4140", "3a4141", "3a4142", "3a4143", "4a4144",
"4a4145", "4a4146", "4a4117", "4a4118", "4a4119", "5a4129", "5a4140",
"5a4141", "6a4242", "6a4243", "6a4244", "6a4245", "6a4246", "7a4127",
"7a4128"
)),
SId = factor(c(
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "9a4003",
"9a4003", "9a4003", "9a4003", "9a4003", "0a4721", "0a4721", "1a4123",
"1a4123", "1a4123", "1a4371", "1a4371", "2a4127", "2a4127", "2a4127",
"2a4127", "2a4127", "2a4127", "1a4123", "1a4123", "1a4123", "9a4003",
"9a4003", "9a4003", "9a4003", "9a4003", "0a4721", "0a4721"
)),
DId = factor(c(
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "9a4123",
"9a4123", "9a4062", "9a4071", "9a4071", "0a4712", "0a4713", "1a4128",
"1a4127", "1a4127", "1a4129", "2a4243", "2a4244", "2a4246", "2a4247",
"3a4127", "3a4128", "3a4129", "1a4128", "1a4127", "1a4127", "9a4123",
"9a4123", "9a4062", "9a4071", "9a4071", "0a4712", "0a4713"
)),
Year = factor(c(
"1981", "1982", "1983", "1983", "1984", "1981", "1981", "1981",
"1982", "1982", "1982", "1983", "1983", "1983", "1984", "1984",
"1984", "1984", "1984", "1985", "1985", "1986", "1986", "1986",
"1986", "1986", "1987", "1987", "1987", "1988", "1988", "1988",
"1987", "1987", "1987", "1985", "1985", "1985", "1985", "1985",
"1986", "1986"
)),
Tb = factor(c(
"S", "S", "S", "T", "S", "S", "S", "T", "S", "S", "T", "S",
"T", "T", "T", "T", "S", "T", "T", "S", "S", "S", "T", "T", "S",
"S", "S", "S", "S", "S", NA, "S", "S", "T", "T", "T", "T", "S",
"T", "T", "S", "S"
)),
Sex = factor(c(
"M", "M", "M", "M", "M", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "M", "F", "F", "M", "F", "F", "F", "F", "M", "F", "M",
"F", "M", "F", "F", "M", "M", "F", "F", "M", "F", "M", "F", "F",
"M", "F", "M", "F"
)),
Cww = c(
NA, NA, NA, 4.2, 4.7, 4.1, 4.4, 3.8, 5.1, 4.9, 4.6, 4.1, 3.9, 4.6, 4.4, 4.1,
4.4, 4.8, 5.2, 5.7, 5, 4.7, 5.6, 5, 5.5, 4.5, 4.1, 3.9, 4.5, NA, 4.1, NA, 4.7,
5.6, 5, 4.4, 4.1, 4.4, 4.8, 5.2, 5.7, 5
),
Diam = c(
NA, NA, NA, 21.7, 21.1, 20, 21.6, 20.1, 22, 21.1, 20.8, 20.3, 19.1, 19.8,
19.2, 19.8, 19.9, 20.2, 21.1, 22.1, 21.9, 22.1, 22, 21.1, 22, 19.8, 20.3,
19.1, 20.5, NA, 19.1, 19.6, 22.1, 22, 21.1, 19.2, 19.8, 19.9, 20.2, 21.1,
22.1, 21.9
),
Bwt = c(
NA, NA, NA, 50, 45, 51, 53, 43, 45, 48, 49, 48, 42, 44, 40, 41, 45, 47, 49,
53, 50, 38, 51, 39, 53, 37, 39, 36, 45, NA, 36, 38, 38, 51, 39, 40, 41, 45,
47, 49, 53, 50
)
)
What I would line to obtain is a new data frame where Sid
and Did
columns are merged into a new column Parent
. So, for each row of the original data frame there will be two new rows in the new data frame with same Id
value.
Example:
Original row (id1, sid1, did1)
New rows (id1, sis1), (id1, did1)
.
I hope I made it clear, otherwise let me know.
Thank you!
Extremely confusing question, but a best attempt to answer:
library(tidyr)
library(dmm)
data("sheep.df")
Original_row <- sheep.df %>%
dplyr::select(Id, SId, DId)
New_rows <- Original_row %>%
pivot_longer(cols = c("SId", "DId"))
A subset of the result:
Id name value
<fct> <chr> <fct>
1 6a4246 SId 9a4003
2 6a4246 DId 9a4071
3 7a4127 SId 0a4721
4 7a4127 DId 0a4712
5 7a4128 SId 0a4721
6 7a4128 DId 0a4713