I am having a trouble in doing double gathering on several column that are related to Comorbidities, and other columns that are related to Symptoms. The purpose is to get a count and frequency per groups of comorbidities and symptoms.
This is the type of data I have.
test <- structure(
list(
ID = c("1",
"2", "3",
"4", "5",
"6"),
Chills = c("No", "Mild", "No", "Mild", "No", "No"),
Cough = c("No", "Severe", "No", "Mild", "Mild", "No"),
Diarrhoea = c("No", "Mild", "No", "No", "No", "No"),
Fatigue = c("No", "Moderate", "Mild", "Mild", "Mild", "Mild"),
Headcahe = c("No", "No", "No", "Mild", "No", "No"),
`Loss of smell and taste` = c("No", "No", "No", "No", "No", "No"),
`Muscle Ache` = c("No", "Moderate", "No", "Moderate", "Mild", "Mild"),
`Nasal Congestion` = c("No", "No", "No", "No", "Mild", "No"),
`Nausea and Vomiting` = c("No", "No",
"No", "No", "No", "No"),
`Shortness of Breath` = c("No",
"Mild", "No", "No", "No", "Mild"),
`Sore Throat` = c("No",
"No", "No", "No", "Mild", "No"),
Sputum = c("No", "Mild",
"No", "Mild", "Mild", "No"),
Temperature = c("No", "No",
"No", "No", "No", "37.5-38"),
Comorbidity_one = c(
"Asthma (managed with an inhaler)",
"None",
"Obesity",
"High Blood Pressure (hypertension)",
"None",
"None"
),
Comorbidity_two = c("Diabetes Type 2", NA,
NA, "Obesity", NA, NA),
Comorbidity_three = c(
"Asthma (managed with an inhaler)",
"None",
"Obesity",
"High Blood Pressure (hypertension)",
"None",
NA_character_
),
Comorbidity_four = c(
"Asthma (managed with an inhaler)",
"None",
"High Blood Pressure (hypertension)",
NA_character_,
NA_character_,
NA_character_
),
Comorbidity_five = c(
"Asthma (managed with an inhaler)",
"None",
NA_character_,
NA_character_,
NA_character_,
NA_character_
),
Comorbidity_six = c(
NA_character_,
NA_character_,
NA_character_,
NA_character_,
NA_character_,
NA_character_
),
Comorbidity_seven = c(
NA_character_,
NA_character_,
NA_character_,
NA_character_,
NA_character_,
NA_character_
),
Comorbidity_eight = c(
"High Blood Pressure (hypertension)",
NA_character_,
NA_character_,
NA_character_,
NA_character_,
NA_character_
),
Comorbidity_nine = c(
NA_character_,
NA_character_,
NA_character_,
"High Blood Pressure (hypertension)",
NA_character_,
"High Blood Pressure (hypertension)"
)
),
row.names = c(NA,-6L),
class = c("tbl_df",
"tbl", "data.frame")
)
But the outputted object, just a sample, should look like this:
structure(list(Comorbidities = c("Asthma", "Asthma", "Asthma",
"Diabetes", "Diabetes", "Diabetes", "High blood Pressure", "High blood Pressure",
"High blood Pressure"), Symptoms = c("Cough", "Cough", "Loss of smell and taste",
"Cough", "Chills mild", "Loss of smell and taste", "Cough", "Chills",
"Loss of smell and taste"), Group = c("Mild", "Moderate", "Severe",
"Mild", "Moderate", "Severe", "Mild", "Moderate", "Severe"),
Count = c(112, 10, 10, 123, 132, 153, 897, 98, 10), Percentage = c(0.23,
0.3, 0.1, 0.6, 0.5, 0.3, 0.8, 0.9, 0.5)), row.names = c(NA,
-9L), class = c("tbl_df", "tbl", "data.frame"))
I want to achieve this only with tidyverse, in R.
Maybe this is what you're after. I pivot longer separately for symptoms first and then co-morbidities, omitting records if they have none. Percentages are the number of symptoms among each morbidity. If that's not what you want, you can easily change it.
library(tidyr); library(dplyr)
pivot_longer(test, cols=2:14, names_to="symptom", values_to="severity") %>%
filter(severity!="No") %>%
pivot_longer(cols=starts_with("Comorbidity"), values_to="morbidity") %>%
filter(morbidity != "None") %>%
group_by(morbidity, symptom, severity) %>%
summarise(Count=n()) %>%
group_by(morbidity) %>%
mutate(Percentage=Count/sum(Count))
___
# A tibble: 15 x 5
# Groups: morbidity [2]
morbidity symptom severity Count Percentage
<chr> <chr> <chr> <int> <dbl>
1 High Blood Pressure (hypertension) Chills Mild 3 0.130
2 High Blood Pressure (hypertension) Cough Mild 3 0.130
3 High Blood Pressure (hypertension) Fatigue Mild 5 0.217
4 High Blood Pressure (hypertension) Headcahe Mild 3 0.130
5 High Blood Pressure (hypertension) Muscle Ache Mild 1 0.0435
6 High Blood Pressure (hypertension) Muscle Ache Moderate 3 0.130
7 High Blood Pressure (hypertension) Shortness of Breath Mild 1 0.0435
8 High Blood Pressure (hypertension) Sputum Mild 3 0.130
9 High Blood Pressure (hypertension) Temperature 37.5-38 1 0.0435
10 Obesity Chills Mild 1 0.125
11 Obesity Cough Mild 1 0.125
12 Obesity Fatigue Mild 3 0.375
13 Obesity Headcahe Mild 1 0.125
14 Obesity Muscle Ache Moderate 1 0.125
15 Obesity Sputum Mild 1 0.125