I have the following data
# Example data: 17 records spread over ids 1-4 (5, 3, 5 and 4 records each).
pt_id <- rep(c(1, 2, 3, 4), times = c(5, 3, 5, 4))
Tob_pk <- c(
  2, 5, 7, 1, 8, 12, 14, 3, 6,
  8, 10, 20, 13, 5, 4, 12, 10
)

# Three categorical columns; NA marks a missing answer.
Tobacco <- c(
  "Once", "Twice", "Never", NA, NA, NA, NA, NA, "Once",
  "Twice", "Quit", "Once", NA, NA, "Never", NA, "Never"
)
Alcohol <- c(
  "Twice", "Once", NA, NA, "Never", NA, NA, "Once", NA,
  "Quit", "Twice", NA, "Once", NA, NA, "Never", "Never"
)
PA <- c(
  "Once", NA, "Never", NA, NA, NA, NA, NA, "Once",
  NA, "Quit", "Once", NA, NA, "Never", NA, NA
)

# Assemble the columns into one data frame and display it.
mydata <- data.frame(
  pt_id = pt_id,
  Tob_pk = Tob_pk,
  Tobacco = Tobacco,
  Alcohol = Alcohol,
  PA = PA
)
mydata
I want the summary/proportions for each variable in my dataset. I tried the following code to get them:
# Attempt: apply prop.table() directly to columns 3:5 (Tobacco, Alcohol, PA).
# Those columns hold character values, and prop.table() is given the raw
# column rather than a table of counts.
# NOTE(review): `%>%`, summarize_at() and funs() are not base R; presumably
# dplyr is attached, although no library() call is shown in this snippet.
data_summ <- mydata %>%
summarize_at(.vars=3:5, funs(prop.table(.)))
However, I am getting the following error
Error: Problem with `summarise()` input `Tobacco`.
x invalid 'type' (character) of argument
ℹ Input `Tobacco` is `prop.table(Tobacco)`.
Run `rlang::last_error()` to see where the error occurred.
I am not sure where I am going wrong. Any suggestions on how to get the following output, but with the percentage of NAs included as well, would be helpful.
Tobacco Alcohol PA
Never 0.3333333 Never 0.3333333 Never 0.3333333
Once 0.3333333 Once 0.3333333 Once 0.5000000
Quit 0.1111111 Quit 0.1111111 Quit 0.1666667
Twice 0.2222222 Twice 0.2222222
Thanks in advance!
Using base R:
# Example data: 17 rows; the three categorical columns contain missing values.
pt_id <- c(1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4)
Tob_pk <- c(2, 5, 7, 1, 8, 12, 14, 3, 6, 8, 10, 20, 13, 5, 4, 12, 10)
Tobacco <- c(
  "Once", "Twice", "Never", NA, NA, NA, NA, NA, "Once",
  "Twice", "Quit", "Once", NA, NA, "Never", NA, "Never"
)
Alcohol <- c(
  "Twice", "Once", NA, NA, "Never", NA, NA, "Once", NA,
  "Quit", "Twice", NA, "Once", NA, NA, "Never", "Never"
)
PA <- c(
  "Once", NA, "Never", NA, NA, NA, NA, NA, "Once",
  NA, "Quit", "Once", NA, NA, "Never", NA, NA
)
mydata <- data.frame(pt_id, Tob_pk, Tobacco, Alcohol, PA)

# Proportion of every level in columns 3:5, counting NA as its own category
# (useNA = "ifany"), so each column's proportions sum to 1 over all 17 rows.
# lapply() is used rather than apply(): apply() first coerces the data frame
# to a matrix and then simplifies its result, so it would hand back a matrix
# instead of a list whenever all columns happen to have the same number of
# levels. lapply() always returns a named list, one table per column.
lapply(mydata[3:5], function(x) prop.table(table(x, useNA = "ifany")))
$Tobacco
x
Never Once Quit Twice <NA>
0.17647059 0.17647059 0.05882353 0.11764706 0.47058824
$Alcohol
x
Never Once Quit Twice <NA>
0.17647059 0.17647059 0.05882353 0.11764706 0.47058824
$PA
x
Never Once Quit <NA>
0.11764706 0.17647059 0.05882353 0.64705882
Created on 2021-01-18 by the reprex package (v0.3.0)
Using the tidyverse:
# Attach the tidyverse, which provides purrr's map_dfr() used below.
# `mydata` is not created in this snippet; it is the data frame built earlier.
library(tidyverse)
# For each of columns 3:5, tabulate the values, turn the counts into
# proportions, and row-bind the results into one tibble (one row per column).
# table() is called without `useNA`, so missing values are left out and the
# proportions are taken over the non-missing entries only.
# A level that never occurs in a column (e.g. "Twice" in PA) shows up as NA
# in that column's row.
map_dfr(mydata[3:5], ~prop.table(table(.x)))
#> # A tibble: 3 x 4
#> Never Once Quit Twice
#> <table> <table> <table> <table>
#> 1 0.3333333 0.3333333 0.1111111 0.2222222
#> 2 0.3333333 0.3333333 0.1111111 0.2222222
#> 3 0.3333333 0.5000000 0.1666667 NA
Created on 2021-01-18 by the reprex package (v0.3.0)