Below is the sample data set and the desired manipulations. As of yet, all works fine. Attempting to create a new calculated column. Some context, the smb stands for small business. 1,2,3,4 represent differing thresholds of what would be considered small. The desired column would what percentage of total employment is taken up by smb = 1 for a given area, for example. For area 001, this would be 46/1927 for example. I can figure out how to have it to be appear once but not as a complete column. How would I go about doing this? The desired result is at the bottom.
library(readxl)
library(dplyr)
library(data.table)
library(DBI)
library(stringr)
library(tidyverse)
library(gt)
employment <- c(1,45,125,130,165,260,600,601,2,46,127,132,167,265,601,602,50,61,110,121,170,305,55,603,52,66,112,123,172,310,604,605)
small <- c(1,1,2,2,3,4,NA,NA,1,1,2,2,3,4,NA,NA,1,1,2,2,3,4,NA,NA,1,1,2,2,3,4,NA,NA)
area <-c(001,001,001,001,001,001,001,001,001,001,001,001,001,001,001,001,003,003,003,003,003,003,003,003,003,003,003,003,003,003,003,003)
year<-c(2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020)
qtr <-c(1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2)
smbtest <- data.frame(employment,small,area,year,qtr)
smbtest$smb <-0
smbtest <- smbtest %>% mutate(smb = case_when(employment >=0 & employment <100 ~ "1",employment >=0 & employment <150 ~ "2",employment >=0 & employment <250 ~ "3", employment >=0 & employment <500 ~ "4"))
smbsummary2<-smbtest %>%
mutate(period = paste0(year,"q",qtr)) %>%
group_by(area,period,smb) %>%
summarise(employment = sum(employment), worksites = n(),
.groups = 'drop_last') %>%
mutate(employment = cumsum(employment),
worksites = cumsum(worksites))
smbsummary2<- smbsummary2%>%
group_by(area,smb)%>%
mutate(empprevyear=lag(employment),
empprevyearpp=employment-empprevyear,
empprevyearpct=((employment/empprevyear)-1),
empprevyearpct=scales::percent(empprevyearpct,accuracy = 0.01)
)
smblonger2<-smbsummary2 %>%
dplyr::select(area,period,employment,worksites,smb) %>%
ungroup() %>%
pivot_longer(cols = employment:worksites, names_to = "measure", values_to = "value") %>%
group_by(area,measure) %>%
pivot_wider(names_from = period, values_from = value)%>%filter(smb %in%
c("1","2","3","4"))%>%gt()%>%cols_label(
smb = md("**Category**"))
smblonger2
area period smb employment worksites pcttotal
1 2020q1 1 46 2 46/1927 (total employment)
2 2020q2 2 301 4 301/1927
3 2020q3 3 466 5 466/1927
4 2020q4 4 726 6 726/1927
schema
smb employment range
1 0 to 100
2 0 to 150
3 0 to 250
4 0 to 500
OK so here is my solution (someone will now come along with a 1 line function!)
library(dplyr)
library(tidyr)
employment <- c(1,45,125,130,165,260,600,601,2,46,127,132,167,265,601,602,50,61,110,121,170,305,55,603,52,66,112,123,172,310,604,605)
small <- c(1,1,2,2,3,4,NA,NA,1,1,2,2,3,4,NA,NA,1,1,2,2,3,4,NA,NA,1,1,2,2,3,4,NA,NA)
area <-c(001,001,001,001,001,001,001,001,001,001,001,001,001,001,001,001,003,003,003,003,003,003,003,003,003,003,003,003,003,003,003,003)
year<-c(2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020)
qtr <-c(1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2)
smbtest <- data.frame(employment,small,area,year,qtr)
smbtest$smb <-0 # I think this line is redundent
smbtest <- smbtest %>% mutate(smb = case_when(employment >=0 & employment <100 ~ "1",employment >=0 & employment <150 ~ "2",employment >=0 & employment <250 ~ "3", employment >=0 & employment <500 ~ "4"))
smbsummary2<-smbtest %>%
mutate(period = paste0(year,"q",qtr)) %>%
group_by(area,period,smb) %>%
summarise(employment = sum(employment), worksites = n(),
.groups = 'drop_last') %>%
mutate(employment = cumsum(employment),
worksites = cumsum(worksites))
smbsummary2 %>%
# Make the data wider (a column for each smb)
pivot_wider(
id_cols=c("area", "period"),
names_from = "smb",
values_from = c("employment", "worksites"),
names_prefix = "SMB"
) %>%
# calculate the %
mutate(across(starts_with("employment_SMB"),
~(100*(.x/employment_SMBNA)),
.names = "pcttotal_{.col}")) %>%
# Now make the data longer
pivot_longer(
cols = contains("SMB")
) %>%
# rework the data names so the smb is a value
separate(name, into=c("name", "smb"), sep="_SMB") %>%
# Make the date wider again to match the shape requested
pivot_wider(
id_cols=c("area", "period", "smb"),
names_from = "name",
values_from = "value"
) -> smbsummary3