I am using SQLDF in r to select, filter and group by the data frame dynamically based on user inputs.
I have done the variable selection and done the aggregation part dynamically but not able to add the where clause/having clause in sqldf dynamically. User may select multiple combination of variable and their values as we well.
What I want is
Below is the code with the data
library(sqldf)
table_a <- data.frame(id =c(1,1,1,2,2,2,3,4,4),
state = c("Texas","Texas","Texas","Alaska","Alaska","Alaska","Nevada","Texas","Texas"),
gender = c("M","M","M","F","F","F","F","F","F"),
channel = c("Y","Y","H","Y","D","D","H","Y","Y"),
spend = c(10,10,5,20,20,5,7,1,2))
# user selects variable and their values
# channel is not selected by user here
input_states <- 'Texas'
input_genders <- 'M'
input_channels <- NULL
a <- c('state','gender','channel')[c(!is.null(input_states),!is.null(input_genders),!is.null(input_channels))]
b <- paste(a, collapse = ",")
sqldf(sprintf("select %s,sum(spend) as tot_spend from table_a group by %s",b,b))
If-else may not be an option since there are multiple filter in my final application
You could build the where
clause the same way as you build the select
.
library(sqldf)
table_a <- data.frame(id =c(1,1,1,2,2,2,3,4,4),
state = c("Texas","Texas","Texas","Alaska","Alaska","Alaska","Nevada","Texas","Texas"),
gender = c("M","M","M","F","F","F","F","F","F"),
channel = c("Y","Y","H","Y","D","D","H","Y","Y"),
spend = c(10,10,5,20,20,5,7,1,2))
# user selects variable and their values
# channel is not selected by user here
input_states <- 'Texas'
input_genders <- 'M'
input_channels <- NULL
a <- c('state','gender','channel')[c(!is.null(input_states),!is.null(input_genders),!is.null(input_channels))]
b <- paste(a, collapse = ",")
# Create the where conditions
where_conditions <- c(
sprintf("state in ('%s')", paste(input_states, collapse = "', '")),
sprintf("gender in ('%s')", paste(input_genders, collapse = "', '")),
sprintf("channel in ('%s')", paste(input_channels, collapse = "', '"))
)[c(!is.null(input_states),!is.null(input_genders),!is.null(input_channels))]
# combine into where clause
where_clause <- paste(where_conditions, collapse = " and ")
sqldf(sprintf("select %s,sum(spend) as tot_spend from table_a where %s group by %s", b, where_clause, b))
#> state gender tot_spend
#> 1 Texas M 25
which I think is your requirement?
You can then control the output by changing the input_...
variables.
# Try with different combinations of input_...
input_states <- c('Texas', 'Nevada')
input_genders <- NULL
input_channels <- 'H'
a <- c('state','gender','channel')[c(!is.null(input_states),!is.null(input_genders),!is.null(input_channels))]
b <- paste(a, collapse = ",")
# Create the where conditions
where_conditions <- c(
sprintf("state in ('%s')", paste(input_states, collapse = "', '")),
sprintf("gender in ('%s')", paste(input_genders, collapse = "', '")),
sprintf("channel in ('%s')", paste(input_channels, collapse = "', '"))
)[c(!is.null(input_states),!is.null(input_genders),!is.null(input_channels))]
# combine into where clause
where_clause <- paste(where_conditions, collapse = " and ")
sqldf(sprintf("select %s,sum(spend) as tot_spend from table_a where %s group by %s", b, where_clause, b))
#> state channel tot_spend
#> 1 Nevada H 7
#> 2 Texas H 5
Created on 2024-07-01 with reprex v2.1.0