Search code examples
rautomationiterationmapply

Using lists and mapply on user created function for sum coding contrasts in r


I want to use lists and mapply on user created function for sum coding contrasts in r. But when I try it does not work. Any help would be appreciated.

Specifically, I want to apply sum contrasts to am and vs to create the sum-coding variables am_c and vs_c in the mtcars data. I can do this the long way, but when I try to create a user-created function that produces a data frame with these results, called function_data_frame__sum_contrast() to complete this task, it does not work.

The function works when I put in individual inputs:

### applied function to individually inputted ivs and datasets
head(function_data_frame__sum_contrast(vs, mtcars_short_way_df))

   mpg cyl  disp  hp drat    wt  qsec vs am gear carb vs_c
1 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4    0
2 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4    0
3 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4    0
4 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3    0
5 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2    0
6 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3    0

It does not work when using the list form

### applied function to first value in list
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1], IV_info_short_way$dataset_analyses[1])

 Error in get(nm1) : 
  object 'IV_info_short_way$dataset_analyses[1]' not found 
3.
get(nm1) 
2.
data.frame(get(nm1)) 
1.
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1], 
    IV_info_short_way$dataset_analyses[1])

It does not work with mapply() either.

### attempts to mapply for all parts of relevant lists
mtcars_short_way_df <- 
  mapply(function_data_frame__sum_contrast, 
         (IV_info$IV_original[IV_info$IV_nature == "nominal"]), 
         (IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]), 
         SIMPLIFY = FALSE)

Error in mapply(function_data_frame__sum_contrast, (IV_info$IV_original[IV_info$IV_nature ==  : 
  zero-length inputs cannot be mixed with those of non-zero length

Please help, if possible.



Here is the code for the practice:






# practice script

## loads packages for analyses
# ---- NOTE: data wrangling
if(!require(tidyverse)){install.packages("tidyverse")}

## gives information about datasets

### mtcars
# ---- NOTE: displays head of data
head(mtcars)
# ---- NOTE: gives structure of data
str(mtcars, list.len=ncol(mtcars))
# ---- NOTE: gives colnames of data
colnames(mtcars)

## produces IV_info chart
IV_info <- 
  data.frame(
    cbind(
      IV = c("vs", "am"), 
      IV_analyses = c("vs", "am"), 
      IV_nature = c("nominal", "nominal"),
      dataset_name = c("mtcars"),
      dataset_analyses = c("mtcars")
    ))

## produces datasets for practice
# ---- NOTE: creates long way dataset
mtcars_long_way_df <- mtcars
# ---- NOTE: creates long way dataset
mtcars_short_way_df <- mtcars

## long way

### changes IV_info to IV_info_short_way
# ---- NOTE: creates dataset
IV_info_short_way <- IV_info
# ---- NOTE: changes dataset_analyses variable
IV_info_short_way$dataset_analyses <- paste(IV_info_short_way$dataset_analysis, "_short_way_df", sep="")

### creates individual contrast variables
# ---- NOTE: based on IV_info_long_way$IV_analyses list

#### variable: vs
# ---- NOTE: gives levels of variable
unique(mtcars_long_way_df$vs)
# ---- NOTE: gives length of levels of variable
length(unique(mtcars_long_way_df$vs))
# ---- NOTE: creates contrast variable
mtcars_long_way_df$vs_c <- mtcars_long_way_df$vs
# ---- NOTE: turns contrast variable to factor
mtcars_long_way_df$vs_c <- as.factor(as.character(mtcars_long_way_df$vs_c))
# ---- NOTE: creates contrast variable
contrasts(mtcars_long_way_df$vs_c) <- 
  contr.sum(as.numeric(as.character(length(unique(mtcars_long_way_df$vs)))))

#### variable: am
# ---- NOTE: gives levels of variable
unique(mtcars_long_way_df$am)
# ---- NOTE: gives length of levels of variable
length(unique(mtcars_long_way_df$am))
# ---- NOTE: creates contrast variable
mtcars_long_way_df$am_c <- mtcars_long_way_df$am
# ---- NOTE: turns contrast variable to factor
mtcars_long_way_df$am_c <- as.factor(as.character(mtcars_long_way_df$am_c))
# ---- NOTE: creates contrast variable
contrasts(mtcars_long_way_df$am_c) <- 
  contr.sum(as.numeric(as.character(length(unique(mtcars_long_way_df$am)))))

## short way way

### changes IV_info to IV_info_short_way
# ---- NOTE: creates dataset
IV_info_short_way <- IV_info
# ---- NOTE: changes dataset_analyses variable
IV_info_short_way$dataset_analyses <- paste(IV_info_short_way$dataset_analyses, "_short_way_df", sep="")

### creates function function_data_frame__sum_contrast
# ---- NOTE: creates function
function_data_frame__sum_contrast <- 
  # ---- NOTE: turns variable into sum contrasted version of variable
  # ---- NOTE: variable_name ==  variable to be turned to sum contrast
  # ---- NOTE: dataset_name == dataset that contains variable name
  # ---- NOTE: generally speaking, procedure is to create new variable with "_c" as suffix for corresponding sum contrasted variable
  function(variable_name, dataset_name)
  {
    # ---- NOTE: # changes variable_name and dataset_name to strings
    colnm <- deparse(substitute(variable_name))
    nm1 <- deparse(substitute(dataset_name))
    # ---- NOTE: # base data frame
    dataset_funct_object_A <-
      data.frame(get(nm1))
    # ---- NOTE: adds merging column to base data frame
    dataset_funct_object_A$merging_column <- dataset_funct_object_A[[colnm]]
    # ---- NOTE: ## turns data into data frame
    dataset_funct_object_A <- data.frame(dataset_funct_object_A)
    # ---- NOTE: # sets up unique values part of data
    # ---- NOTE: ## creates object with unique variable values
    dataset_funct_object_B <-  unique(dataset_funct_object_A[[colnm]])
    # ---- NOTE: ### turns object to data frame
    dataset_funct_object_B <- data.frame(dataset_funct_object_B)
    # ---- NOTE: ### changes colnames
    colnames(dataset_funct_object_B) <- c("variable_levels")
    # ---- NOTE: ## gives info on whether a given variable_level has a value of NA
    dataset_funct_object_B$isNA <- is.na(dataset_funct_object_B$variable_levels)
    length(which(dataset_funct_object_B$isNA=="TRUE"))
    # ---- NOTE: ## gives length of data frame column
    dataset_funct_object_B$variable_level_number <- (as.numeric(as.character(length(dataset_funct_object_B$variable_levels))) - as.numeric(as.character(length(which(dataset_funct_object_B$isNA=="TRUE")))))
    # ---- NOTE: ## displays distinct number of levels of variariable of interest, from variable_level_number
    as.numeric(distinct(dataset_funct_object_B, variable_level_number))
    # ---- NOTE: ## the contrast matrix for categorical variable with a given number of levels
    contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
    # ---- NOTE: ## creates variable_levels_c variable, which will be used to hold contrast matrix
    dataset_funct_object_B$variable_levels_c <- as.factor(as.character(dataset_funct_object_B$variable_levels))
    # ---- NOTE: ## inserts the contrast matrix for categorical variable with a given number of levels transfers into appropriate variable
    contrasts(dataset_funct_object_B$variable_levels_c) = contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
    # ---- NOTE: adds merging column to data frame
    dataset_funct_object_B$merging_column <- dataset_funct_object_B$variable_levels
    # ---- NOTE: merges original dataset with dataset of interest
    dataset_funct_object_C <- 
      merge(dataset_funct_object_A, 
            dataset_funct_object_B, 
            by.x = "merging_column", 
            by.y = "merging_column", 
            all.x = TRUE,
            all.y = FALSE,
            no.dups = TRUE)
    # ---- NOTE: # removes merging column from appropriate object
    dataset_funct_object_D <- 
      dataset_funct_object_C %>% 
      select(
        -c(merging_column, 
           variable_levels,
           isNA,
           variable_level_number)
      )
    # ---- NOTE: turns data into data frame
    dataset_funct_object_D <- data.frame(dataset_funct_object_D)
    # ---- NOTE: ## changes colname
    names(dataset_funct_object_D)[names(dataset_funct_object_D) == "variable_levels_c"] <- paste(colnm, "_c", sep = "")
    # ---- NOTE: turns data into data frame
    dataset_funct_object_D <- data.frame(dataset_funct_object_D)
    # ---- NOTE: # returns appropriate object/variable
    return(dataset_funct_object_D)
  }

### applied function to individually inputted ivs and datasets
head(function_data_frame__sum_contrast(vs, mtcars_short_way_df))

### applied function to first value in list
function_data_frame__sum_contrast(IV_info_short_way$IV_analyses[1], IV_info_short_way$dataset_analyses[1])
# ---- NOTE: does not work

### attempts to mapply for all parts of relevant lists
mtcars_short_way_df <- 
  mapply(function_data_frame__sum_contrast, 
         (IV_info$IV_original[IV_info$IV_nature == "nominal"]), 
         (IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]), 
         SIMPLIFY = FALSE)
# ---- NOTE: does not work

Solution

  • The deparse/substitute works when the input argument is unquoted and want to retrieve as a string. In the loop, we are passing a string directly. So, we can change that line to

     colnm <- variable_name
     nm1 <- dataset_name
    

    -full function

    function_data_frame__sum_contrast <- 
      # ---- NOTE: turns variable into sum contrasted version of variable
      # ---- NOTE: variable_name ==  variable to be turned to sum contrast
      # ---- NOTE: dataset_name == dataset that contains variable name
      # ---- NOTE: generally speaking, procedure is to create new variable with "_c" as suffix for corresponding sum contrasted variable
      function(variable_name, dataset_name)
      {
        # ---- NOTE: # changes variable_name and dataset_name to strings
        #colnm <- deparse(substitute(variable_name))
        colnm <- variable_name
        nm1 <- dataset_name
        # ---- NOTE: # base data frame
        dataset_funct_object_A <-
          data.frame(get(nm1))
        # ---- NOTE: adds merging column to base data frame
        dataset_funct_object_A$merging_column <- dataset_funct_object_A[[colnm]]
        # ---- NOTE: ## turns data into data frame
        dataset_funct_object_A <- data.frame(dataset_funct_object_A)
        # ---- NOTE: # sets up unique values part of data
        # ---- NOTE: ## creates object with unique variable values
        dataset_funct_object_B <-  unique(dataset_funct_object_A[[colnm]])
        # ---- NOTE: ### turns object to data frame
        dataset_funct_object_B <- data.frame(dataset_funct_object_B)
        # ---- NOTE: ### changes colnames
        colnames(dataset_funct_object_B) <- c("variable_levels")
        # ---- NOTE: ## gives info on whether a given variable_level has a value of NA
        dataset_funct_object_B$isNA <- is.na(dataset_funct_object_B$variable_levels)
        length(which(dataset_funct_object_B$isNA=="TRUE"))
        # ---- NOTE: ## gives length of data frame column
        dataset_funct_object_B$variable_level_number <- (as.numeric(as.character(length(dataset_funct_object_B$variable_levels))) - as.numeric(as.character(length(which(dataset_funct_object_B$isNA=="TRUE")))))
        # ---- NOTE: ## displays distinct number of levels of variariable of interest, from variable_level_number
        as.numeric(distinct(dataset_funct_object_B, variable_level_number))
        # ---- NOTE: ## the contrast matrix for categorical variable with a given number of levels
        contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
        # ---- NOTE: ## creates variable_levels_c variable, which will be used to hold contrast matrix
        dataset_funct_object_B$variable_levels_c <- as.factor(as.character(dataset_funct_object_B$variable_levels))
        # ---- NOTE: ## inserts the contrast matrix for categorical variable with a given number of levels transfers into appropriate variable
        contrasts(dataset_funct_object_B$variable_levels_c) = contr.sum(as.numeric(distinct(dataset_funct_object_B, variable_level_number)))
        # ---- NOTE: adds merging column to data frame
        dataset_funct_object_B$merging_column <- dataset_funct_object_B$variable_levels
        # ---- NOTE: merges original dataset with dataset of interest
        dataset_funct_object_C <- 
          merge(dataset_funct_object_A, 
                dataset_funct_object_B, 
                by.x = "merging_column", 
                by.y = "merging_column", 
                all.x = TRUE,
                all.y = FALSE,
                no.dups = TRUE)
        # ---- NOTE: # removes merging column from appropriate object
        dataset_funct_object_D <- 
          dataset_funct_object_C %>% 
          select(
            -c(merging_column, 
               variable_levels,
               isNA,
               variable_level_number)
          )
        # ---- NOTE: turns data into data frame
        dataset_funct_object_D <- data.frame(dataset_funct_object_D)
        # ---- NOTE: ## changes colname
        names(dataset_funct_object_D)[names(dataset_funct_object_D) == "variable_levels_c"] <- paste(colnm, "_c", sep = "")
        # ---- NOTE: turns data into data frame
        dataset_funct_object_D <- data.frame(dataset_funct_object_D)
        # ---- NOTE: # returns appropriate object/variable
        return(dataset_funct_object_D)
      }
    

    -testing

    Assuming IV_original is IV (as the column was not found in the OP's input example)

    mtcars_short_way_df <- 
      mapply(function_data_frame__sum_contrast, 
             (IV_info$IV[IV_info$IV_nature == "nominal"]), 
             (IV_info$dataset_analyses[IV_info$IV_nature == "nominal"]), 
             SIMPLIFY = FALSE)
             
    lapply(mtcars_short_way_df, head, 3)
    $vs
       mpg cyl disp  hp drat    wt  qsec vs am gear carb vs_c
    1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4    0
    2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4    0
    3 14.3   8  360 245 3.21 3.570 15.84  0  0    3    4    0
    
    $am
       mpg cyl  disp  hp drat    wt  qsec vs am gear carb am_c
    1 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2    0
    2 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2    0
    3 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1    0