Search code examples
rfor-loopnormalization

0 to 1 normalisation in R whilst preserving column 1 and headers


I'm trying to normalise my dataset between 0 to 1. Each column should be normalised independently. I want to output a new dataframe which preserves column one (not normalised) and all the original column headers.

This is a subset of my data:

SEC <- structure(list(ml = c(0, 0.03, 0.06, 0.09, 0.12, 0.15, 0.18, 
0.21, 0.24, 0.27), A1_280 = c(0.542, 0.322, 0.286, 0.261, 0.19, 
-0.258, -0.272, -0.046, -0.005, 0.138), A1_420 = c(-0.06, -0.303, 
-0.192, -0.381, 0.15, -0.268, -0.576, -0.016, -0.541, -0.41), 
    A2_280 = c(9.877, 27.637, 3.513, -0.882, -1.92, -1.251, -2.284, 
    -2.129, -3.131, -2.913), A2_420 = c(-0.445, 13.337, 1.075, 
    -1.402, -2.156, -2.263, -1.988, -2.105, -2.082, -2.61), A3_280 = c(8.782, 
    59.775, 56.769, 22.842, 9.086, 3.466, 2.256, 1.341, 0.946, 
    0.754), A3_420 = c(0.54, 30.736, 29.073, 12.277, 4.413, 1.77, 
    1.123, 0.488, 0.634, -0.011), B1_280 = c(14.95, 61.441, 37.189, 
    10.928, 4.316, 2.292, 0.757, 0.995, 0.997, -0.07), B1_420 = c(2.455, 
    30.966, 18.61, 4.779, 1.511, 0.74, 0.267, 0.533, 0.149, -0.551
    ), B2_280 = c(-0.288, -0.304, -0.006, -0.158, -0.284, -0.131, 
    -0.443, -0.081, -0.387, -0.04), B2_420 = c(-0.074, -0.256, 
    0.022, 0.104, -0.287, -0.139, -0.015, 0.1, -0.021, -0.146
    ), B3_280 = c(0.084, 0.043, 0.061, 0.032, 0.038, 0.072, 0.03, 
    0.128, 0.077, 0.098), B3_420 = c(-0.056, 0.095, 0.05, -0.015, 
    -0.106, 0.106, -0.017, -0.001, 0.036, 0.139), AB1_280 = c(1.599, 
    1.908, 0.735, 0.49, 0.708, 0.109, 0.702, -0.487, -0.009, 
    -0.196), AB1_420 = c(0.199, 1.218, 0.469, 0.564, 0.498, -0.2, 
    -0.322, 0.294, 0.367, -0.281), AB2_280 = c(-1.46, -1.2, -1.977, 
    -2.736, -2.087, -2.144, -2.246, -2.84, -2.304, -3.106), AB2_420 = c(-1, 
    -0.468, -0.459, -0.345, -1.145, -0.924, -1.622, -0.869, -1.028, 
    -1.183), AB3_280 = c(0.306, 1.392, -2.248, -3.247, -3.715, 
    -2.699, -3.896, -2.744, -3.653, -3.387), AB3_420 = c(-0.899, 
    0.817, -1.41, -1.162, -1.258, -1.409, -1.7, -1.309, -1.946, 
    -1.658), AB4_280 = c(6.847, 55.721, 51.163, 21.166, 8.441, 
    3.105, 2.631, 1.265, -0.184, 0.529), AB4_420 = c(-0.861, 
    27.465, 25.185, 10.767, 4.136, 1.414, 0.545, -0.098, 0.242, 
    -0.509)), row.names = c(NA, -10L), spec = structure(list(
    cols = list(ml = structure(list(), class = c("collector_double", 
    "collector")), A1_280 = structure(list(), class = c("collector_double", 
    "collector")), A1_420 = structure(list(), class = c("collector_double", 
    "collector")), A2_280 = structure(list(), class = c("collector_double", 
    "collector")), A2_420 = structure(list(), class = c("collector_double", 
    "collector")), A3_280 = structure(list(), class = c("collector_double", 
    "collector")), A3_420 = structure(list(), class = c("collector_double", 
    "collector")), B1_280 = structure(list(), class = c("collector_double", 
    "collector")), B1_420 = structure(list(), class = c("collector_double", 
    "collector")), B2_280 = structure(list(), class = c("collector_double", 
    "collector")), B2_420 = structure(list(), class = c("collector_double", 
    "collector")), B3_280 = structure(list(), class = c("collector_double", 
    "collector")), B3_420 = structure(list(), class = c("collector_double", 
    "collector")), AB1_280 = structure(list(), class = c("collector_double", 
    "collector")), AB1_420 = structure(list(), class = c("collector_double", 
    "collector")), AB2_280 = structure(list(), class = c("collector_double", 
    "collector")), AB2_420 = structure(list(), class = c("collector_double", 
    "collector")), AB3_280 = structure(list(), class = c("collector_double", 
    "collector")), AB3_420 = structure(list(), class = c("collector_double", 
    "collector")), AB4_280 = structure(list(), class = c("collector_double", 
    "collector")), AB4_420 = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x5606ec29c390>, class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"))

Here is my code so far:

normalize_0_to_1_columnwise <- function(SEC) {
  normalized_SEC <- data.frame(ml = SEC$ml)  # Copy the first column as it is
  
  # Apply normalization for each column (excluding the first column 'ml')
  for (col in names(SEC)[-1]) {
    normalized_SEC[[col]] <- (SEC[[col]] - min(SEC[[col]])) / (max(SEC[[col]]) - min(SEC[[col]]))
  }
  
  # Preserve the original column headers
  colnames(normalized_SEC)[-1] <- colnames(SEC)[-1]

}

# Output normalized dataframe
normalized_SEC 

This works for preserving the first column 'ml' and all the column headers, but all the 'values' in the dataframe are NAs. Where have I gone wrong?

I know there are other similar questions answered but I can't get them to work for my data and required output.


Solution

  • In tidyverse you could do:

    library(tidyverse)
    sec_scaled <- mutate(SEC, across(-ml, scales::rescale))
    
    sec_scaled
    
    # A tibble: 10 × 21
          ml A1_280 A1_420  A2_280 A2_420  A3_280 A3_420 B1_280 B1_420 B2_280 B2_420 B3_280
       <dbl>  <dbl>  <dbl>   <dbl>  <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
     1  0    1      0.711  0.423   0.136  0.136   0.0179 0.244  0.0954  0.355 0.545  0.551 
     2  0.03 0.730  0.376  1       1      1       1      1      1       0.318 0.0793 0.133 
     3  0.06 0.686  0.529  0.216   0.231  0.949   0.946  0.606  0.608   1     0.790  0.316 
     4  0.09 0.655  0.269  0.0731  0.0758 0.374   0.400  0.179  0.169   0.652 1      0.0204
     5  0.12 0.568  1      0.0394  0.0285 0.141   0.144  0.0713 0.0654  0.364 0      0.0816
     6  0.15 0.0172 0.424  0.0611  0.0218 0.0459  0.0579 0.0384 0.0410  0.714 0.379  0.429 
     7  0.18 0      0      0.0275  0.0390 0.0254  0.0369 0.0134 0.0260  0     0.696  0     
     8  0.21 0.278  0.771  0.0326  0.0317 0.00995 0.0162 0.0173 0.0344  0.828 0.990  1     
     9  0.24 0.328  0.0482 0       0.0331 0.00325 0.0210 0.0173 0.0222  0.128 0.680  0.480 
    10  0.27 0.504  0.229  0.00709 0      0       0      0      0       0.922 0.361  0.694 
    # ℹ 9 more variables: B3_420 <dbl>, AB1_280 <dbl>, AB1_420 <dbl>, AB2_280 <dbl>,
    #   AB2_420 <dbl>, AB3_280 <dbl>, AB3_420 <dbl>, AB4_280 <dbl>, AB4_420 <dbl>