I'm trying to normalise my dataset between 0 to 1. Each column should be normalised independently. I want to output a new dataframe which preserves column one (not normalised) and all the original column headers.
This is a subset of my data:
SEC <- structure(list(ml = c(0, 0.03, 0.06, 0.09, 0.12, 0.15, 0.18,
0.21, 0.24, 0.27), A1_280 = c(0.542, 0.322, 0.286, 0.261, 0.19,
-0.258, -0.272, -0.046, -0.005, 0.138), A1_420 = c(-0.06, -0.303,
-0.192, -0.381, 0.15, -0.268, -0.576, -0.016, -0.541, -0.41),
A2_280 = c(9.877, 27.637, 3.513, -0.882, -1.92, -1.251, -2.284,
-2.129, -3.131, -2.913), A2_420 = c(-0.445, 13.337, 1.075,
-1.402, -2.156, -2.263, -1.988, -2.105, -2.082, -2.61), A3_280 = c(8.782,
59.775, 56.769, 22.842, 9.086, 3.466, 2.256, 1.341, 0.946,
0.754), A3_420 = c(0.54, 30.736, 29.073, 12.277, 4.413, 1.77,
1.123, 0.488, 0.634, -0.011), B1_280 = c(14.95, 61.441, 37.189,
10.928, 4.316, 2.292, 0.757, 0.995, 0.997, -0.07), B1_420 = c(2.455,
30.966, 18.61, 4.779, 1.511, 0.74, 0.267, 0.533, 0.149, -0.551
), B2_280 = c(-0.288, -0.304, -0.006, -0.158, -0.284, -0.131,
-0.443, -0.081, -0.387, -0.04), B2_420 = c(-0.074, -0.256,
0.022, 0.104, -0.287, -0.139, -0.015, 0.1, -0.021, -0.146
), B3_280 = c(0.084, 0.043, 0.061, 0.032, 0.038, 0.072, 0.03,
0.128, 0.077, 0.098), B3_420 = c(-0.056, 0.095, 0.05, -0.015,
-0.106, 0.106, -0.017, -0.001, 0.036, 0.139), AB1_280 = c(1.599,
1.908, 0.735, 0.49, 0.708, 0.109, 0.702, -0.487, -0.009,
-0.196), AB1_420 = c(0.199, 1.218, 0.469, 0.564, 0.498, -0.2,
-0.322, 0.294, 0.367, -0.281), AB2_280 = c(-1.46, -1.2, -1.977,
-2.736, -2.087, -2.144, -2.246, -2.84, -2.304, -3.106), AB2_420 = c(-1,
-0.468, -0.459, -0.345, -1.145, -0.924, -1.622, -0.869, -1.028,
-1.183), AB3_280 = c(0.306, 1.392, -2.248, -3.247, -3.715,
-2.699, -3.896, -2.744, -3.653, -3.387), AB3_420 = c(-0.899,
0.817, -1.41, -1.162, -1.258, -1.409, -1.7, -1.309, -1.946,
-1.658), AB4_280 = c(6.847, 55.721, 51.163, 21.166, 8.441,
3.105, 2.631, 1.265, -0.184, 0.529), AB4_420 = c(-0.861,
27.465, 25.185, 10.767, 4.136, 1.414, 0.545, -0.098, 0.242,
-0.509)), row.names = c(NA, -10L), spec = structure(list(
cols = list(ml = structure(list(), class = c("collector_double",
"collector")), A1_280 = structure(list(), class = c("collector_double",
"collector")), A1_420 = structure(list(), class = c("collector_double",
"collector")), A2_280 = structure(list(), class = c("collector_double",
"collector")), A2_420 = structure(list(), class = c("collector_double",
"collector")), A3_280 = structure(list(), class = c("collector_double",
"collector")), A3_420 = structure(list(), class = c("collector_double",
"collector")), B1_280 = structure(list(), class = c("collector_double",
"collector")), B1_420 = structure(list(), class = c("collector_double",
"collector")), B2_280 = structure(list(), class = c("collector_double",
"collector")), B2_420 = structure(list(), class = c("collector_double",
"collector")), B3_280 = structure(list(), class = c("collector_double",
"collector")), B3_420 = structure(list(), class = c("collector_double",
"collector")), AB1_280 = structure(list(), class = c("collector_double",
"collector")), AB1_420 = structure(list(), class = c("collector_double",
"collector")), AB2_280 = structure(list(), class = c("collector_double",
"collector")), AB2_420 = structure(list(), class = c("collector_double",
"collector")), AB3_280 = structure(list(), class = c("collector_double",
"collector")), AB3_420 = structure(list(), class = c("collector_double",
"collector")), AB4_280 = structure(list(), class = c("collector_double",
"collector")), AB4_420 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x5606ec29c390>, class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
Here is my code so far:
normalize_0_to_1_columnwise <- function(SEC) {
normalized_SEC <- data.frame(ml = SEC$ml) # Copy the first column as it is
# Apply normalization for each column (excluding the first column 'ml')
for (col in names(SEC)[-1]) {
normalized_SEC[[col]] <- (SEC[[col]] - min(SEC[[col]])) / (max(SEC[[col]]) - min(SEC[[col]]))
}
# Preserve the original column headers
colnames(normalized_SEC)[-1] <- colnames(SEC)[-1]
}
# Output normalized dataframe
normalized_SEC
This works for preserving the first column 'ml' and all the column headers, but all the 'values' in the dataframe are NAs. Where have I gone wrong?
I know there are other similar questions answered but I can't get them to work for my data and required output.
In tidyverse you could do:
library(tidyverse)
sec_scaled <- mutate(SEC, across(-ml, scales::rescale))
sec_scaled
# A tibble: 10 × 21
ml A1_280 A1_420 A2_280 A2_420 A3_280 A3_420 B1_280 B1_420 B2_280 B2_420 B3_280
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 1 0.711 0.423 0.136 0.136 0.0179 0.244 0.0954 0.355 0.545 0.551
2 0.03 0.730 0.376 1 1 1 1 1 1 0.318 0.0793 0.133
3 0.06 0.686 0.529 0.216 0.231 0.949 0.946 0.606 0.608 1 0.790 0.316
4 0.09 0.655 0.269 0.0731 0.0758 0.374 0.400 0.179 0.169 0.652 1 0.0204
5 0.12 0.568 1 0.0394 0.0285 0.141 0.144 0.0713 0.0654 0.364 0 0.0816
6 0.15 0.0172 0.424 0.0611 0.0218 0.0459 0.0579 0.0384 0.0410 0.714 0.379 0.429
7 0.18 0 0 0.0275 0.0390 0.0254 0.0369 0.0134 0.0260 0 0.696 0
8 0.21 0.278 0.771 0.0326 0.0317 0.00995 0.0162 0.0173 0.0344 0.828 0.990 1
9 0.24 0.328 0.0482 0 0.0331 0.00325 0.0210 0.0173 0.0222 0.128 0.680 0.480
10 0.27 0.504 0.229 0.00709 0 0 0 0 0 0.922 0.361 0.694
# ℹ 9 more variables: B3_420 <dbl>, AB1_280 <dbl>, AB1_420 <dbl>, AB2_280 <dbl>,
# AB2_420 <dbl>, AB3_280 <dbl>, AB3_420 <dbl>, AB4_280 <dbl>, AB4_420 <dbl>