Search code examples
rviewacrossmutate

How to preserve column names after applying scale()?


Using the data frame pasted below as an example, when I pipe every column except the intervention group (a categorical variable) through scale(), the column names change to variable_name[,1], when I just want variable_name. This only seems to happen when I open the transformed df with the view() function after scaling the data. Whatever the reason for the [,1] suffix, I would like it to be removed.

The code I've been using is:

library(dplyr)

# 1st attempt: select the columns to scale by name range
# ("ppara":"lrp1" is the range of quantitative continuous columns).
df <- df %>%
  dplyr::mutate(across(.cols = c("ppara":"lrp1"), .fns = scale))

# 2nd attempt: select columns 3 to 52 by position and ask across() to reuse
# the original column names via .names = "{.col}".
# (The original snippet had an extra closing parenthesis, which is a syntax
# error; it is removed here.)
df %>%
  dplyr::mutate(across(.cols = c(3:52), .fns = scale, .names = "{.col}"))


# Printing df in the console seems to work and keeps the column names, but not when I open it with the view() function
> df
     id grup_int       ppara       ppard       pparg       nr1h3
1  50109018      LFD  2.88219852  0.21418973  1.29818239  0.40677902
2  50109019      LFD -0.09090266 -0.37583504 -0.17129420  0.24434046
3  50109025      LFD -0.51922065 -0.15165027  0.05554733 -0.42018092
4  50109026     NUTS          NA -0.12085566  0.01702707 -0.01326412
5  50109027      LFD          NA -0.14672313 -0.42809594 -0.82873851
6  50118001       OO          NA  0.68103605  0.25670869  0.49210029

But when I do view(df), the data frame looks as shown in Image 1:

Image 1

The df:

df <- structure(list(id = c(50109018, 50109019, 50109025, 50109026, 
50109027, 50118001, 50202099, 50203004, 50203006, 50203008, 50203009, 
50203010, 50203011, 50203012, 50203013, 50203014, 50203015, 50203016, 
50203017, 50203019, 50203020, 50203022, 50203026, 50203027, 50203029, 
50203030, 50203031, 50203032, 50430001, 50508026, 50508027, 50521001, 
50521002, 50527001, 50601001, 50705001, 60901020, 60901021, 60901023, 
60901024, 60901026, 60901027, 60901028, 60901029, 60901030, 60901031, 
60901033, 60901034, 60901035, 60901036, 60901037, 60901038, 70107034, 
70111021, 70111022, 70111023, 70111024, 70201047, 70204055, 70204056, 
70211014, 70710002, 70713001, 70713002, 70802011, 70802012, 70802013, 
70802015, 71801001, 71801002, 71801003, 110104017, 110104019, 
110104023, 110104024, 110104027, 110104028, 110104029, 110104030, 
110110005, 110113001, 110113003, 110113005, 110113006, 110113007, 
110113008, 110606056, 110606061, 111201006, 111201007, 111201014, 
111201017, 111201019, 111201026, 111202007, 111202009, 111202015, 
120715011, 120715012, 120715019, 120715020, 120715021, 120715022, 
120715025, 120715026, 120715027, 120715029, 120715030, 120715032, 
120715033, 120715034, 120715035, 120715037, 130102008, 130102009, 
130102010, 130102012, 130102013, 130102014, 130104004, 130105044, 
130105045, 130106034, 130106037, 130106038, 130108008, 130108009, 
140101088, 140101091, 140101096, 140101097, 140101099, 140102087, 
140102088, 140102089, 140102090, 140102092, 140102095, 140103019, 
140103020, 140103023, 140103024, 140103026, 140103027, 140103028, 
140103029, 140103030, 140103033, 140103035, 140103036, 140103038
), grup_int = structure(c(3L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 3L, 
3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 1L, 
3L, 3L, 2L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 1L, 2L, 3L, 
3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 
1L, 3L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 2L, 1L, 
3L, 2L, 2L, 1L, 2L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 
1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 2L, 
2L, 3L, 2L, 2L, 1L, 1L, 2L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 2L, 
3L, 3L, 1L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 3L), levels = c("OO", 
"NUTS", "LFD"), label = "GENERAL: Grupo de intervención", class = "factor"), 
    ppara = c(3.761, 1.276, 0.918, NA, NA, NA, 1.437, NA, NA, 
    1.108, NA, 1.962, 1.816, 1.26, 0.498, 1.329, 1.447, NA, 1.848, 
    0.71, 0.909, NA, NA, 1.165, 1.079, 0.556, 1.197, NA, NA, 
    0.917, 0.37, 1.591, NA, 1.019, NA, 1.232, NA, NA, NA, 0.597, 
    NA, 1.136, 0.602, 0.724, 0.799, 0.827, 0.701, NA, NA, 1.973, 
    1.471, NA, NA, NA, NA, 0.845, 0.734, 0.987, NA, 0.389, 0.289, 
    1.86, 2.453, 0.895, 3.927, 1.893, 0.684, 0.782, 1.056, NA, 
    1.378, NA, 3.017, 4.415, NA, NA, 1.329, 1.086, 0.586, NA, 
    NA, NA, 1.093, 0.422, 1.278, 1.025, NA, 1.464, 0.872, 1.109, 
    1.342, NA, NA, 0.845, NA, 0.501, 0.765, NA, 2.709, 2.055, 
    2.078, NA, 1.664, 0.636, 1.358, 1.435, NA, 0.84, 1.348, 2.237, 
    2.627, 3.445, NA, NA, 0.736, 0.63, NA, NA, 0.599, 1.137, 
    2.161, NA, 0.731, 1.333, NA, NA, NA, 1.169, NA, 1.541, 1.046, 
    0.693, NA, 0.565, 1.172, NA, 0.801, 0.907, 3.903, 2.544, 
    NA, 1.658, 0.917, 1.667, NA, 2.54, NA, NA, NA, NA, NA), ppard = c(1.575, 
    1.096, 1.278, 1.303, 1.282, 1.954, 1.704, 1.518, 1.637, 0.951, 
    1.122, 1.025, 1.86, 0.904, 0.53, 1.37, 1.704, 0.751, 1.443, 
    0.78, 0.678, 1.856, 1.449, 1.08, 1.038, 0.727, 1.33, 0.96, 
    1.299, 0.851, 0.777, 1.465, NA, 1.121, NA, 1.304, 0.715, 
    1.549, 1.211, 0.764, 1.61, 1.373, 1.055, 1.054, 0.859, 0.987, 
    0.893, 1.187, 0.924, 0.989, 1.252, NA, 3.55, 1.074, 0.597, 
    0.811, 1.439, 1.006, 1.169, 0.699, 0.718, 4.058, 1.931, 1.045, 
    4.25, 1.931, 0.613, 1.159, 1.123, NA, 1.758, 1.517, 2.089, 
    5.654, 2.191, 3.724, 1.693, 0.964, 4.672, 1.276, 1.108, 1.363, 
    1.221, 0.344, 1.363, 1.397, NA, 1.813, 1.203, 1.093, 1.307, 
    0.623, NA, 1.244, NA, 0.822, 0.997, 1.595, 1.73, 2.223, 1.841, 
    1.58, 2.088, 1.032, 1.399, 1.788, 1.323, 1.157, 2.091, 1.868, 
    1.893, 1.98, NA, 0.429, 0.779, 0.714, 0.928, 0.971, 1.201, 
    0.539, 1.222, 1.186, 0.522, 1.037, 0.494, 0.671, 1.52, 0.946, 
    1.019, 1.54, 1.188, 1.133, 1.424, 0.894, 0.943, 4.713, 1.041, 
    1.476, 1.873, 1.515, 1.31, 1.118, 1.562, 1.464, NA, 1.069, 
    NA, 1.442, 1.798, NA, 2.134), pparg = c(2.201, 1.171, 1.33, 
    1.303, 0.991, 1.471, 1.795, 1.221, 2.188, 1.158, 1.532, 0.85, 
    2.393, 0.791, 0.773, 2.011, 1.445, 1.066, 0.392, 0.392, 1.361, 
    1.025, 1.471, 1.063, 0.89, 0.797, 0.981, 0.5, NA, 0.5, 0.682, 
    2.783, NA, 1.322, NA, 1.044, 0.893, 2.676, 0.714, 0.7, 1.662, 
    1.259, 1.135, 0.838, 1.075, 1.45, 1.311, 1.044, 1.223, 1.329, 
    0.932, NA, 3.831, 1.571, 0.879, 0.6, 0.82, 0.862, 1.545, 
    0.451, 0.663, 0.818, 1.539, 0.842, 3.584, 1.501, 0.453, 0.738, 
    1.091, NA, 1.12, 1.182, 2.528, 4.489, 2.161, NA, 1.006, 0.702, 
    3.117, 1.302, 0.857, 1.069, 0.96, 0.354, 0.902, 0.638, NA, 
    1.51, 1.011, 0.421, 1.445, 0.634, NA, 1.197, NA, 1.122, 1.168, 
    1.388, 0.789, 1.51, 1.334, 0.813, 1.718, 1.413, 1.242, 0.903, 
    1.134, 1.366, 1.451, 1.635, 3.1, 2.107, NA, 1.033, 0.719, 
    0.961, 1.203, 1.19, 1.094, 0.846, 1.724, 1.746, 0.169, 0.651, 
    0.4, 0.761, 1.662, 1.324, 0.645, 2.503, 0.555, 1.298, 1.852, 
    2.334, 1.608, 2.606, 1.2, 1.631, 1.262, 1.439, 0.689, 0.856, 
    0.93, 1.167, NA, 0.848, NA, 0.909, 2.218, NA, 2.414), nr1h3 = c(1.55, 
    1.451, 1.046, 1.294, 0.797, 1.602, 1.839, 1.406, 0.87, 1.436, 
    1.318, 1.028, 1.462, 1.919, 0.786, 1.597, 0.932, 0.936, 0.802, 
    1.184, 0.956, 1.712, 0.932, 1.023, 1.001, 1.002, 1.682, 0.735, 
    1.267, 1.052, 0.824, 1.334, 0.026, 1.345, NA, 1.468, 1.317, 
    1.581, 1.809, 0.845, 1.347, 1.769, 1.206, 1.584, 1.19, 1.016, 
    1.138, 1.066, 1.097, 1.137, 2.099, NA, NA, 0.999, 0.727, 
    0.689, 0.678, 0.825, 1.47, 0.612, 0.798, 1.093, 2.946, 0.999, 
    NA, 1.849, NA, 1.123, 1.617, NA, 1.446, 1.449, 1.716, NA, 
    3.037, 2.588, 1.344, 2.2, 4.011, NA, NA, NA, 1.176, 0.268, 
    1.26, 1.366, NA, 1.914, NA, NA, 1.041, 0.536, NA, 2.36, NA, 
    0.881, 1.357, 1.44, 1.713, 0.967, 1.728, 1.145, 1.854, 0.881, 
    1.77, 1.597, 1.181, 0.924, 1.38, 1.754, 2.612, 1.751, NA, 
    0.512, NA, 0.97, 1.604, 0.761, 0.738, 0.711, 1.132, 1.285, 
    0.451, 0.497, 0.538, 0.779, 0.941, 0.881, 0.763, 1.349, 1.693, 
    0.91, 1.128, 0.734, 0.833, 3.879, 0.943, 1.457, 1.879, 1.021, 
    1.016, 1.316, 1.012, 1.259, NA, 1.031, NA, 1.176, 1.588, 
    NA, 2.899), nr1h2 = c(0.861, 1.744, 0.824, 0.665, 0.913, 
    1.355, 1.927, 1.568, 1.18, 0.699, 0.952, 1.397, 2.622, 1.389, 
    0.874, 0.835, 1.304, 0.733, 0.953, 0.803, 0.921, 1.867, 1.56, 
    0.89, 0.907, 0.518, 1.162, 1.178, 2.069, 0.929, 0.994, 1.16, 
    0.044, 1.167, NA, 1.383, 1.089, 1.415, 1.571, 0.777, 1.754, 
    2.031, 0.885, 1.114, 1.388, 1.485, 0.941, 1.146, 0.831, 1.005, 
    0.957, NA, 3.479, 0.736, 0.613, 0.871, 0.984, 0.851, 1.376, 
    0.505, 0.632, 1.397, 2.122, 0.927, 8.337, 1.616, 1.063, 1.407, 
    1.04, NA, 1.993, 1.859, 1.679, 4.436, 1.825, 6.177, 1.51, 
    1.161, 6.476, 1.734, 0.901, 0.82, 1.325, 0.376, 0.89, 1.25, 
    NA, 1.447, 1.019, 1.333, 0.809, 0.545, NA, 1.036, NA, 0.887, 
    0.944, 1.192, 1.848, 1.974, 1.281, 1.992, 1.909, 0.774, 1.436, 
    1.656, 1.508, 1.796, 1.754, 2.372, 1.617, 1.851, NA, 0.575, 
    0.624, 0.683, 0.824, 0.87, 1.099, 0.621, 1.278, 1.486, 0.858, 
    0.715, 0.481, 0.636, 0.872, 0.953, 0.67, 1.356, 1.601, 0.844, 
    1.303, 0.486, 0.808, 5.391, 0.994, 1.037, 1.677, 1.6, 0.718, 
    0.846, 2.001, 1.246, NA, 1.159, NA, 1.485, 1.574, NA, 2.04
    ), rxra = c(1.402, 1.28, 1.076, 1.105, 1.4, 1.327, 1.722, 
    1.365, 1.151, 0.785, 1.047, 1.458, 1.578, 0.952, 0.982, 1.182, 
    1.156, 1.045, 0.835, 0.76, 0.772, 1.739, 1.392, 1.907, 0.95, 
    0.938, 1.166, 1.047, NA, 1.738, 1.378, 1.382, NA, 1.296, 
    NA, 0.89, 1.009, 1.267, 1.101, 1.29, 0.94, 1.41, 0.731, 1.464, 
    1.214, 0.964, 0.857, 0.876, 0.663, 0.83, 0.861, NA, 3.695, 
    0.735, 0.59, 0.675, 1.472, 1.343, 1.282, 0.73, 0.821, 2.176, 
    1.872, 0.954, 8.058, 1.496, 1.141, 1.615, 1.134, NA, 1.435, 
    1.847, 1.888, 5.043, 2.009, 2.9, 1.077, 1.204, 3.304, 1.454, 
    0.779, 1.442, 1.442, 0.444, 0.639, 1.312, NA, 1.267, 1.349, 
    1.472, 0.909, 0.973, NA, 1.519, NA, 0.885, 0.72, 1.428, 1.543, 
    1.539, 1.9, 2.659, 3.304, 1.217, 1.241, 1.875, 1.541, 1.449, 
    2.359, 2.131, 1.58, 2.217, NA, 0.689, 0.927, 1.052, 1.1, 
    0.75, 0.694, 0.463, 1.109, 1.671, 0.929, 0.768, 0.641, 1.082, 
    1.198, 1.434, 1.109, 1.303, 1.135, 0.986, 1.549, 1.061, 1.197, 
    4.063, 1.309, 1.345, 1.357, 1.996, 0.66, 1.188, 1.314, 1.458, 
    NA, 1.333, NA, 1.497, 1.772, NA, 1.609), rxrb = c(0.987, 
    0.938, 1.882, 1.506, 2.067, 1.526, 1.223, 1.192, 1.054, 0.854, 
    1.192, 2.21, 1.78, 0.895, 0.255, 0.78, 0.633, 0.924, 0.868, 
    0.702, 0.721, 1.814, 1.572, 1.714, 1.337, 0.884, 1.347, 1.239, 
    1.737, 2.171, 1.062, 1.48, 0.024, 0.645, NA, 1.207, 1.153, 
    0.922, 1.574, 0.887, 0.636, 1.64, 0.777, 1.241, 1.184, 0.813, 
    0.372, 1.036, 1.146, 0.842, 1.166, NA, 4.097, 1.564, 0.689, 
    1.183, 1.281, 0.761, 1.563, 0.818, 0.873, 2.011, 1.277, 1.357, 
    4.742, 1.925, 1.128, 1.26, 1.239, NA, 1.461, 1.723, 1.514, 
    6.87, 1.437, 0.09, 1.99, 1.385, 0.069, 1.341, 2.724, 1.618, 
    1.252, 0.449, 2.379, 1.635, NA, 1.555, 1.71, 0.952, 1.409, 
    4.682, NA, 1.596, NA, 1.265, 0.97, 1.461, 2.087, 1.55, 1.762, 
    1.461, 1.574, 2.781, 1.1, 1.741, 1.257, 1.468, 2.02, 1.91, 
    2.047, 1.462, NA, 0.6, 0.75, 1.177, 1.023, 0.648, 1.029, 
    0.802, 0.622, 0.906, 0.632, 1.259, 1.054, 1.213, 1.135, 1.253, 
    1.481, 1.517, 1.918, 2.099, 1.541, 1.003, 0.733, 0.122, 1.748, 
    1.264, 1.871, 2.006, 1.552, 1.448, 1.116, 1.379, NA, 1.387, 
    NA, 1.436, 2.086, NA, 1.173), cyp27a1 = c(1.058, 1.079, 1.152, 
    0.743, 1.803, 1.157, 2.25, 1.193, 1.448, 0.649, 1.718, 0.972, 
    1.393, 0.908, 1.3, 1.389, 1.16, 0.666, 0.799, 0.665, 0.991, 
    1.535, 1.257, 1, 1.8, 0.882, 1.087, 1.298, NA, 1.37, 0.765, 
    1.807, NA, 1.104, NA, 0.967, 1.009, 1.03, 1.139, 0.666, 1.462, 
    1.532, 0.767, 1.24, 0.997, 1.005, 0.736, 1.122, 0.868, 0.878, 
    1.129, NA, NA, 1.011, 0.425, 0.615, 0.919, 0.848, 1.178, 
    0.66, 0.714, 6.647, 2.254, 0.804, NA, 1.831, 1.008, 1.421, 
    1.215, NA, 1.92, 1.688, 1.734, NA, 1.571, 3.858, 1.146, 1.128, 
    4.592, 1.865, 1.419, 1.056, 1.201, 0.324, 1.159, 1.099, NA, 
    1.693, 1.503, 1.175, 1.596, 0.836, NA, 1.532, NA, 1.097, 
    0.913, 1.837, 1.334, 2.072, 2.149, 2.595, 2.654, 1.237, 1.28, 
    2.118, 1.153, 1.616, 2.185, 2.29, 3.089, 1.429, NA, 0.981, 
    0.546, 0.997, 0.807, 0.666, 1.393, 0.574, 0.665, 2.131, 0.348, 
    1.269, 0.271, 0.674, 0.81, 0.908, 0.877, 1.144, 1.469, 1, 
    1.309, 0.841, 0.785, 5.332, 1.502, 1.248, 1.705, 2.165, 1.046, 
    1.227, 1.444, 1.114, NA, 1.222, NA, 1.49, 1.368, NA, 3.854
    )), row.names = c(NA, -151L), class = "data.frame")

Solution

  • The issue is that scale returns a matrix not a vector, i.e. when applied on a vector the result is a one-column matrix and that's what the [,1] is telling you.

    This could be seen when calling str():

    Note: I simplified the example data to the first four columns.

    library(dplyr)
    
    # Keep only the first four columns of the example data.
    df <- select(df, 1:4)

    # Apply scale() as-is to ppara..ppard and inspect the resulting structure.
    df |>
      dplyr::mutate(across(ppara:ppard, scale)) |>
      str()
    #> 'data.frame':    151 obs. of  4 variables:
    #>  $ id      : num  50109018 50109019 50109025 50109026 50109027 ...
    #>  $ grup_int: Factor w/ 3 levels "OO","NUTS","LFD": 3 3 3 2 3 1 1 3 3 3 ...
    #>   ..- attr(*, "label")= chr "GENERAL: Grupo de intervención"
    #>  $ ppara   : num [1:151, 1] 2.8822 -0.0909 -0.5192 NA NA ...
    #>   ..- attr(*, "scaled:center")= num 1.35
    #>   ..- attr(*, "scaled:scale")= num 0.836
    #>  $ ppard   : num [1:151, 1] 0.214 -0.376 -0.152 -0.121 -0.147 ...
    #>   ..- attr(*, "scaled:center")= num 1.4
    #>   ..- attr(*, "scaled:scale")= num 0.812
    

    From the output we see that the scaled columns are numeric matrices of dimension 151 x 1.

    To prevent that, you have to explicitly convert the result to a vector, either by extracting the single column with [, 1] or by wrapping it in as.numeric():

    # Extract the single column of the matrix returned by scale() so that
    # each scaled column is stored as a plain numeric vector.
    df |>
      dplyr::mutate(across(ppara:ppard, \(col) scale(col)[, 1])) |>
      str()
    #> 'data.frame':    151 obs. of  4 variables:
    #>  $ id      : num  50109018 50109019 50109025 50109026 50109027 ...
    #>  $ grup_int: Factor w/ 3 levels "OO","NUTS","LFD": 3 3 3 2 3 1 1 3 3 3 ...
    #>   ..- attr(*, "label")= chr "GENERAL: Grupo de intervención"
    #>  $ ppara   : num  2.8822 -0.0909 -0.5192 NA NA ...
    #>  $ ppard   : num  0.214 -0.376 -0.152 -0.121 -0.147 ...
    
    # Alternative: coerce the one-column matrix returned by scale() to a
    # plain numeric vector with as.numeric().
    df |>
      dplyr::mutate(across(ppara:ppard, \(col) as.numeric(scale(col)))) |>
      str()
    #> 'data.frame':    151 obs. of  4 variables:
    #>  $ id      : num  50109018 50109019 50109025 50109026 50109027 ...
    #>  $ grup_int: Factor w/ 3 levels "OO","NUTS","LFD": 3 3 3 2 3 1 1 3 3 3 ...
    #>   ..- attr(*, "label")= chr "GENERAL: Grupo de intervención"
    #>  $ ppara   : num  2.8822 -0.0909 -0.5192 NA NA ...
    #>  $ ppard   : num  0.214 -0.376 -0.152 -0.121 -0.147 ...