Search code examples
rtidyversestringr

Extract specific values of string and make new column in R


Im want to extract the numbers and add some - (minus) for new variables. Are 1800 rows like that.

test-> structure(list(GNUMBER = c("G12879D", "G13004A", "G51770", "G51771", 
"G51772", "G10017", "G12862", "G19898A", "G51361", "G51664B", 
"G51665", "G51665A", "G51666", "G51663", "G51663A", "G51664", 
"G51664A", "G51653C", "G51652B"), LATITUD = c("18.35N", NA, "20.62N", 
"20.62N", "20.62N", "19.00N", "19.78N", "27.34S", "27.39S", "9.90N", 
"9.90N", "9.90N", "9.90N", "9.90N", "9.90N", "9.90N", "9.90N", 
"9.90N", "9.90N"), LONGITUD = c("99.99W", NA, "101.72W", "101.72W", 
"101.72W", "99.11W", "103.11W", "65.96W", "65.98W", "83.93W", 
"83.93W", "83.93W", "83.93W", "83.93W", "83.93W", "83.93W", "83.93W", 
"83.93W", "83.93W")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -19L))


library(tidyverse)
library(stringr)

regexp <- "[^1:9]+$"

test %>%
  mutate(LATITUD_NEW = str_extract(test$LATITUD, regexp))  

enter image description here

Im want this. Add (-) when the column have a S and W, but no N. Because are the coordinates and need make a map.

enter image description here


Solution

  • It is easier with parse_number as it converts to numeric as well. For the condition, use case_when with str_detect

    library(dplyr)
    library(stringr)
    test %>% 
      mutate(across(LATITUD:LONGITUD, 
       ~ case_when(str_detect(.x, 'S|W')~ -1 * readr::parse_number(.x),
         TRUE ~ readr::parse_number(.x)), .names = "{.col}_NEW"))
    

    -output

    # A tibble: 19 × 5
       GNUMBER LATITUD LONGITUD LATITUD_NEW LONGITUD_NEW
       <chr>   <chr>   <chr>          <dbl>        <dbl>
     1 G12879D 18.35N  99.99W          18.4       -100. 
     2 G13004A <NA>    <NA>            NA           NA  
     3 G51770  20.62N  101.72W         20.6       -102. 
     4 G51771  20.62N  101.72W         20.6       -102. 
     5 G51772  20.62N  101.72W         20.6       -102. 
     6 G10017  19.00N  99.11W          19          -99.1
     7 G12862  19.78N  103.11W         19.8       -103. 
     8 G19898A 27.34S  65.96W         -27.3        -66.0
     9 G51361  27.39S  65.98W         -27.4        -66.0
    10 G51664B 9.90N   83.93W           9.9        -83.9
    11 G51665  9.90N   83.93W           9.9        -83.9
    12 G51665A 9.90N   83.93W           9.9        -83.9
    13 G51666  9.90N   83.93W           9.9        -83.9
    14 G51663  9.90N   83.93W           9.9        -83.9
    15 G51663A 9.90N   83.93W           9.9        -83.9
    16 G51664  9.90N   83.93W           9.9        -83.9
    17 G51664A 9.90N   83.93W           9.9        -83.9
    18 G51653C 9.90N   83.93W           9.9        -83.9
    19 G51652B 9.90N   83.93W           9.9        -83.9
    

    Or a slighly compact option

    test %>% 
      mutate(across(LATITUD:LONGITUD, 
        ~ c(1, -1)[1 + str_detect(.x, "S|W")] * readr::parse_number(.x), 
          .names = "{.col}_NEW"))
    

    -output

    # A tibble: 19 × 5
       GNUMBER LATITUD LONGITUD LATITUD_NEW LONGITUD_NEW
       <chr>   <chr>   <chr>          <dbl>        <dbl>
     1 G12879D 18.35N  99.99W          18.4       -100. 
     2 G13004A <NA>    <NA>            NA           NA  
     3 G51770  20.62N  101.72W         20.6       -102. 
     4 G51771  20.62N  101.72W         20.6       -102. 
     5 G51772  20.62N  101.72W         20.6       -102. 
     6 G10017  19.00N  99.11W          19          -99.1
     7 G12862  19.78N  103.11W         19.8       -103. 
     8 G19898A 27.34S  65.96W         -27.3        -66.0
     9 G51361  27.39S  65.98W         -27.4        -66.0
    10 G51664B 9.90N   83.93W           9.9        -83.9
    11 G51665  9.90N   83.93W           9.9        -83.9
    12 G51665A 9.90N   83.93W           9.9        -83.9
    13 G51666  9.90N   83.93W           9.9        -83.9
    14 G51663  9.90N   83.93W           9.9        -83.9
    15 G51663A 9.90N   83.93W           9.9        -83.9
    16 G51664  9.90N   83.93W           9.9        -83.9
    17 G51664A 9.90N   83.93W           9.9        -83.9
    18 G51653C 9.90N   83.93W           9.9        -83.9
    19 G51652B 9.90N   83.93W           9.9        -83.9