Okay, I know this likely has an easy answer; I've just been staring at the code too long at this point. Either way, I've visited Grouping data into ranges in R, R Data Grouping, grouping character variables in R, R - Grouping of Data, grouping data in R, and multiple others. I apologize in advance if this is a duplicate, but I have yet to find a question that matches mine.
I have data from over 20 viral families. I've coded it into binary, so that each row has a unique identifier and has a 0 or 1 in each viral family column. I am trying to code the Site data into numeric groups.
Data (sample size)
date Site geometry
<chr> <chr> <S3: sfc_POINT>
8.20.13 NDUFR <S3: sfc_POINT>
8.27.13 UMNP-HQ <S3: sfc_POINT>
8.28.13 UMNP-campsite3 <S3: sfc_POINT>
8.28.13 UMNP-campsite3 <S3: sfc_POINT>
8.28.13 UMNP-hondohondoa <S3: sfc_POINT>
8.28.13 UMNP-hondohondob <S3: sfc_POINT>
8.29.13 UMNP-njokamoni <S3: sfc_POINT>
8.29.13 UMNP-mangabey <S3: sfc_POINT>
8.30.13 UMNP-hondohondoc <S3: sfc_POINT>
8.30.13 UMNP-hondohondod <S3: sfc_POINT>
8.30.13 UMNP-hondohondoe <S3: sfc_POINT>
8.31.13 UMNP-HQ <S3: sfc_POINT>
8.31.13 MamaGoti <S3: sfc_POINT>
9.1.13 UMNP-Sanje1 <S3: sfc_POINT>
9.1.13 UMNP-Sanje2 <S3: sfc_POINT>
9.1.13 UMNP-Sanje3 <S3: sfc_POINT>
9.2.13 Magombera1 <S3: sfc_POINT>
9.2.13 Magombera2 <S3: sfc_POINT>
9.3.13 Sonjo <S3: sfc_POINT>
9.3.13 SonjoRoad <S3: sfc_POINT>
I want to code NDUFR as 1, UMNP as 2, and UMNP-campsite3 as 3, and then group the rest as follows: UMNP-hondohondoa, UMNP-hondohondob, UMNP-hondohondoc, UMNP-hondohondod and UMNP-hondohondoe as 4; MamaGoti as 5; UMNP-Sanje1, UMNP-Sanje2 and UMNP-Sanje3 as 6; Magombera1 and Magombera2 as 7; and Sonjo and SonjoRoad as 8.
I know this is likely relatively easy, but I am so brain dead. I've tried many dplyr and base R suggestions.
Any help is greatly appreciated.
Dput
structure(list(date = c("8.20.13", "8.27.13", "8.28.13", "8.28.13",
"8.28.13", "8.28.13", "8.29.13", "8.29.13", "8.30.13", "8.30.13",
"8.30.13", "8.31.13", "8.31.13", "9.1.13", "9.1.13", "9.1.13",
"9.2.13", "9.2.13", "9.3.13", "9.3.13"), forestsite = c("NDUFR",
"UMNP-HQ", "UMNP-campsite3", "UMNP-campsite3", "UMNP-hondohondoa",
"UMNP-hondohondob", "UMNP-njokamoni", "UMNP-mangabey", "UMNP-hondohondoc",
"UMNP-hondohondod", "UMNP-hondohondoe", "UMNP-HQ", "MamaGoti",
"UMNP-Sanje1", "UMNP-Sanje2", "UMNP-Sanje3", "Magombera1", "Magombera2",
"Sonjo", "SonjoRoad"), geometry = structure(list(structure(c(35.908246,
-8.0475655), class = c("XY", "POINT", "sfg")), structure(c(36.883988,
-7.844929), class = c("XY", "POINT", "sfg")), structure(c(36.884545,
-7.849439), class = c("XY", "POINT", "sfg")), structure(c(36.884545,
-7.849439), class = c("XY", "POINT", "sfg")), structure(c(36.887065,
-7.833501), class = c("XY", "POINT", "sfg")), structure(c(36.891369,
-7.832091), class = c("XY", "POINT", "sfg")), structure(c(36.878388,
-7.82738), class = c("XY", "POINT", "sfg")), structure(c(36.87753,
-7.8232), class = c("XY", "POINT", "sfg")), structure(c(36.89072,
-7.829972), class = c("XY", "POINT", "sfg")), structure(c(36.890019,
-7.827783), class = c("XY", "POINT", "sfg")), structure(c(36.887414,
-7.825414), class = c("XY", "POINT", "sfg")), structure(c(36.883988,
-7.844929), class = c("XY", "POINT", "sfg")), structure(c(36.886217,
-7.844622), class = c("XY", "POINT", "sfg")), structure(c(36.904182,
-7.783986), class = c("XY", "POINT", "sfg")), structure(c(36.903943,
-7.783), class = c("XY", "POINT", "sfg")), structure(c(36.902821,
-7.77507), class = c("XY", "POINT", "sfg")), structure(c(36.980875,
-7.832182), class = c("XY", "POINT", "sfg")), structure(c(36.960576,
-7.815916), class = c("XY", "POINT", "sfg")), structure(c(36.896019,
-7.808054), class = c("XY", "POINT", "sfg")), structure(c(36.895821,
-7.81365), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 35.908246, ymin = -8.0475655,
xmax = 36.980875, ymax = -7.77507), class = "bbox"), crs = structure(list(
input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n ENSEMBLE[\"World Geodetic System 1984 ensemble\",\n MEMBER[\"World Geodetic System 1984 (Transit)\"],\n MEMBER[\"World Geodetic System 1984 (G730)\"],\n MEMBER[\"World Geodetic System 1984 (G873)\"],\n MEMBER[\"World Geodetic System 1984 (G1150)\"],\n MEMBER[\"World Geodetic System 1984 (G1674)\"],\n MEMBER[\"World Geodetic System 1984 (G1762)\"],\n MEMBER[\"World Geodetic System 1984 (G2139)\"],\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]],\n ENSEMBLEACCURACY[2.0]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n CS[ellipsoidal,2],\n AXIS[\"geodetic latitude (Lat)\",north,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n AXIS[\"geodetic longitude (Lon)\",east,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n USAGE[\n SCOPE[\"Horizontal component of 3D system.\"],\n AREA[\"World.\"],\n BBOX[-90,-180,90,180]],\n ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = c(NA,
-20L), sf_column = "geometry", agr = structure(c(date = NA_integer_,
forestsite = NA_integer_), .Label = c("constant", "aggregate",
"identity"), class = "factor"), class = c("sf", "tbl_df", "tbl",
"data.frame"))
You can use `case_when()` for this. Note that the conditions have to go from the most specific to the least specific within `case_when()`, because each row takes the value of the first condition it matches.
library(tidyverse)

# Add a numeric `code` column that groups the values of `forestsite`.
# case_when() uses the first condition that matches, so the exact site names
# come first and the broad "UMNP" pattern is checked only after them.
df %>%
  mutate(
    code = case_when(
      forestsite == "NDUFR" ~ 1,
      forestsite == "UMNP-campsite3" ~ 3,
      forestsite %in% c(
        "UMNP-hondohondoa", "UMNP-hondohondob", "UMNP-hondohondoc",
        "UMNP-hondohondod", "UMNP-hondohondoe"
      ) ~ 4,
      forestsite == "MamaGoti" ~ 5,
      forestsite %in% c("UMNP-Sanje1", "UMNP-Sanje2", "UMNP-Sanje3") ~ 6,
      forestsite %in% c("Magombera1", "Magombera2") ~ 7,
      forestsite %in% c("Sonjo", "SonjoRoad") ~ 8,
      # Any remaining site whose name contains "UMNP" falls into group 2.
      grepl("UMNP", forestsite) ~ 2,
      # Anything that matched none of the rules above is left missing.
      TRUE ~ NA_real_
    )
  )
# A tibble: 20 × 4
date forestsite geometry code
* <chr> <chr> <POINT [°]> <dbl>
1 8.20.13 NDUFR (35.90825 -8.047565) 1
2 8.27.13 UMNP-HQ (36.88399 -7.844929) 2
3 8.28.13 UMNP-campsite3 (36.88455 -7.849439) 3
4 8.28.13 UMNP-campsite3 (36.88455 -7.849439) 3
5 8.28.13 UMNP-hondohondoa (36.88706 -7.833501) 4
6 8.28.13 UMNP-hondohondob (36.89137 -7.832091) 4
7 8.29.13 UMNP-njokamoni (36.87839 -7.82738) 2
8 8.29.13 UMNP-mangabey (36.87753 -7.8232) 2
9 8.30.13 UMNP-hondohondoc (36.89072 -7.829972) 4
10 8.30.13 UMNP-hondohondod (36.89002 -7.827783) 4
11 8.30.13 UMNP-hondohondoe (36.88741 -7.825414) 4
12 8.31.13 UMNP-HQ (36.88399 -7.844929) 2
13 8.31.13 MamaGoti (36.88622 -7.844622) 5
14 9.1.13 UMNP-Sanje1 (36.90418 -7.783986) 6
15 9.1.13 UMNP-Sanje2 (36.90394 -7.783) 6
16 9.1.13 UMNP-Sanje3 (36.90282 -7.77507) 6
17 9.2.13 Magombera1 (36.98087 -7.832182) 7
18 9.2.13 Magombera2 (36.96058 -7.815916) 7
19 9.3.13 Sonjo (36.89602 -7.808054) 8
20 9.3.13 SonjoRoad (36.89582 -7.81365) 8