I would like to find which values from the vector "Spec" are absent from df$Species
with this code:
# Entries of Spec that have no match anywhere in df$Species
Spec[!(Spec %in% df$Species)]
This works properly on the whole dataset. However, I would like to apply it to the different groups of the dataset, obtained with df %>% group_split(`East-West`, Transect, Station) from the package dplyr. But I do not know how to apply the %in% expression to a list (I suppose it is possible with map() or lapply(), but I have not managed to implement it).
Here is the sample for df:
# Sample data: a 140-row tibble (classes tbl_df / tbl / data.frame) with the
# columns `East-West` (character), Transect and Station (factors),
# Species (character) and frequency (numeric).
df <- structure(list(`East-West` = c("E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E"), Transect = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2", "3", "4", "6",
"7"), class = "factor"), Station = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 16L, 16L), .Label = c("1", "10", "11", "12", "14", "16",
"17", "18", "2", "20", "22", "23", "24", "3", "4", "5", "7",
"8", "9"), class = "factor"), Species = c("Calanus finmarchicus",
"Calanus glacialis", "Calanus hyperboreus", "Calanus spp.", "Copepoda",
"Metridia longa", "Microcalanus pusillus", "Microcalanus pygmaeus",
"Microcalanus spp.", "Microsetella norvegica", "Oithona similis",
"Oithona spp.", "Oncaea borealis", "Pareuchaeta glacialis", "Pseudocalanus spp.",
"Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus",
"Calanus spp.", "Copepoda", "Metridia longa", "Microcalanus pusillus",
"Microcalanus spp.", "Microsetella norvegica", "Oithona similis",
"Oithona spp.", "Oncaea borealis", "Pseudocalanus spp.", "Calanus finmarchicus",
"Calanus glacialis", "Calanus hyperboreus", "Calanus spp.", "Copepoda",
"Metridia longa", "Microcalanus pusillus", "Microcalanus spp.",
"Microsetella norvegica", "Oithona similis", "Oithona spp.",
"Oncaea borealis", "Pseudocalanus spp.", "Calanoida indet.",
"Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus",
"Calanus spp.", "Copepoda", "Metridia longa", "Microcalanus pusillus",
"Microcalanus pygmaeus", "Microcalanus spp.", "Oithona similis",
"Oithona spp.", "Oncaea borealis", "Pareuchaeta glacialis", "Pseudocalanus spp.",
"Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus",
"Calanus spp.", "Copepoda", "Metridia longa", "Microcalanus pusillus",
"Microcalanus pygmaeus", "Microcalanus spp.", "Microsetella norvegica",
"Oithona atlantica", "Oithona similis", "Oithona spp.", "Oncaea borealis",
"Pareuchaeta glacialis", "Pareuchaeta spp.", "Pseudocalanus spp.",
"Calanoida indet.", "Calanus finmarchicus", "Calanus glacialis",
"Calanus hyperboreus", "Calanus spp.", "Copepoda", "Metridia longa",
"Microcalanus pygmaeus", "Microcalanus spp.", "Microsetella norvegica",
"Oithona atlantica", "Oithona similis", "Oithona spp.", "Oncaea borealis",
"Pareuchaeta glacialis", "Pareuchaeta spp.", "Pseudocalanus minutus",
"Pseudocalanus spp.", "Calanus finmarchicus", "Calanus glacialis",
"Calanus hyperboreus", "Calanus spp.", "Copepoda", "Metridia longa",
"Microcalanus pusillus", "Microcalanus spp.", "Microsetella norvegica",
"Oithona atlantica", "Oithona similis", "Oithona spp.", "Oncaea borealis",
"Pareuchaeta glacialis", "Pareuchaeta spp.", "Pseudocalanus spp.",
"Calanoida indet.", "Calanus finmarchicus", "Calanus glacialis",
"Calanus hyperboreus", "Calanus spp.", "Copepoda", "Metridia longa",
"Microcalanus pusillus", "Microcalanus pygmaeus", "Microcalanus spp.",
"Oithona atlantica", "Oithona similis", "Oithona spp.", "Oncaea borealis",
"Pareuchaeta glacialis", "Pareuchaeta spp.", "Pseudocalanus spp.",
"Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus",
"Calanus spp.", "Copepoda", "Metridia longa", "Microcalanus pusillus",
"Microcalanus pygmaeus", "Microcalanus spp.", "Oithona similis",
"Oithona spp.", "Oncaea borealis", "Pseudocalanus minutus", "Pseudocalanus spp.",
"Calanus finmarchicus", "Calanus glacialis"), frequency = c(0.00341821910784481,
0.00170910955392241, 0.0153819859853017, 0.185865663989062, 0.0358913006323705,
0.0109383011451034, 0.0141001538198599, 0.00273457528627585,
0.0769099299265083, 0.00683643821568963, 0.421295505041873, 0.151256195522133,
0.0598188343872842, 0.000170910955392241, 0.0136728764313793,
0.000794791061657445, 0.00122275547947299, 0.00550239965762847,
0.0516614190077339, 0.019197261027726, 0.00122275547947299, 0.00733653287683795,
0.0482377036652095, 0.00122275547947299, 0.102711460275731, 0.096842233974261,
0.0215204964387247, 0.0128389325344664, 0.00794791061657445,
0.00122275547947299, 0.0103934215755204, 0.10897808210803, 0.00843701280836365,
6.11377739736496e-05, 0.0168740256167273, 0.0753217375355363,
0.00183413321920949, 0.0755968575184177, 0.0646226270901476,
0.230642252315593, 0.0277565493840369, 0.0247582205029014, 0.0154738878143133,
0.00125725338491296, 0.00967117988394584, 0.151257253384913,
0.0431334622823984, 0.000967117988394584, 0.106769825918762,
0.0201160541586074, 0.0889748549323017, 0.15348162475822, 0.179303675048356,
0.186847195357834, 0.000193423597678917, 0.0177949709864603,
0.00635862653666808, 0.0026494277236117, 0.00423908435777872,
0.0258584145824502, 0.0491733785502332, 0.0105977108944468, 0.0292496820686732,
0.0426557863501484, 0.233997456549385, 0.00715345485375159, 0.00105977108944468,
0.0475307333615939, 0.0320050869012293, 0.47769181856719, 0.000105977108944468,
0.00211954217888936, 0.0275540483255617, 0.000106820488169631,
0.0144207659029002, 0.00117502536986594, 0.0027773326924104,
0.0384553757410671, 0.0246755327671847, 0.00769107514821343,
0.117288896010255, 0.103028360839609, 0.00320461464508893, 0.00106820488169631,
0.0616888319179619, 0.0893019281098115, 0.511029215403514, 0.000907974149441863,
0.00160230732254446, 0.00598194733749933, 0.0155957912727661,
0.00940733772342427, 0.00564440263405456, 0.00752587017873942,
0.026340545625588, 0.0914393226716839, 0.0244590780809031, 0.051928504233302,
0.190968955785513, 0.0112888052681091, 0.00188146754468485, 0.0658513640639699,
0.151458137347131, 0.335841956726246, 0.000752587017873942, 0.000752587017873942,
0.0244590780809031, 0.0569502556950256, 0.00929800092980009,
0.00232450023245002, 0.00348675034867503, 0.0232450023245002,
0.0854253835425383, 0.0168526266852627, 0.0122036262203626, 0.049395629939563,
0.337633658763366, 0.00116225011622501, 0.097629009762901, 0.134239888423989,
0.162715016271502, 0.000464900046490005, 0.00116225011622501,
0.00581125058112506, 0.0285089803288036, 0.00475149672146726,
0.00950299344293453, 0.0883778390192911, 0.246602679844151, 0.00380119737717381,
0.039912572460325, 0.0585384396084767, 0.244892141024423, 0.121258196331845,
0.0244226931483417, 0.117076879216953, 0.00665209541005417, 0.00570179606576071,
0.0053156146179402, 0.00132890365448505)), row.names = c(NA,
-140L), class = c("tbl_df", "tbl", "data.frame"))
And Spec:
# Reference list of the taxa to look for in df$Species.
Spec <- c(
  "Acartia spp.",
  "Acartia longiremis",
  "Calanus spp.",
  "Calanus finmarchicus",
  "Calanus glacialis",
  "Calanus hyperboreus",
  "Microcalanus spp.",
  "Microcalanus pygmaeus",
  "Microcalanus pusillus",
  "Metridia longa",
  "Oncaea borealis",
  "Oithona similis",
  "Oithona atlantica",
  "Oithona spp.",
  "Pseudocalanus spp.",
  "Pareuchaeta spp.",
  "Pareuchaeta glacialis",
  "Calanoida indet.",
  "Copepoda",
  "Microsetella norvegica",
  "Pseudocalanus minutus",
  "Pseudocalanus acuspes",
  "Bradyidius similis",
  "Centropages spp.",
  "Pareuchaeta norvegica"
)
Edit: I do not want to remove anything from df. I want to extract the values of the vector "Spec" that are not present in df$Species, separately for each group. With that I could generate a list of the missing values for each group.
You may use setdiff() inside by(). Note that by() is basically split() + lapply().
# For each combination of the first three columns of df, list the entries of
# Spec that do not occur among that group's Species values.
by(
  data = df$Species,
  INDICES = Reduce(paste, df[1:3]),
  FUN = setdiff,
  x = Spec
)
Or maybe this gives nicer group names:
# Store the pasted grouping key in a column named g first, so that by()
# labels every group with a short "g: ..." header.
df |>
  transform(g = Reduce(paste, df[1:3])) |>
  with(by(data = Species, INDICES = g, FUN = setdiff, x = Spec))
# g: E 1 1
# [1] "Acartia spp." "Acartia longiremis" "Oithona atlantica"
# [4] "Pareuchaeta spp." "Calanoida indet." "Pseudocalanus minutus"
# [7] "Pseudocalanus acuspes" "Bradyidius similis" "Centropages spp."
# [10] "Pareuchaeta norvegica"
# ----------------------------------------------------------
# g: E 1 2
# [1] "Acartia spp." "Acartia longiremis" "Microcalanus pygmaeus"
# [4] "Oithona atlantica" "Pareuchaeta spp." "Pareuchaeta glacialis"
# [7] "Calanoida indet." "Pseudocalanus minutus" "Pseudocalanus acuspes"
# [10] "Bradyidius similis" "Centropages spp." "Pareuchaeta norvegica"
# ----------------------------------------------------------
# g: E 1 4
# [1] "Acartia spp." "Acartia longiremis"
# [3] "Oithona atlantica" "Pareuchaeta spp."
# [5] "Microsetella norvegica" "Pseudocalanus minutus"
# [7] "Pseudocalanus acuspes" "Bradyidius similis"
# [9] "Centropages spp." "Pareuchaeta norvegica"
# ----------------------------------------------------------
# g: E 1 5
# [1] "Acartia spp." "Acartia longiremis" "Calanoida indet."
# [4] "Pseudocalanus minutus" "Pseudocalanus acuspes" "Bradyidius similis"
# [7] "Centropages spp." "Pareuchaeta norvegica"
# ----------------------------------------------------------
# g: E 2 1
# [1] "Acartia spp." "Acartia longiremis" "Microcalanus pusillus"
# [4] "Pseudocalanus acuspes" "Bradyidius similis" "Centropages spp."
# [7] "Pareuchaeta norvegica"
# ----------------------------------------------------------
# g: E 2 11
# [1] "Acartia spp." "Acartia longiremis" "Microcalanus pygmaeus"
# [4] "Calanoida indet." "Pseudocalanus minutus" "Pseudocalanus acuspes"
# [7] "Bradyidius similis" "Centropages spp." "Pareuchaeta norvegica"
# ----------------------------------------------------------
# g: E 2 12
# [1] "Acartia spp." "Acartia longiremis" "Microsetella norvegica"
# [4] "Pseudocalanus minutus" "Pseudocalanus acuspes" "Bradyidius similis"
# [7] "Centropages spp." "Pareuchaeta norvegica"
# ----------------------------------------------------------
# g: E 2 4
# [1] "Acartia spp." "Acartia longiremis"
# [3] "Oithona atlantica" "Pareuchaeta spp."
# [5] "Pareuchaeta glacialis" "Calanoida indet."
# [7] "Microsetella norvegica" "Pseudocalanus acuspes"
# [9] "Bradyidius similis" "Centropages spp."
# [11] "Pareuchaeta norvegica"
# ----------------------------------------------------------
# g: E 2 5
# [1] "Acartia spp." "Acartia longiremis"
# [3] "Calanus spp." "Calanus hyperboreus"
# [5] "Microcalanus spp." "Microcalanus pygmaeus"
# [7] "Microcalanus pusillus" "Metridia longa"
# [9] "Oncaea borealis" "Oithona similis"
# [11] "Oithona atlantica" "Oithona spp."
# [13] "Pseudocalanus spp." "Pareuchaeta spp."
# [15] "Pareuchaeta glacialis" "Calanoida indet."
# [17] "Copepoda" "Microsetella norvegica"
# [19] "Pseudocalanus minutus" "Pseudocalanus acuspes"
# [21] "Bradyidius similis" "Centropages spp."
# [23] "Pareuchaeta norvegica"
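You may also try paste0 instead of paste, which avoids whitespace within the group names (but note that without a separator distinct groups can end up with the same name, e.g. "1" + "11" and "11" + "1" both give "111"). If you depend on %in%, you could also do something like by(df, Reduce(paste, df[1:3]), \(x) Spec[!Spec %in% x$Species]), but setdiff is more concise.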